vm_radix.c revision 246840
/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are way too many parameters embedded that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve the highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - The size of the nodes should be as small as possible.
 * - There is no bias toward lookup operations over inserts or removes,
 *   and vice-versa.
 * - On average there are not many complete levels, hence level
 *   compression may just complicate things.
 */

#include <sys/cdefs.h>

#include "opt_ddb.h"
#include "opt_vm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifndef VM_RADIX_BOOT_CACHE
#define VM_RADIX_BOOT_CACHE     1500
#endif

/*
 * These widths should allow the node children to be kept within a single
 * cache line, or at least not to span many cache lines.  In particular,
 * sparse tries are compressed through path compression, which makes the
 * occasional extra level not a big deal.
 */
#ifdef __LP64__
#define VM_RADIX_WIDTH  4
#else
#define VM_RADIX_WIDTH  3
#endif

#define VM_RADIX_COUNT  (1 << VM_RADIX_WIDTH)
#define VM_RADIX_MASK   (VM_RADIX_COUNT - 1)
#define VM_RADIX_LIMIT                                                  \
        (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1)

/* Flag bits stored in node pointers. */
#define VM_RADIX_ISLEAF 0x1
#define VM_RADIX_FLAGS  0x1
#define VM_RADIX_PAD    VM_RADIX_FLAGS

/* Returns one unit associated with the specified level. */
#define VM_RADIX_UNITLEVEL(lev)                                         \
        ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH))

struct vm_radix_node {
        void            *rn_child[VM_RADIX_COUNT];      /* Child nodes. */
        vm_pindex_t      rn_owner;                      /* Owner of record. */
        uint16_t         rn_count;                      /* Valid children. */
        uint16_t         rn_clev;                       /* Current level. */
};
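/*
 * Worked example of the geometry above (editor's illustration, assuming
 * __LP64__ and a 64-bit vm_pindex_t): VM_RADIX_WIDTH is 4, so
 * VM_RADIX_COUNT is 16, VM_RADIX_MASK is 0xf and VM_RADIX_LIMIT is
 * howmany(64, 4) - 1 = 15.  A key is consumed 4 bits at a time, most
 * significant nibble first: level 0 covers bits 63-60 and level 15
 * covers bits 3-0.  VM_RADIX_UNITLEVEL(14) is 1 << 4 = 0x10, i.e. the
 * distance between the keys of two consecutive slots of a level-14 node.
 */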
static uma_zone_t vm_radix_node_zone;

/*
 * Boot-time cache of struct vm_radix_node objects.
 * This cache is used to satisfy node allocations before the UMA zone is
 * actually set up and pre-allocated (i.e. before pmap_init()).
 */
static u_int boot_cache_cnt;
static struct vm_radix_node boot_cache[VM_RADIX_BOOT_CACHE];

static struct vm_radix_node *
vm_radix_carve_bootcache(void)
{
        struct vm_radix_node *rnode;

        if (boot_cache_cnt == VM_RADIX_BOOT_CACHE)
                panic("%s: Increase VM_RADIX_BOOT_CACHE (%u)", __func__,
                    VM_RADIX_BOOT_CACHE);
        rnode = &boot_cache[boot_cache_cnt];
        boot_cache_cnt++;
        return (rnode);
}

/*
 * Allocate a radix node.  Pre-allocation ensures that the request will
 * always be successfully satisfied.
 */
static __inline struct vm_radix_node *
vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel)
{
        struct vm_radix_node *rnode;

        if (__predict_false(boot_cache_cnt <= VM_RADIX_BOOT_CACHE))
                rnode = vm_radix_carve_bootcache();
        else {
                rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO);

                /*
                 * The required number of nodes should already be
                 * pre-allocated in vm_radix_init().  However, UMA may
                 * reserve a few nodes in per-CPU buckets, which are not
                 * accessible from the current CPU.  The allocation could
                 * then return NULL when the pre-allocation pool is close
                 * to exhaustion.  In practice this should never be a
                 * problem because a new node is not always required for
                 * an insert, so the pre-allocation pool should already
                 * hold some extra pages that indirectly handle this
                 * situation.
                 */
                if (rnode == NULL)
                        panic("%s: uma_zalloc() returned NULL for a new node",
                            __func__);
        }
        rnode->rn_owner = owner;
        rnode->rn_count = count;
        rnode->rn_clev = clevel;
        return (rnode);
}

/*
 * Free a radix node.
 */
static __inline void
vm_radix_node_put(struct vm_radix_node *rnode)
{

        if (__predict_false(rnode >= boot_cache &&
            rnode < &boot_cache[VM_RADIX_BOOT_CACHE]))
                return;
        uma_zfree(vm_radix_node_zone, rnode);
}

/*
 * Return the position in the array for a given level.
 */
static __inline int
vm_radix_slot(vm_pindex_t index, uint16_t level)
{

        return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) &
            VM_RADIX_MASK);
}

/* Trims the key after the specified level. */
static __inline vm_pindex_t
vm_radix_trimkey(vm_pindex_t index, uint16_t level)
{
        vm_pindex_t ret;

        ret = index;
        if (level < VM_RADIX_LIMIT) {
                ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
                ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH;
        }
        return (ret);
}

/*
 * Get the root node for a radix tree.
 */
static __inline struct vm_radix_node *
vm_radix_getroot(struct vm_radix *rtree)
{

        return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS));
}

/*
 * Set the root node for a radix tree.
 */
static __inline void
vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode)
{

        rtree->rt_root = (uintptr_t)rnode;
}
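/*
 * Example of the helpers above (editor's illustration, assuming __LP64__
 * values): for the key 0x123456, vm_radix_slot(0x123456, 15) extracts the
 * least significant nibble, 0x6, while vm_radix_slot(0x123456, 13) yields
 * (0x123456 >> 8) & 0xf = 0x4.  vm_radix_trimkey(0x123456, 13) clears
 * every nibble below level 13 and returns 0x123400.
 */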
/*
 * Returns the associated page extracted from rnode if available,
 * NULL otherwise.
 */
static __inline vm_page_t
vm_radix_node_page(struct vm_radix_node *rnode)
{

        return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ?
            (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL);
}

/*
 * Adds the page as a child of the provided node.
 */
static __inline void
vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
    vm_page_t page)
{
        int slot;

        slot = vm_radix_slot(index, clev);
        rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF);
}

/*
 * Returns the level where two keys differ.
 * It cannot accept two equal keys.
 */
static __inline uint16_t
vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
{
        uint16_t clev;

        KASSERT(index1 != index2, ("%s: passing the same key value %jx",
            __func__, (uintmax_t)index1));

        index1 ^= index2;
        for (clev = 0; clev <= VM_RADIX_LIMIT; clev++)
                if (vm_radix_slot(index1, clev))
                        return (clev);
        panic("%s: cannot reach this point", __func__);
        return (0);
}

/*
 * Returns TRUE if it can be determined that the key does not belong to
 * the specified rnode.  FALSE otherwise.
 */
static __inline boolean_t
vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
{

        if (rnode->rn_clev > 0) {
                idx = vm_radix_trimkey(idx, rnode->rn_clev - 1);
                idx -= rnode->rn_owner;
                if (idx != 0)
                        return (TRUE);
        }
        return (FALSE);
}

/*
 * Adjusts the idx key to the first upper level available, based on a valid
 * initial level and a map of available levels.
 * Returns a value greater than 0 to signal that no valid levels are
 * available.
 */
static __inline int
vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
        vm_pindex_t wrapidx;

        for (; levels[ilev] == FALSE ||
            vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--)
                if (ilev == 0)
                        break;
        KASSERT(ilev > 0 || levels[0] == TRUE,
            ("%s: levels back-scanning problem", __func__));
        if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1))
                return (1);
        wrapidx = *idx;
        *idx = vm_radix_trimkey(*idx, ilev);
        *idx += VM_RADIX_UNITLEVEL(ilev);
        if (*idx < wrapidx)
                return (1);
        return (0);
}

/*
 * Adjusts the idx key to the first lower level available, based on a valid
 * initial level and a map of available levels.
 * Returns a value greater than 0 to signal that no valid levels are
 * available.
 */
static __inline int
vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev)
{
        vm_pindex_t wrapidx;

        for (; levels[ilev] == FALSE ||
            vm_radix_slot(*idx, ilev) == 0; ilev--)
                if (ilev == 0)
                        break;
        KASSERT(ilev > 0 || levels[0] == TRUE,
            ("%s: levels back-scanning problem", __func__));
        if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0)
                return (1);
        wrapidx = *idx;
        *idx = vm_radix_trimkey(*idx, ilev);
        *idx |= VM_RADIX_UNITLEVEL(ilev) - 1;
        *idx -= VM_RADIX_UNITLEVEL(ilev);
        if (*idx < wrapidx)
                return (1);
        return (0);
}
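/*
 * Worked example of the key helpers above (editor's illustration,
 * assuming __LP64__ values): vm_radix_keydiff(0x1230, 0x1245) XORs the
 * keys into 0x75, whose most significant non-zero nibble sits at level
 * 14, so 14 is returned.  A node created to distinguish those two keys
 * would get rn_clev 14 and rn_owner vm_radix_trimkey(0x1230, 13) =
 * 0x1200; vm_radix_keybarr() on that node returns FALSE for the key
 * 0x1234 (trimming yields 0x1200, which matches the owner) and TRUE for
 * 0x1330 (trimming yields 0x1300).
 */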
/*
 * Internal helper for the vm_radix_reclaim_allnodes() primitive.
 * This function is recursive.
 */
static void
vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
{
        int slot;

        for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) {
                if (rnode->rn_child[slot] == NULL)
                        continue;
                if (vm_radix_node_page(rnode->rn_child[slot]) == NULL)
                        vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]);
                rnode->rn_count--;
        }
        vm_radix_node_put(rnode);
}

#ifdef INVARIANTS
/*
 * Radix node zone destructor.
 */
static void
vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused)
{
        struct vm_radix_node *rnode;

        rnode = mem;
        KASSERT(rnode->rn_count == 0,
            ("%s: freeing node %p with %d children\n", __func__, mem,
            rnode->rn_count));
}
#endif

/*
 * Pre-allocate intermediate nodes from the UMA slab zone.
 */
static void
vm_radix_init(void *arg __unused)
{
        int nitems;

        vm_radix_node_zone = uma_zcreate("RADIX NODE",
            sizeof(struct vm_radix_node), NULL,
#ifdef INVARIANTS
            vm_radix_node_zone_dtor,
#else
            NULL,
#endif
            NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE);
        nitems = uma_zone_set_max(vm_radix_node_zone, cnt.v_page_count);
        if (nitems < cnt.v_page_count)
                panic("%s: unexpected requested number of items", __func__);
        uma_prealloc(vm_radix_node_zone, nitems);
        boot_cache_cnt = VM_RADIX_BOOT_CACHE + 1;
}
SYSINIT(vm_radix_init, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_init, NULL);
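/*
 * Usage sketch for the public interface below (editor's illustration;
 * the calling sequence is hypothetical and the real wiring lives in the
 * consumers):
 *
 *      struct vm_radix rtree = { 0 };
 *
 *      vm_radix_insert(&rtree, m->pindex, m);
 *      m = vm_radix_lookup(&rtree, pindex);
 *      m = vm_radix_lookup_ge(&rtree, pindex);
 *      vm_radix_remove(&rtree, m->pindex);
 *
 * The trie itself performs no locking, so consumers are expected to
 * serialize these operations externally, e.g. under the VM object lock.
 */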
/*
 * Inserts the key-value pair into the trie.
 * Panics if the key already exists.
 */
void
vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page)
{
        vm_pindex_t newind;
        struct vm_radix_node *rnode, *tmp, *tmp2;
        vm_page_t m;
        int slot;
        uint16_t clev;

        /*
         * The owner of record for the root is not really important because
         * it will never be used.
         */
        rnode = vm_radix_getroot(rtree);
        if (rnode == NULL) {
                rnode = vm_radix_node_get(0, 1, 0);
                vm_radix_setroot(rtree, rnode);
                vm_radix_addpage(rnode, index, 0, page);
                return;
        }
        while (rnode != NULL) {
                if (vm_radix_keybarr(rnode, index) == TRUE)
                        break;
                slot = vm_radix_slot(index, rnode->rn_clev);
                m = vm_radix_node_page(rnode->rn_child[slot]);
                if (m != NULL) {
                        if (m->pindex == index)
                                panic("%s: key %jx is already present",
                                    __func__, (uintmax_t)index);
                        clev = vm_radix_keydiff(m->pindex, index);
                        tmp = vm_radix_node_get(vm_radix_trimkey(index,
                            clev - 1), 2, clev);
                        rnode->rn_child[slot] = tmp;
                        vm_radix_addpage(tmp, index, clev, page);
                        vm_radix_addpage(tmp, m->pindex, clev, m);
                        return;
                }
                if (rnode->rn_child[slot] == NULL) {
                        rnode->rn_count++;
                        vm_radix_addpage(rnode, index, rnode->rn_clev, page);
                        return;
                }
                rnode = rnode->rn_child[slot];
        }
        if (rnode == NULL)
                panic("%s: path traversal ended unexpectedly", __func__);

        /*
         * Scan the trie from the top and find the parent into which the
         * new object should be inserted.
         */
        newind = rnode->rn_owner;
        clev = vm_radix_keydiff(newind, index);
        slot = VM_RADIX_COUNT;
        for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) {
                KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan",
                    __func__));
                KASSERT(clev >= rnode->rn_clev,
                    ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d",
                    __func__, clev, rnode->rn_clev));
                slot = vm_radix_slot(index, rnode->rn_clev);
                tmp = rnode->rn_child[slot];
                KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL,
                    ("%s: unexpected lookup interruption", __func__));
                if (tmp->rn_clev > clev)
                        break;
        }
        KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT,
            ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d",
            __func__, (void *)rnode, (void *)tmp, slot));

        /*
         * A new node is needed because the right insertion level has been
         * reached.  Set up the new intermediate node and add the two
         * children: the new object and the older edge.
         */
        tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2,
            clev);
        rnode->rn_child[slot] = tmp2;
        vm_radix_addpage(tmp2, index, clev, page);
        slot = vm_radix_slot(newind, clev);
        tmp2->rn_child[slot] = tmp;
}

/*
 * Returns the value stored at the index.  If the index is not present,
 * NULL is returned.
 */
vm_page_t
vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode;
        vm_page_t m;
        int slot;

        rnode = vm_radix_getroot(rtree);
        while (rnode != NULL) {
                if (vm_radix_keybarr(rnode, index) == TRUE)
                        return (NULL);
                slot = vm_radix_slot(index, rnode->rn_clev);
                rnode = rnode->rn_child[slot];
                m = vm_radix_node_page(rnode);
                if (m != NULL) {
                        if (m->pindex == index)
                                return (m);
                        else
                                return (NULL);
                }
        }
        return (NULL);
}
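/*
 * Worked example of the insertion path above (editor's illustration,
 * assuming __LP64__ values): inserting 0x10 into an empty trie creates a
 * level-0 root holding the page as a direct leaf.  Inserting 0x14 next
 * finds that leaf, computes vm_radix_keydiff(0x10, 0x14) = 15, and
 * interposes a level-15 node with owner 0x10 whose slots 0 and 4 hold
 * the two pages.  Inserting 0x1000 afterwards hits the key barrier on
 * that node (the keys already differ at level 12), so the second
 * top-down scan splices in a level-12 node whose slot 0 points at the
 * old node and whose slot 1 holds the new leaf.
 */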
/*
 * Look up any entry at a position bigger than or equal to index.
 */
vm_page_t
vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
{
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *rnode;
        int slot;
        uint16_t difflev;
        boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
        int loops = 0;
#endif

restart:
        KASSERT(++loops < 1000, ("%s: too many loops", __func__));
        for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
                maplevels[difflev] = FALSE;
        rnode = vm_radix_getroot(rtree);
        while (rnode != NULL) {
                maplevels[rnode->rn_clev] = TRUE;

                /*
                 * If the keys differ before the current bisection node,
                 * the search key might roll back to the earliest
                 * available bisection node, or to the smallest value
                 * in the current domain (if the owner is bigger than the
                 * search key).
                 * The search for a valid bisection node is helped by the
                 * maplevels array, which immediately provides the next
                 * lower useful level, skipping holes.
                 */
                if (vm_radix_keybarr(rnode, index) == TRUE) {
                        difflev = vm_radix_keydiff(index, rnode->rn_owner);
                        if (index > rnode->rn_owner) {
                                if (vm_radix_addlev(&index, maplevels,
                                    difflev) > 0)
                                        break;
                        } else
                                index = vm_radix_trimkey(rnode->rn_owner,
                                    difflev);
                        goto restart;
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                m = vm_radix_node_page(rnode->rn_child[slot]);
                if (m != NULL && m->pindex >= index)
                        return (m);
                if (rnode->rn_child[slot] != NULL && m == NULL) {
                        rnode = rnode->rn_child[slot];
                        continue;
                }

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot < (VM_RADIX_COUNT - 1)) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index = vm_radix_trimkey(index, rnode->rn_clev);
                        index += inc;
                        slot++;
                        for (;; index += inc, slot++) {
                                m = vm_radix_node_page(rnode->rn_child[slot]);
                                if (m != NULL && m->pindex >= index)
                                        return (m);
                                if ((rnode->rn_child[slot] != NULL &&
                                    m == NULL) || slot == (VM_RADIX_COUNT - 1))
                                        break;
                        }
                }

                /*
                 * If neither a valid page nor an edge bigger than the
                 * search slot was found in the traversal, skip to the next
                 * higher-level key.
                 */
                if (slot == (VM_RADIX_COUNT - 1) &&
                    (rnode->rn_child[slot] == NULL || m != NULL)) {
                        if (rnode->rn_clev == 0 || vm_radix_addlev(&index,
                            maplevels, rnode->rn_clev - 1) > 0)
                                break;
                        goto restart;
                }
                rnode = rnode->rn_child[slot];
        }
        return (NULL);
}

/*
 * Look up any entry at a position less than or equal to index.
 */
vm_page_t
vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
{
        vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *rnode;
        int slot;
        uint16_t difflev;
        boolean_t maplevels[VM_RADIX_LIMIT + 1];
#ifdef INVARIANTS
        int loops = 0;
#endif

restart:
        KASSERT(++loops < 1000, ("%s: too many loops", __func__));
        for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++)
                maplevels[difflev] = FALSE;
        rnode = vm_radix_getroot(rtree);
        while (rnode != NULL) {
                maplevels[rnode->rn_clev] = TRUE;

                /*
                 * If the keys differ before the current bisection node,
                 * the search key might roll back to the earliest
                 * available bisection node, or to the biggest value
                 * in the current domain (if the owner is smaller than the
                 * search key).
                 * The search for a valid bisection node is helped by the
                 * maplevels array, which immediately provides the next
                 * lower useful level, skipping holes.
                 */
                if (vm_radix_keybarr(rnode, index) == TRUE) {
                        difflev = vm_radix_keydiff(index, rnode->rn_owner);
                        if (index > rnode->rn_owner) {
                                index = vm_radix_trimkey(rnode->rn_owner,
                                    difflev);
                                index |= VM_RADIX_UNITLEVEL(difflev) - 1;
                        } else if (vm_radix_declev(&index, maplevels,
                            difflev) > 0)
                                break;
                        goto restart;
                }
                slot = vm_radix_slot(index, rnode->rn_clev);
                m = vm_radix_node_page(rnode->rn_child[slot]);
                if (m != NULL && m->pindex <= index)
                        return (m);
                if (rnode->rn_child[slot] != NULL && m == NULL) {
                        rnode = rnode->rn_child[slot];
                        continue;
                }

                /*
                 * Look for an available edge or page within the current
                 * bisection node.
                 */
                if (slot > 0) {
                        inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
                        index = vm_radix_trimkey(index, rnode->rn_clev);
                        index |= inc - 1;
                        index -= inc;
                        slot--;
                        for (;; index -= inc, slot--) {
                                m = vm_radix_node_page(rnode->rn_child[slot]);
                                if (m != NULL && m->pindex <= index)
                                        return (m);
                                if ((rnode->rn_child[slot] != NULL &&
                                    m == NULL) || slot == 0)
                                        break;
                        }
                }

                /*
                 * If neither a valid page nor an edge smaller than the
                 * search slot was found in the traversal, skip to the next
                 * higher-level key.
                 */
                if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) {
                        if (rnode->rn_clev == 0 || vm_radix_declev(&index,
                            maplevels, rnode->rn_clev - 1) > 0)
                                break;
                        goto restart;
                }
                rnode = rnode->rn_child[slot];
        }
        return (NULL);
}
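/*
 * Example of the ranged lookups above (editor's illustration): with pages
 * filed at indices 0x10 and 0x20 only, vm_radix_lookup_ge(&rtree, 0x15)
 * returns the page at 0x20 and vm_radix_lookup_le(&rtree, 0x15) returns
 * the page at 0x10, while vm_radix_lookup_ge(&rtree, 0x21) and
 * vm_radix_lookup_le(&rtree, 0xf) both return NULL.
 */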
/*
 * Remove the specified index from the tree.
 * Panics if the key is not present.
 */
void
vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
{
        struct vm_radix_node *rnode, *parent;
        vm_page_t m;
        int i, slot;

        parent = NULL;
        rnode = vm_radix_getroot(rtree);
        for (;;) {
                if (rnode == NULL)
                        panic("%s: impossible to locate the key", __func__);
                slot = vm_radix_slot(index, rnode->rn_clev);
                m = vm_radix_node_page(rnode->rn_child[slot]);
                if (m != NULL && m->pindex == index) {
                        rnode->rn_child[slot] = NULL;
                        rnode->rn_count--;
                        if (rnode->rn_count > 1)
                                break;
                        if (parent == NULL) {
                                if (rnode->rn_count == 0) {
                                        vm_radix_node_put(rnode);
                                        vm_radix_setroot(rtree, NULL);
                                }
                                break;
                        }
                        for (i = 0; i < VM_RADIX_COUNT; i++)
                                if (rnode->rn_child[i] != NULL)
                                        break;
                        KASSERT(i != VM_RADIX_COUNT,
                            ("%s: invalid node configuration", __func__));
                        slot = vm_radix_slot(index, parent->rn_clev);
                        KASSERT(parent->rn_child[slot] == rnode,
                            ("%s: invalid child value", __func__));
                        parent->rn_child[slot] = rnode->rn_child[i];
                        rnode->rn_count--;
                        rnode->rn_child[i] = NULL;
                        vm_radix_node_put(rnode);
                        break;
                }
                if (m != NULL && m->pindex != index)
                        panic("%s: invalid key found", __func__);
                parent = rnode;
                rnode = rnode->rn_child[slot];
        }
}

/*
 * Remove and free all the nodes from the radix tree.
 * This function is recursive, but the recursion is tightly bounded because
 * the maximum depth of the tree is fixed.
 */
void
vm_radix_reclaim_allnodes(struct vm_radix *rtree)
{
        struct vm_radix_node *root;

        root = vm_radix_getroot(rtree);
        if (root == NULL)
                return;
        vm_radix_reclaim_allnodes_int(root);
        vm_radix_setroot(rtree, NULL);
}

#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
        struct vm_radix_node *rnode;
        int i;

        if (!have_addr)
                return;
        rnode = (struct vm_radix_node *)addr;
        db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
            (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
            rnode->rn_clev);
        for (i = 0; i < VM_RADIX_COUNT; i++)
                if (rnode->rn_child[i] != NULL)
                        db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
                            i, (void *)rnode->rn_child[i],
                            (void *)vm_radix_node_page(rnode->rn_child[i]),
                            rnode->rn_clev);
}
#endif /* DDB */
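/*
 * Example DDB session for the command above (editor's illustration; the
 * addresses are hypothetical):
 *
 *      db> show radixnode 0xfffffe0002a61000
 *      radixnode 0xfffffe0002a61000, owner 10, children count 2, level 15:
 *      slot: 0, val: 0xfffffe0002b40001, page: 0xfffffe0002b40000, clev: 15
 *      slot: 4, val: 0xfffffe0002b41001, page: 0xfffffe0002b41000, clev: 15
 */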