/* vm_radix.c, revision 247742 */
/*
 * Copyright (c) 2013 EMC Corp.
 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org>
 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * Path-compressed radix trie implementation.
 * The following code is not generalized into a general purpose library
 * because there are way too many parameters embedded that should really
 * be decided by the library consumers.  At the same time, consumers
 * of this code must achieve highest possible performance.
 *
 * The implementation takes into account the following rationale:
 * - Size of the nodes should be as small as possible.
 * - There is no bias toward lookup operations over inserts or removes,
 *   and vice-versa.
 * - On average there are not many complete levels, so level
 *   compression may just complicate things.
 */

#include <sys/cdefs.h>

#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/vmmeter.h>

#include <vm/uma.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_radix.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

/*
 * Such sizes should permit to keep node children contained into a single
 * cache-line, or to at least not span many of those.
 * In particular, sparse tries should however be compressed properly and
 * then make some extra-levels not a big deal.
69233034Sattilio */ 70246726Sattilio#ifdef __LP64__ 71246726Sattilio#define VM_RADIX_WIDTH 4 72233034Sattilio#else 73246726Sattilio#define VM_RADIX_WIDTH 3 74233034Sattilio#endif 75233034Sattilio 76232326Sattilio#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) 77232326Sattilio#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) 78246726Sattilio#define VM_RADIX_LIMIT \ 79246726Sattilio (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1) 80232326Sattilio 81232326Sattilio/* Flag bits stored in node pointers. */ 82246726Sattilio#define VM_RADIX_ISLEAF 0x1 83246726Sattilio#define VM_RADIX_FLAGS 0x1 84246726Sattilio#define VM_RADIX_PAD VM_RADIX_FLAGS 85232326Sattilio 86246726Sattilio/* Returns one unit associated with specified level. */ 87246726Sattilio#define VM_RADIX_UNITLEVEL(lev) \ 88246726Sattilio ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH)) 89232326Sattilio 90232326Sattiliostruct vm_radix_node { 91232326Sattilio void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ 92246726Sattilio vm_pindex_t rn_owner; /* Owner of record. */ 93246726Sattilio uint16_t rn_count; /* Valid children. */ 94246726Sattilio uint16_t rn_clev; /* Current level. */ 95232326Sattilio}; 96232326Sattilio 97226873Sattiliostatic uma_zone_t vm_radix_node_zone; 98226645Sattilio 99246794Sattilio/* 100246726Sattilio * Allocate a radix node. Pre-allocation ensures that the request will be 101246726Sattilio * always successfully satisfied. 102246726Sattilio */ 103246726Sattiliostatic __inline struct vm_radix_node * 104246726Sattiliovm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) 105226873Sattilio{ 106246726Sattilio struct vm_radix_node *rnode; 107226645Sattilio 108247742Sattilio rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); 109226873Sattilio 110247742Sattilio /* 111247742Sattilio * The required number of nodes might be already correctly 112247742Sattilio * pre-allocated in vm_radix_init(). 
However, UMA can reserve 113247742Sattilio * few nodes on per-cpu specific buckets, which will not be 114247742Sattilio * accessible from the curcpu. The allocation could then 115247742Sattilio * return NULL when the pre-allocation pool is close to be 116247742Sattilio * exhausted. Anyway, in practice this should never be a 117247742Sattilio * problem because a new node is not always required for 118247742Sattilio * insert, thus the pre-allocation pool should already have 119247742Sattilio * some extra-pages that indirectly deal with this situation. 120247742Sattilio */ 121247742Sattilio if (rnode == NULL) 122247742Sattilio panic("%s: uma_zalloc() returned NULL for a new node", 123247742Sattilio __func__); 124246726Sattilio rnode->rn_owner = owner; 125246726Sattilio rnode->rn_count = count; 126246726Sattilio rnode->rn_clev = clevel; 127246726Sattilio return (rnode); 128246726Sattilio} 129226873Sattilio 130246726Sattilio/* 131246726Sattilio * Free radix node. 132246726Sattilio */ 133246726Sattiliostatic __inline void 134246726Sattiliovm_radix_node_put(struct vm_radix_node *rnode) 135246726Sattilio{ 136246726Sattilio 137246726Sattilio uma_zfree(vm_radix_node_zone, rnode); 138226873Sattilio} 139226873Sattilio 140246726Sattilio/* 141246726Sattilio * Return the position in the array for a given level. 142246726Sattilio */ 143246726Sattiliostatic __inline int 144246726Sattiliovm_radix_slot(vm_pindex_t index, uint16_t level) 145226873Sattilio{ 146226873Sattilio 147246726Sattilio return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) & 148246726Sattilio VM_RADIX_MASK); 149226873Sattilio} 150226873Sattilio 151246726Sattilio/* Trims the key after the specified level. 
*/ 152246726Sattiliostatic __inline vm_pindex_t 153246726Sattiliovm_radix_trimkey(vm_pindex_t index, uint16_t level) 154226873Sattilio{ 155246726Sattilio vm_pindex_t ret; 156226873Sattilio 157246726Sattilio ret = index; 158246726Sattilio if (level < VM_RADIX_LIMIT) { 159246726Sattilio ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; 160246726Sattilio ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; 161246726Sattilio } 162246726Sattilio return (ret); 163226873Sattilio} 164226873Sattilio 165226645Sattilio/* 166246726Sattilio * Get the root node for a radix tree. 167226873Sattilio */ 168246726Sattiliostatic __inline struct vm_radix_node * 169246726Sattiliovm_radix_getroot(struct vm_radix *rtree) 170226873Sattilio{ 171226873Sattilio 172246726Sattilio return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS)); 173226873Sattilio} 174226873Sattilio 175226873Sattilio/* 176246726Sattilio * Set the root node for a radix tree. 177226645Sattilio */ 178246726Sattiliostatic __inline void 179246726Sattiliovm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode) 180226645Sattilio{ 181226645Sattilio 182246726Sattilio rtree->rt_root = (uintptr_t)rnode; 183226645Sattilio} 184226645Sattilio 185226645Sattilio/* 186246726Sattilio * Returns the associated page extracted from rnode if available, 187246726Sattilio * NULL otherwise. 188226645Sattilio */ 189246726Sattiliostatic __inline vm_page_t 190246726Sattiliovm_radix_node_page(struct vm_radix_node *rnode) 191226645Sattilio{ 192226645Sattilio 193246726Sattilio return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ? 194246726Sattilio (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL); 195226645Sattilio} 196226645Sattilio 197226645Sattilio/* 198246726Sattilio * Adds the page as a child of provided node. 
199226645Sattilio */ 200246726Sattiliostatic __inline void 201246726Sattiliovm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, 202246726Sattilio vm_page_t page) 203226645Sattilio{ 204246726Sattilio int slot; 205226645Sattilio 206246726Sattilio slot = vm_radix_slot(index, clev); 207246726Sattilio rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF); 208226645Sattilio} 209226645Sattilio 210233034Sattilio/* 211246726Sattilio * Returns the slot where two keys differ. 212246726Sattilio * It cannot accept 2 equal keys. 213233034Sattilio */ 214246726Sattiliostatic __inline uint16_t 215246726Sattiliovm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) 216226873Sattilio{ 217246726Sattilio uint16_t clev; 218226873Sattilio 219246726Sattilio KASSERT(index1 != index2, ("%s: passing the same key value %jx", 220246726Sattilio __func__, (uintmax_t)index1)); 221233034Sattilio 222246726Sattilio index1 ^= index2; 223246726Sattilio for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++) 224246726Sattilio if (vm_radix_slot(index1, clev)) 225246726Sattilio return (clev); 226246726Sattilio panic("%s: it might have not reached this point", __func__); 227246726Sattilio return (0); 228226873Sattilio} 229226873Sattilio 230226645Sattilio/* 231246726Sattilio * Returns TRUE if it can be determined that key does not belong to the 232246726Sattilio * specified rnode. FALSE otherwise. 
233226876Sjeff */ 234246726Sattiliostatic __inline boolean_t 235246726Sattiliovm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) 236226876Sjeff{ 237226876Sjeff 238246726Sattilio if (rnode->rn_clev > 0) { 239246726Sattilio idx = vm_radix_trimkey(idx, rnode->rn_clev - 1); 240246726Sattilio idx -= rnode->rn_owner; 241246726Sattilio if (idx != 0) 242246726Sattilio return (TRUE); 243246726Sattilio } 244246726Sattilio return (FALSE); 245226876Sjeff} 246226876Sjeff 247226876Sjeff/* 248246726Sattilio * Adjusts the idx key to the first upper level available, based on a valid 249246726Sattilio * initial level and map of available levels. 250246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels 251246726Sattilio * available. 252226876Sjeff */ 253246726Sattiliostatic __inline int 254246726Sattiliovm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) 255226876Sjeff{ 256246726Sattilio vm_pindex_t wrapidx; 257226876Sjeff 258246726Sattilio for (; levels[ilev] == FALSE || 259246726Sattilio vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--) 260246726Sattilio if (ilev == 0) 261246726Sattilio break; 262246726Sattilio KASSERT(ilev > 0 || levels[0] == TRUE, 263246726Sattilio ("%s: levels back-scanning problem", __func__)); 264246726Sattilio if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1)) 265246726Sattilio return (1); 266246726Sattilio wrapidx = *idx; 267246726Sattilio *idx = vm_radix_trimkey(*idx, ilev); 268246726Sattilio *idx += VM_RADIX_UNITLEVEL(ilev); 269247232Sattilio return (*idx < wrapidx); 270226876Sjeff} 271226876Sjeff 272246726Sattilio/* 273246726Sattilio * Adjusts the idx key to the first lower level available, based on a valid 274246726Sattilio * initial level and map of available levels. 275246726Sattilio * Returns a value bigger than 0 to signal that there are not valid levels 276246726Sattilio * available. 
277246726Sattilio */ 278246726Sattiliostatic __inline int 279246726Sattiliovm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) 280226930Sjeff{ 281246726Sattilio vm_pindex_t wrapidx; 282226930Sjeff 283246726Sattilio for (; levels[ilev] == FALSE || 284246726Sattilio vm_radix_slot(*idx, ilev) == 0; ilev--) 285246726Sattilio if (ilev == 0) 286246726Sattilio break; 287246726Sattilio KASSERT(ilev > 0 || levels[0] == TRUE, 288246726Sattilio ("%s: levels back-scanning problem", __func__)); 289246726Sattilio if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0) 290246726Sattilio return (1); 291246726Sattilio wrapidx = *idx; 292246726Sattilio *idx = vm_radix_trimkey(*idx, ilev); 293246726Sattilio *idx |= VM_RADIX_UNITLEVEL(ilev) - 1; 294246726Sattilio *idx -= VM_RADIX_UNITLEVEL(ilev); 295247233Sattilio return (*idx > wrapidx); 296226930Sjeff} 297226930Sjeff 298246726Sattilio/* 299246726Sattilio * Internal handwork for vm_radix_reclaim_allonodes() primitive. 300246726Sattilio * This function is recrusive. 301246726Sattilio */ 302226980Sattiliostatic void 303246726Sattiliovm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode) 304226980Sattilio{ 305226980Sattilio int slot; 306226980Sattilio 307226980Sattilio for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { 308226980Sattilio if (rnode->rn_child[slot] == NULL) 309226980Sattilio continue; 310246726Sattilio if (vm_radix_node_page(rnode->rn_child[slot]) == NULL) 311246726Sattilio vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]); 312226980Sattilio rnode->rn_count--; 313226980Sattilio } 314226980Sattilio vm_radix_node_put(rnode); 315226980Sattilio} 316226980Sattilio 317246836Sattilio#ifdef INVARIANTS 318226876Sjeff/* 319246836Sattilio * Radix node zone destructor. 
320246836Sattilio */ 321246836Sattiliostatic void 322246836Sattiliovm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused) 323246836Sattilio{ 324246836Sattilio struct vm_radix_node *rnode; 325246836Sattilio 326246836Sattilio rnode = mem; 327246836Sattilio KASSERT(rnode->rn_count == 0, 328246836Sattilio ("vm_radix_node_put: Freeing node %p with %d children\n", mem, 329246836Sattilio rnode->rn_count)); 330246836Sattilio} 331246836Sattilio#endif 332246836Sattilio 333246836Sattilio/* 334246726Sattilio * Pre-allocate intermediate nodes from the UMA slab zone. 335246726Sattilio */ 336246794Sattiliostatic void 337247742Sattiliovm_radix_prealloc(void *arg __unused) 338246726Sattilio{ 339246726Sattilio 340247742Sattilio if (!uma_zone_reserve_kva(vm_radix_node_zone, cnt.v_page_count)) 341247742Sattilio panic("%s: unable to create new zone", __func__); 342247742Sattilio uma_prealloc(vm_radix_node_zone, cnt.v_page_count); 343247742Sattilio} 344247742SattilioSYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc, 345247742Sattilio NULL); 346247742Sattilio 347247742Sattilio/* 348247742Sattilio * Initialize the UMA slab zone. 349247742Sattilio * Until vm_radix_prealloc() is called, the zone will be served by the 350247742Sattilio * UMA boot-time pre-allocated pool of pages. 351247742Sattilio */ 352247742Sattiliovoid 353247742Sattiliovm_radix_init(void) 354247742Sattilio{ 355247742Sattilio 356246726Sattilio vm_radix_node_zone = uma_zcreate("RADIX NODE", 357246726Sattilio sizeof(struct vm_radix_node), NULL, 358246726Sattilio#ifdef INVARIANTS 359246726Sattilio vm_radix_node_zone_dtor, 360246726Sattilio#else 361246726Sattilio NULL, 362246726Sattilio#endif 363246726Sattilio NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE); 364246726Sattilio} 365246726Sattilio 366246726Sattilio/* 367246726Sattilio * Inserts the key-value pair in to the trie. 368226645Sattilio * Panics if the key already exists. 
369226645Sattilio */ 370246430Sattiliovoid 371246430Sattiliovm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page) 372226645Sattilio{ 373246726Sattilio vm_pindex_t newind; 374246726Sattilio struct vm_radix_node *rnode, *tmp, *tmp2; 375246726Sattilio vm_page_t m; 376236763Sattilio int slot; 377246726Sattilio uint16_t clev; 378226645Sattilio 379226645Sattilio /* 380246726Sattilio * The owner of record for root is not really important because it 381246726Sattilio * will never be used. 382226645Sattilio */ 383246726Sattilio rnode = vm_radix_getroot(rtree); 384246726Sattilio if (rnode == NULL) { 385246726Sattilio rnode = vm_radix_node_get(0, 1, 0); 386246726Sattilio vm_radix_setroot(rtree, rnode); 387246726Sattilio vm_radix_addpage(rnode, index, 0, page); 388246726Sattilio return; 389246726Sattilio } 390246795Sattilio while (rnode != NULL) { 391246726Sattilio if (vm_radix_keybarr(rnode, index) == TRUE) 392246726Sattilio break; 393246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 394246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 395246726Sattilio if (m != NULL) { 396246726Sattilio if (m->pindex == index) 397246726Sattilio panic("%s: key %jx is already present", 398246726Sattilio __func__, (uintmax_t)index); 399246726Sattilio clev = vm_radix_keydiff(m->pindex, index); 400246726Sattilio tmp = vm_radix_node_get(vm_radix_trimkey(index, 401246726Sattilio clev - 1), 2, clev); 402246726Sattilio rnode->rn_child[slot] = tmp; 403246726Sattilio vm_radix_addpage(tmp, index, clev, page); 404246726Sattilio vm_radix_addpage(tmp, m->pindex, clev, m); 405246726Sattilio return; 406226645Sattilio } 407226645Sattilio if (rnode->rn_child[slot] == NULL) { 408236760Sattilio rnode->rn_count++; 409246726Sattilio vm_radix_addpage(rnode, index, rnode->rn_clev, page); 410246726Sattilio return; 411246726Sattilio } 412226645Sattilio rnode = rnode->rn_child[slot]; 413226645Sattilio } 414246726Sattilio if (rnode == NULL) 415246726Sattilio panic("%s: path 
traversal ended unexpectedly", __func__); 416226645Sattilio 417246726Sattilio /* 418246726Sattilio * Scan the trie from the top and find the parent to insert 419246726Sattilio * the new object. 420246726Sattilio */ 421246726Sattilio newind = rnode->rn_owner; 422246726Sattilio clev = vm_radix_keydiff(newind, index); 423246726Sattilio slot = VM_RADIX_COUNT; 424246726Sattilio for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) { 425246726Sattilio KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan", 426246726Sattilio __func__)); 427246726Sattilio KASSERT(clev >= rnode->rn_clev, 428246726Sattilio ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d", 429246726Sattilio __func__, clev, rnode->rn_clev)); 430246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 431246726Sattilio tmp = rnode->rn_child[slot]; 432246726Sattilio KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL, 433246726Sattilio ("%s: unexpected lookup interruption", __func__)); 434246726Sattilio if (tmp->rn_clev > clev) 435246726Sattilio break; 436246726Sattilio } 437246726Sattilio KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT, 438246726Sattilio ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d", 439246726Sattilio __func__, (void *)rnode, (void *)tmp, slot)); 440246726Sattilio 441246726Sattilio /* 442246726Sattilio * A new node is needed because the right insertion level is reached. 443246726Sattilio * Setup the new intermediate node and add the 2 children: the 444246726Sattilio * new object and the older edge. 
445246726Sattilio */ 446246726Sattilio tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2, 447246726Sattilio clev); 448246726Sattilio rnode->rn_child[slot] = tmp2; 449246726Sattilio vm_radix_addpage(tmp2, index, clev, page); 450246726Sattilio slot = vm_radix_slot(newind, clev); 451246726Sattilio tmp2->rn_child[slot] = tmp; 452226645Sattilio} 453226645Sattilio 454226645Sattilio/* 455226645Sattilio * Returns the value stored at the index. If the index is not present 456226645Sattilio * NULL is returned. 457226645Sattilio */ 458246430Sattiliovm_page_t 459238245Sattiliovm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 460226645Sattilio{ 461226645Sattilio struct vm_radix_node *rnode; 462246726Sattilio vm_page_t m; 463226645Sattilio int slot; 464226645Sattilio 465246726Sattilio rnode = vm_radix_getroot(rtree); 466246795Sattilio while (rnode != NULL) { 467246726Sattilio if (vm_radix_keybarr(rnode, index) == TRUE) 468246726Sattilio return (NULL); 469246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 470226645Sattilio rnode = rnode->rn_child[slot]; 471246726Sattilio m = vm_radix_node_page(rnode); 472246726Sattilio if (m != NULL) { 473246726Sattilio if (m->pindex == index) 474246726Sattilio return (m); 475246726Sattilio else 476246726Sattilio return (NULL); 477246726Sattilio } 478226645Sattilio } 479246726Sattilio return (NULL); 480226645Sattilio} 481226645Sattilio 482226645Sattilio/* 483246726Sattilio * Look up any entry at a position bigger than or equal to index. 
484226645Sattilio */ 485246726Sattiliovm_page_t 486246726Sattiliovm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) 487226645Sattilio{ 488246726Sattilio vm_pindex_t inc; 489246726Sattilio vm_page_t m; 490226645Sattilio struct vm_radix_node *rnode; 491226645Sattilio int slot; 492246726Sattilio uint16_t difflev; 493246726Sattilio boolean_t maplevels[VM_RADIX_LIMIT + 1]; 494246726Sattilio#ifdef INVARIANTS 495246726Sattilio int loops = 0; 496246726Sattilio#endif 497226645Sattilio 498226876Sjeffrestart: 499246726Sattilio KASSERT(++loops < 1000, ("%s: too many loops", __func__)); 500246726Sattilio for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) 501246726Sattilio maplevels[difflev] = FALSE; 502246726Sattilio rnode = vm_radix_getroot(rtree); 503246795Sattilio while (rnode != NULL) { 504246726Sattilio maplevels[rnode->rn_clev] = TRUE; 505246726Sattilio 506246726Sattilio /* 507246726Sattilio * If the keys differ before the current bisection node 508246726Sattilio * the search key might rollback to the earlierst 509246726Sattilio * available bisection node, or to the smaller value 510246726Sattilio * in the current domain (if the owner is bigger than the 511246726Sattilio * search key). 512246726Sattilio * The search for a valid bisection node is helped through 513246726Sattilio * the use of maplevels array which should bring immediately 514246726Sattilio * a lower useful level, skipping holes. 
515246726Sattilio */ 516246726Sattilio if (vm_radix_keybarr(rnode, index) == TRUE) { 517246726Sattilio difflev = vm_radix_keydiff(index, rnode->rn_owner); 518246726Sattilio if (index > rnode->rn_owner) { 519246726Sattilio if (vm_radix_addlev(&index, maplevels, 520246726Sattilio difflev) > 0) 521246726Sattilio break; 522246726Sattilio } else 523246726Sattilio index = vm_radix_trimkey(rnode->rn_owner, 524246726Sattilio difflev); 525246726Sattilio goto restart; 526246726Sattilio } 527246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 528246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 529246726Sattilio if (m != NULL && m->pindex >= index) 530246726Sattilio return (m); 531246726Sattilio if (rnode->rn_child[slot] != NULL && m == NULL) { 532226930Sjeff rnode = rnode->rn_child[slot]; 533226930Sjeff continue; 534226930Sjeff } 535246726Sattilio 536226645Sattilio /* 537246726Sattilio * Look for an available edge or page within the current 538246726Sattilio * bisection node. 539246726Sattilio */ 540246726Sattilio if (slot < (VM_RADIX_COUNT - 1)) { 541246726Sattilio inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 542246726Sattilio index = vm_radix_trimkey(index, rnode->rn_clev); 543246726Sattilio index += inc; 544246726Sattilio slot++; 545246726Sattilio for (;; index += inc, slot++) { 546246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 547246726Sattilio if (m != NULL && m->pindex >= index) 548246726Sattilio return (m); 549246726Sattilio if ((rnode->rn_child[slot] != NULL && 550246726Sattilio m == NULL) || slot == (VM_RADIX_COUNT - 1)) 551246726Sattilio break; 552246726Sattilio } 553246726Sattilio } 554230750Sattilio 555246726Sattilio /* 556246726Sattilio * If a valid page or edge, bigger than the search slot, is 557246726Sattilio * found in the traversal, skip to the next higher-level key. 
558246726Sattilio */ 559246726Sattilio if (slot == (VM_RADIX_COUNT - 1) && 560246726Sattilio (rnode->rn_child[slot] == NULL || m != NULL)) { 561246726Sattilio if (rnode->rn_clev == 0 || vm_radix_addlev(&index, 562246726Sattilio maplevels, rnode->rn_clev - 1) > 0) 563226930Sjeff break; 564246726Sattilio goto restart; 565226645Sattilio } 566246726Sattilio rnode = rnode->rn_child[slot]; 567226645Sattilio } 568245254Sattilio return (NULL); 569226645Sattilio} 570226645Sattilio 571226645Sattilio/* 572226646Sjeff * Look up any entry at a position less than or equal to index. 573226645Sattilio */ 574246430Sattiliovm_page_t 575238245Sattiliovm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) 576226645Sattilio{ 577246726Sattilio vm_pindex_t inc; 578246726Sattilio vm_page_t m; 579226646Sjeff struct vm_radix_node *rnode; 580226646Sjeff int slot; 581246726Sattilio uint16_t difflev; 582246726Sattilio boolean_t maplevels[VM_RADIX_LIMIT + 1]; 583246726Sattilio#ifdef INVARIANTS 584246726Sattilio int loops = 0; 585246726Sattilio#endif 586226645Sattilio 587226876Sjeffrestart: 588246726Sattilio KASSERT(++loops < 1000, ("%s: too many loops", __func__)); 589246726Sattilio for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) 590246726Sattilio maplevels[difflev] = FALSE; 591246726Sattilio rnode = vm_radix_getroot(rtree); 592246795Sattilio while (rnode != NULL) { 593246726Sattilio maplevels[rnode->rn_clev] = TRUE; 594246726Sattilio 595226646Sjeff /* 596246726Sattilio * If the keys differ before the current bisection node 597246726Sattilio * the search key might rollback to the earlierst 598246726Sattilio * available bisection node, or to the higher value 599246726Sattilio * in the current domain (if the owner is smaller than the 600246726Sattilio * search key). 601246726Sattilio * The search for a valid bisection node is helped through 602246726Sattilio * the use of maplevels array which should bring immediately 603246726Sattilio * a lower useful level, skipping holes. 
604226646Sjeff */ 605246726Sattilio if (vm_radix_keybarr(rnode, index) == TRUE) { 606246726Sattilio difflev = vm_radix_keydiff(index, rnode->rn_owner); 607246726Sattilio if (index > rnode->rn_owner) { 608246726Sattilio index = vm_radix_trimkey(rnode->rn_owner, 609246726Sattilio difflev); 610246726Sattilio index |= VM_RADIX_UNITLEVEL(difflev) - 1; 611246726Sattilio } else if (vm_radix_declev(&index, maplevels, 612246726Sattilio difflev) > 0) 613246726Sattilio break; 614246726Sattilio goto restart; 615246726Sattilio } 616246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 617246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 618246726Sattilio if (m != NULL && m->pindex <= index) 619246726Sattilio return (m); 620246726Sattilio if (rnode->rn_child[slot] != NULL && m == NULL) { 621246726Sattilio rnode = rnode->rn_child[slot]; 622246726Sattilio continue; 623246726Sattilio } 624246726Sattilio 625246726Sattilio /* 626246726Sattilio * Look for an available edge or page within the current 627246726Sattilio * bisection node. 628246726Sattilio */ 629246726Sattilio if (slot > 0) { 630246726Sattilio inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 631246726Sattilio index = vm_radix_trimkey(index, rnode->rn_clev); 632246726Sattilio index |= inc - 1; 633226646Sjeff index -= inc; 634226646Sjeff slot--; 635246726Sattilio for (;; index -= inc, slot--) { 636246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 637246726Sattilio if (m != NULL && m->pindex <= index) 638246726Sattilio return (m); 639246726Sattilio if ((rnode->rn_child[slot] != NULL && 640246726Sattilio m == NULL) || slot == 0) 641226646Sjeff break; 642226646Sjeff } 643226646Sjeff } 644246726Sattilio 645246726Sattilio /* 646246730Sattilio * If a valid page or edge, smaller than the search slot, is 647246726Sattilio * found in the traversal, skip to the next higher-level key. 
648246726Sattilio */ 649246726Sattilio if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) { 650246726Sattilio if (rnode->rn_clev == 0 || vm_radix_declev(&index, 651246726Sattilio maplevels, rnode->rn_clev - 1) > 0) 652246726Sattilio break; 653246726Sattilio goto restart; 654226646Sjeff } 655246726Sattilio rnode = rnode->rn_child[slot]; 656226646Sjeff } 657226645Sattilio return (NULL); 658226645Sattilio} 659226645Sattilio 660226645Sattilio/* 661246726Sattilio * Remove the specified index from the tree. 662246726Sattilio * Panics if the key is not present. 663226645Sattilio */ 664236763Sattiliovoid 665238245Sattiliovm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 666226645Sattilio{ 667246726Sattilio struct vm_radix_node *rnode, *parent; 668246726Sattilio vm_page_t m; 669246726Sattilio int i, slot; 670226645Sattilio 671246726Sattilio parent = NULL; 672246726Sattilio rnode = vm_radix_getroot(rtree); 673236760Sattilio for (;;) { 674246726Sattilio if (rnode == NULL) 675246726Sattilio panic("vm_radix_remove: impossible to locate the key"); 676246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 677246726Sattilio m = vm_radix_node_page(rnode->rn_child[slot]); 678246726Sattilio if (m != NULL && m->pindex == index) { 679246726Sattilio rnode->rn_child[slot] = NULL; 680246726Sattilio rnode->rn_count--; 681246726Sattilio if (rnode->rn_count > 1) 682246726Sattilio break; 683246726Sattilio if (parent == NULL) { 684246726Sattilio if (rnode->rn_count == 0) { 685246726Sattilio vm_radix_node_put(rnode); 686246726Sattilio vm_radix_setroot(rtree, NULL); 687246726Sattilio } 688246726Sattilio break; 689246726Sattilio } 690246726Sattilio for (i = 0; i < VM_RADIX_COUNT; i++) 691246726Sattilio if (rnode->rn_child[i] != NULL) 692246726Sattilio break; 693246726Sattilio KASSERT(i != VM_RADIX_COUNT, 694246726Sattilio ("%s: invalid node configuration", __func__)); 695246726Sattilio slot = vm_radix_slot(index, parent->rn_clev); 696246726Sattilio 
KASSERT(parent->rn_child[slot] == rnode, 697246726Sattilio ("%s: invalid child value", __func__)); 698246726Sattilio parent->rn_child[slot] = rnode->rn_child[i]; 699246726Sattilio rnode->rn_count--; 700246726Sattilio rnode->rn_child[i] = NULL; 701246726Sattilio vm_radix_node_put(rnode); 702236760Sattilio break; 703236760Sattilio } 704246726Sattilio if (m != NULL && m->pindex != index) 705246726Sattilio panic("%s: invalid key found", __func__); 706246726Sattilio parent = rnode; 707246726Sattilio rnode = rnode->rn_child[slot]; 708236760Sattilio } 709226645Sattilio} 710226645Sattilio 711226645Sattilio/* 712226980Sattilio * Remove and free all the nodes from the radix tree. 713226980Sattilio * This function is recrusive but there is a tight control on it as the 714226980Sattilio * maximum depth of the tree is fixed. 715226980Sattilio */ 716226980Sattiliovoid 717226980Sattiliovm_radix_reclaim_allnodes(struct vm_radix *rtree) 718226980Sattilio{ 719226980Sattilio struct vm_radix_node *root; 720226980Sattilio 721246726Sattilio root = vm_radix_getroot(rtree); 722246726Sattilio if (root == NULL) 723226980Sattilio return; 724246726Sattilio vm_radix_reclaim_allnodes_int(root); 725246726Sattilio vm_radix_setroot(rtree, NULL); 726226980Sattilio} 727246726Sattilio 728246726Sattilio#ifdef DDB 729246726Sattilio/* 730246837Sattilio * Show details about the given radix node. 
731246726Sattilio */ 732246726SattilioDB_SHOW_COMMAND(radixnode, db_show_radixnode) 733246726Sattilio{ 734246726Sattilio struct vm_radix_node *rnode; 735246726Sattilio int i; 736246726Sattilio 737246726Sattilio if (!have_addr) 738246726Sattilio return; 739246726Sattilio rnode = (struct vm_radix_node *)addr; 740246726Sattilio db_printf("radixnode %p, owner %jx, children count %u, level %u:\n", 741246726Sattilio (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count, 742246726Sattilio rnode->rn_clev); 743246726Sattilio for (i = 0; i < VM_RADIX_COUNT; i++) 744246726Sattilio if (rnode->rn_child[i] != NULL) 745246726Sattilio db_printf("slot: %d, val: %p, page: %p, clev: %d\n", 746246726Sattilio i, (void *)rnode->rn_child[i], 747246726Sattilio (void *)vm_radix_node_page(rnode->rn_child[i]), 748246726Sattilio rnode->rn_clev); 749246726Sattilio} 750246726Sattilio#endif /* DDB */ 751