1226645Sattilio/* 2246726Sattilio * Copyright (c) 2013 EMC Corp. 3226930Sjeff * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4226645Sattilio * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 5226645Sattilio * All rights reserved. 6226645Sattilio * 7226645Sattilio * Redistribution and use in source and binary forms, with or without 8226645Sattilio * modification, are permitted provided that the following conditions 9226645Sattilio * are met: 10226645Sattilio * 1. Redistributions of source code must retain the above copyright 11226645Sattilio * notice, this list of conditions and the following disclaimer. 12226645Sattilio * 2. Redistributions in binary form must reproduce the above copyright 13226645Sattilio * notice, this list of conditions and the following disclaimer in the 14226645Sattilio * documentation and/or other materials provided with the distribution. 15226645Sattilio * 16226645Sattilio * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17226645Sattilio * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18226645Sattilio * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19226645Sattilio * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20226645Sattilio * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21226645Sattilio * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22226645Sattilio * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23226645Sattilio * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24226645Sattilio * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25226645Sattilio * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26226645Sattilio * SUCH DAMAGE. 27226645Sattilio * 28226645Sattilio */ 29226645Sattilio 30226645Sattilio/* 31246726Sattilio * Path-compressed radix trie implementation. 32246726Sattilio * The following code is not generalized into a general purpose library 33246726Sattilio * because there are way too many parameters embedded that should really 34246726Sattilio * be decided by the library consumers. At the same time, consumers 35246726Sattilio * of this code must achieve highest possible performance. 36246726Sattilio * 37246726Sattilio * The implementation takes into account the following rationale: 38248424Sattilio * - Size of the nodes should be as small as possible but still big enough 39248424Sattilio * to avoid a large maximum depth for the trie. This is a balance 40248424Sattilio * between the necessity to not wire too much physical memory for the nodes 41248424Sattilio * and the necessity to avoid too much cache pollution during the trie 42248424Sattilio * operations. 43248424Sattilio * - There is not a huge bias toward the number of lookup operations over 44248424Sattilio * the number of insert and remove operations. This basically implies 45248424Sattilio * that optimizations supposedly helping one operation but hurting the 46248424Sattilio * other might be carefully evaluated. 47248424Sattilio * - On average not many nodes are expected to be fully populated, hence 48248424Sattilio * level compression may just complicate things. 49226645Sattilio */ 50226645Sattilio 51226645Sattilio#include <sys/cdefs.h> 52248448Sattilio__FBSDID("$FreeBSD: stable/11/sys/vm/vm_radix.c 327785 2018-01-10 20:39:26Z markj $"); 53226645Sattilio 54246726Sattilio#include "opt_ddb.h" 55246726Sattilio 56226645Sattilio#include <sys/param.h> 57226645Sattilio#include <sys/systm.h> 58226645Sattilio#include <sys/kernel.h> 59246840Sattilio#include <sys/vmmeter.h> 60246726Sattilio 61226873Sattilio#include <vm/uma.h> 62226645Sattilio#include <vm/vm.h> 63226876Sjeff#include <vm/vm_param.h> 64226873Sattilio#include <vm/vm_page.h> 65226645Sattilio#include <vm/vm_radix.h> 66226645Sattilio 67246726Sattilio#ifdef DDB 68246726Sattilio#include <ddb/ddb.h> 69246726Sattilio#endif 70226645Sattilio 71233034Sattilio/* 72247771Salc * These widths should allow the pointers to a node's children to fit within 73247771Salc * a single cache line. The extra levels from a narrow width should not be 74247771Salc * a problem thanks to path compression. 75233034Sattilio */ 76246726Sattilio#ifdef __LP64__ 77246726Sattilio#define VM_RADIX_WIDTH 4 78233034Sattilio#else 79246726Sattilio#define VM_RADIX_WIDTH 3 80233034Sattilio#endif 81233034Sattilio 82232326Sattilio#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) 83232326Sattilio#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) 84246726Sattilio#define VM_RADIX_LIMIT \ 85298482Spfg (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1) 86232326Sattilio 87232326Sattilio/* Flag bits stored in node pointers. */ 88246726Sattilio#define VM_RADIX_ISLEAF 0x1 89246726Sattilio#define VM_RADIX_FLAGS 0x1 90246726Sattilio#define VM_RADIX_PAD VM_RADIX_FLAGS 91232326Sattilio 92246726Sattilio/* Returns one unit associated with specified level. */ 93246726Sattilio#define VM_RADIX_UNITLEVEL(lev) \ 94250520Salc ((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH)) 95232326Sattilio 96232326Sattiliostruct vm_radix_node { 97246726Sattilio vm_pindex_t rn_owner; /* Owner of record. */ 98246726Sattilio uint16_t rn_count; /* Valid children. */ 99246726Sattilio uint16_t rn_clev; /* Current level. */ 100249221Salc void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ 101232326Sattilio}; 102232326Sattilio 103226873Sattiliostatic uma_zone_t vm_radix_node_zone; 104226645Sattilio 105246794Sattilio/* 106254141Sattilio * Allocate a radix node. 107246726Sattilio */ 108246726Sattiliostatic __inline struct vm_radix_node * 109246726Sattiliovm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) 110226873Sattilio{ 111246726Sattilio struct vm_radix_node *rnode; 112226645Sattilio 113254141Sattilio rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); 114247742Sattilio if (rnode == NULL) 115254141Sattilio return (NULL); 116246726Sattilio rnode->rn_owner = owner; 117246726Sattilio rnode->rn_count = count; 118246726Sattilio rnode->rn_clev = clevel; 119246726Sattilio return (rnode); 120246726Sattilio} 121226873Sattilio 122246726Sattilio/* 123246726Sattilio * Free radix node. 124246726Sattilio */ 125246726Sattiliostatic __inline void 126246726Sattiliovm_radix_node_put(struct vm_radix_node *rnode) 127246726Sattilio{ 128246726Sattilio 129246726Sattilio uma_zfree(vm_radix_node_zone, rnode); 130226873Sattilio} 131226873Sattilio 132246726Sattilio/* 133246726Sattilio * Return the position in the array for a given level. 134246726Sattilio */ 135246726Sattiliostatic __inline int 136246726Sattiliovm_radix_slot(vm_pindex_t index, uint16_t level) 137226873Sattilio{ 138226873Sattilio 139250520Salc return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); 140226873Sattilio} 141226873Sattilio 142246726Sattilio/* Trims the key after the specified level. */ 143246726Sattiliostatic __inline vm_pindex_t 144246726Sattiliovm_radix_trimkey(vm_pindex_t index, uint16_t level) 145226873Sattilio{ 146246726Sattilio vm_pindex_t ret; 147226873Sattilio 148246726Sattilio ret = index; 149250520Salc if (level > 0) { 150250520Salc ret >>= level * VM_RADIX_WIDTH; 151250520Salc ret <<= level * VM_RADIX_WIDTH; 152246726Sattilio } 153246726Sattilio return (ret); 154226873Sattilio} 155226873Sattilio 156226645Sattilio/* 157246726Sattilio * Get the root node for a radix tree. 158226873Sattilio */ 159246726Sattiliostatic __inline struct vm_radix_node * 160246726Sattiliovm_radix_getroot(struct vm_radix *rtree) 161226873Sattilio{ 162226873Sattilio 163249221Salc return ((struct vm_radix_node *)rtree->rt_root); 164226873Sattilio} 165226873Sattilio 166226873Sattilio/* 167246726Sattilio * Set the root node for a radix tree. 168226645Sattilio */ 169246726Sattiliostatic __inline void 170246726Sattiliovm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode) 171226645Sattilio{ 172226645Sattilio 173246726Sattilio rtree->rt_root = (uintptr_t)rnode; 174226645Sattilio} 175226645Sattilio 176226645Sattilio/* 177248728Salc * Returns TRUE if the specified radix node is a leaf and FALSE otherwise. 178248728Salc */ 179248728Salcstatic __inline boolean_t 180248728Salcvm_radix_isleaf(struct vm_radix_node *rnode) 181248728Salc{ 182248728Salc 183248728Salc return (((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0); 184248728Salc} 185248728Salc 186248728Salc/* 187249038Salc * Returns the associated page extracted from rnode. 188226645Sattilio */ 189246726Sattiliostatic __inline vm_page_t 190249038Salcvm_radix_topage(struct vm_radix_node *rnode) 191226645Sattilio{ 192226645Sattilio 193249038Salc return ((vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS)); 194226645Sattilio} 195226645Sattilio 196226645Sattilio/* 197247771Salc * Adds the page as a child of the provided node. 198226645Sattilio */ 199246726Sattiliostatic __inline void 200246726Sattiliovm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, 201246726Sattilio vm_page_t page) 202226645Sattilio{ 203246726Sattilio int slot; 204226645Sattilio 205246726Sattilio slot = vm_radix_slot(index, clev); 206246726Sattilio rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF); 207226645Sattilio} 208226645Sattilio 209233034Sattilio/* 210246726Sattilio * Returns the slot where two keys differ. 211246726Sattilio * It cannot accept 2 equal keys. 212233034Sattilio */ 213246726Sattiliostatic __inline uint16_t 214246726Sattiliovm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) 215226873Sattilio{ 216246726Sattilio uint16_t clev; 217226873Sattilio 218246726Sattilio KASSERT(index1 != index2, ("%s: passing the same key value %jx", 219246726Sattilio __func__, (uintmax_t)index1)); 220233034Sattilio 221246726Sattilio index1 ^= index2; 222250520Salc for (clev = VM_RADIX_LIMIT;; clev--) 223250334Salc if (vm_radix_slot(index1, clev) != 0) 224246726Sattilio return (clev); 225226873Sattilio} 226226873Sattilio 227226645Sattilio/* 228246726Sattilio * Returns TRUE if it can be determined that key does not belong to the 229247771Salc * specified rnode. Otherwise, returns FALSE. 230226876Sjeff */ 231246726Sattiliostatic __inline boolean_t 232246726Sattiliovm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) 233226876Sjeff{ 234226876Sjeff 235250520Salc if (rnode->rn_clev < VM_RADIX_LIMIT) { 236250520Salc idx = vm_radix_trimkey(idx, rnode->rn_clev + 1); 237249211Salc return (idx != rnode->rn_owner); 238246726Sattilio } 239246726Sattilio return (FALSE); 240226876Sjeff} 241226876Sjeff 242226876Sjeff/* 243247773Salc * Internal helper for vm_radix_reclaim_allnodes(). 244247769Salc * This function is recursive. 245246726Sattilio */ 246226980Sattiliostatic void 247246726Sattiliovm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode) 248226980Sattilio{ 249226980Sattilio int slot; 250226980Sattilio 251248684Salc KASSERT(rnode->rn_count <= VM_RADIX_COUNT, 252248684Salc ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode)); 253248684Salc for (slot = 0; rnode->rn_count != 0; slot++) { 254226980Sattilio if (rnode->rn_child[slot] == NULL) 255226980Sattilio continue; 256248728Salc if (!vm_radix_isleaf(rnode->rn_child[slot])) 257246726Sattilio vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]); 258248431Salc rnode->rn_child[slot] = NULL; 259226980Sattilio rnode->rn_count--; 260226980Sattilio } 261226980Sattilio vm_radix_node_put(rnode); 262226980Sattilio} 263226980Sattilio 264246836Sattilio#ifdef INVARIANTS 265226876Sjeff/* 266246836Sattilio * Radix node zone destructor. 267246836Sattilio */ 268246836Sattiliostatic void 269246836Sattiliovm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused) 270246836Sattilio{ 271246836Sattilio struct vm_radix_node *rnode; 272248431Salc int slot; 273246836Sattilio 274246836Sattilio rnode = mem; 275246836Sattilio KASSERT(rnode->rn_count == 0, 276248431Salc ("vm_radix_node_put: rnode %p has %d children", rnode, 277246836Sattilio rnode->rn_count)); 278248431Salc for (slot = 0; slot < VM_RADIX_COUNT; slot++) 279248431Salc KASSERT(rnode->rn_child[slot] == NULL, 280248431Salc ("vm_radix_node_put: rnode %p has a child", rnode)); 281246836Sattilio} 282246836Sattilio#endif 283246836Sattilio 284254141Sattilio#ifndef UMA_MD_SMALL_ALLOC 285246836Sattilio/* 286254141Sattilio * Reserve the KVA necessary to satisfy the node allocation. 287254141Sattilio * This is mandatory in architectures not supporting direct 288254141Sattilio * mapping as they will need otherwise to carve into the kernel maps for 289254141Sattilio * every node allocation, resulting into deadlocks for consumers already 290254141Sattilio * working with kernel maps. 291248431Salc */ 292246794Sattiliostatic void 293254141Sattiliovm_radix_reserve_kva(void *arg __unused) 294246726Sattilio{ 295246726Sattilio 296249605Salc /* 297249605Salc * Calculate the number of reserved nodes, discounting the pages that 298249605Salc * are needed to store them. 299249605Salc */ 300254141Sattilio if (!uma_zone_reserve_kva(vm_radix_node_zone, 301263620Sbdrewery ((vm_paddr_t)vm_cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE + 302254141Sattilio sizeof(struct vm_radix_node)))) 303254141Sattilio panic("%s: unable to reserve KVA", __func__); 304247742Sattilio} 305267992ShselaskySYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_THIRD, 306254141Sattilio vm_radix_reserve_kva, NULL); 307254141Sattilio#endif 308247742Sattilio 309247742Sattilio/* 310247742Sattilio * Initialize the UMA slab zone. 311247742Sattilio */ 312247742Sattiliovoid 313321513Skibvm_radix_zinit(void) 314247742Sattilio{ 315247742Sattilio 316246726Sattilio vm_radix_node_zone = uma_zcreate("RADIX NODE", 317246726Sattilio sizeof(struct vm_radix_node), NULL, 318246726Sattilio#ifdef INVARIANTS 319246726Sattilio vm_radix_node_zone_dtor, 320246726Sattilio#else 321246726Sattilio NULL, 322246726Sattilio#endif 323254141Sattilio NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM); 324246726Sattilio} 325246726Sattilio 326246726Sattilio/* 327247771Salc * Inserts the key-value pair into the trie. 328226645Sattilio * Panics if the key already exists. 329226645Sattilio */ 330254141Sattilioint 331248428Salcvm_radix_insert(struct vm_radix *rtree, vm_page_t page) 332226645Sattilio{ 333248428Salc vm_pindex_t index, newind; 334249502Salc void **parentp; 335249502Salc struct vm_radix_node *rnode, *tmp; 336246726Sattilio vm_page_t m; 337236763Sattilio int slot; 338246726Sattilio uint16_t clev; 339226645Sattilio 340248428Salc index = page->pindex; 341248222Sattilio 342226645Sattilio /* 343246726Sattilio * The owner of record for root is not really important because it 344246726Sattilio * will never be used. 345226645Sattilio */ 346246726Sattilio rnode = vm_radix_getroot(rtree); 347246726Sattilio if (rnode == NULL) { 348249502Salc rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF; 349254141Sattilio return (0); 350246726Sattilio } 351249502Salc parentp = (void **)&rtree->rt_root; 352249502Salc for (;;) { 353249502Salc if (vm_radix_isleaf(rnode)) { 354249502Salc m = vm_radix_topage(rnode); 355246726Sattilio if (m->pindex == index) 356246726Sattilio panic("%s: key %jx is already present", 357246726Sattilio __func__, (uintmax_t)index); 358246726Sattilio clev = vm_radix_keydiff(m->pindex, index); 359246726Sattilio tmp = vm_radix_node_get(vm_radix_trimkey(index, 360250520Salc clev + 1), 2, clev); 361318716Smarkj if (tmp == NULL) 362254141Sattilio return (ENOMEM); 363249502Salc *parentp = tmp; 364246726Sattilio vm_radix_addpage(tmp, index, clev, page); 365246726Sattilio vm_radix_addpage(tmp, m->pindex, clev, m); 366254141Sattilio return (0); 367249502Salc } else if (vm_radix_keybarr(rnode, index)) 368249502Salc break; 369249502Salc slot = vm_radix_slot(index, rnode->rn_clev); 370226645Sattilio if (rnode->rn_child[slot] == NULL) { 371236760Sattilio rnode->rn_count++; 372246726Sattilio vm_radix_addpage(rnode, index, rnode->rn_clev, page); 373254141Sattilio return (0); 374246726Sattilio } 375249502Salc parentp = &rnode->rn_child[slot]; 376226645Sattilio rnode = rnode->rn_child[slot]; 377249502Salc } 378226645Sattilio 379246726Sattilio /* 380246726Sattilio * A new node is needed because the right insertion level is reached. 381246726Sattilio * Setup the new intermediate node and add the 2 children: the 382246726Sattilio * new object and the older edge. 383246726Sattilio */ 384249182Salc newind = rnode->rn_owner; 385249182Salc clev = vm_radix_keydiff(newind, index); 386254141Sattilio tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev); 387318716Smarkj if (tmp == NULL) 388254141Sattilio return (ENOMEM); 389249502Salc *parentp = tmp; 390249182Salc vm_radix_addpage(tmp, index, clev, page); 391246726Sattilio slot = vm_radix_slot(newind, clev); 392249182Salc tmp->rn_child[slot] = rnode; 393254141Sattilio return (0); 394226645Sattilio} 395226645Sattilio 396226645Sattilio/* 397254719Salc * Returns TRUE if the specified radix tree contains a single leaf and FALSE 398254719Salc * otherwise. 399254719Salc */ 400254719Salcboolean_t 401254719Salcvm_radix_is_singleton(struct vm_radix *rtree) 402254719Salc{ 403254719Salc struct vm_radix_node *rnode; 404254719Salc 405254719Salc rnode = vm_radix_getroot(rtree); 406254719Salc if (rnode == NULL) 407254719Salc return (FALSE); 408254719Salc return (vm_radix_isleaf(rnode)); 409254719Salc} 410254719Salc 411254719Salc/* 412247771Salc * Returns the value stored at the index. If the index is not present, 413226645Sattilio * NULL is returned. 414226645Sattilio */ 415246430Sattiliovm_page_t 416238245Sattiliovm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 417226645Sattilio{ 418226645Sattilio struct vm_radix_node *rnode; 419246726Sattilio vm_page_t m; 420226645Sattilio int slot; 421226645Sattilio 422246726Sattilio rnode = vm_radix_getroot(rtree); 423246795Sattilio while (rnode != NULL) { 424249038Salc if (vm_radix_isleaf(rnode)) { 425249038Salc m = vm_radix_topage(rnode); 426246726Sattilio if (m->pindex == index) 427246726Sattilio return (m); 428246726Sattilio else 429249427Salc break; 430249427Salc } else if (vm_radix_keybarr(rnode, index)) 431249427Salc break; 432249427Salc slot = vm_radix_slot(index, rnode->rn_clev); 433249427Salc rnode = rnode->rn_child[slot]; 434226645Sattilio } 435246726Sattilio return (NULL); 436226645Sattilio} 437226645Sattilio 438226645Sattilio/* 439247771Salc * Look up the nearest entry at a position bigger than or equal to index. 440226645Sattilio */ 441246726Sattiliovm_page_t 442246726Sattiliovm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) 443226645Sattilio{ 444250259Salc struct vm_radix_node *stack[VM_RADIX_LIMIT]; 445246726Sattilio vm_pindex_t inc; 446246726Sattilio vm_page_t m; 447249038Salc struct vm_radix_node *child, *rnode; 448246726Sattilio#ifdef INVARIANTS 449246726Sattilio int loops = 0; 450246726Sattilio#endif 451250259Salc int slot, tos; 452226645Sattilio 453249427Salc rnode = vm_radix_getroot(rtree); 454249427Salc if (rnode == NULL) 455249427Salc return (NULL); 456249427Salc else if (vm_radix_isleaf(rnode)) { 457249427Salc m = vm_radix_topage(rnode); 458249427Salc if (m->pindex >= index) 459249427Salc return (m); 460249427Salc else 461249427Salc return (NULL); 462249427Salc } 463250259Salc tos = 0; 464249427Salc for (;;) { 465246726Sattilio /* 466249986Salc * If the keys differ before the current bisection node, 467249986Salc * then the search key might rollback to the earliest 468249986Salc * available bisection node or to the smallest key 469249986Salc * in the current node (if the owner is bigger than the 470246726Sattilio * search key). 471246726Sattilio */ 472247770Salc if (vm_radix_keybarr(rnode, index)) { 473246726Sattilio if (index > rnode->rn_owner) { 474250259Salcascend: 475250259Salc KASSERT(++loops < 1000, 476250259Salc ("vm_radix_lookup_ge: too many loops")); 477250259Salc 478250259Salc /* 479250259Salc * Pop nodes from the stack until either the 480250259Salc * stack is empty or a node that could have a 481250259Salc * matching descendant is found. 482250259Salc */ 483250259Salc do { 484250259Salc if (tos == 0) 485250259Salc return (NULL); 486250259Salc rnode = stack[--tos]; 487250259Salc } while (vm_radix_slot(index, 488250259Salc rnode->rn_clev) == (VM_RADIX_COUNT - 1)); 489250259Salc 490250259Salc /* 491250259Salc * The following computation cannot overflow 492250259Salc * because index's slot at the current level 493250259Salc * is less than VM_RADIX_COUNT - 1. 494250259Salc */ 495250259Salc index = vm_radix_trimkey(index, 496250259Salc rnode->rn_clev); 497250259Salc index += VM_RADIX_UNITLEVEL(rnode->rn_clev); 498246726Sattilio } else 499249986Salc index = rnode->rn_owner; 500250259Salc KASSERT(!vm_radix_keybarr(rnode, index), 501250259Salc ("vm_radix_lookup_ge: keybarr failed")); 502246726Sattilio } 503246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 504249038Salc child = rnode->rn_child[slot]; 505249038Salc if (vm_radix_isleaf(child)) { 506249038Salc m = vm_radix_topage(child); 507249038Salc if (m->pindex >= index) 508249038Salc return (m); 509249038Salc } else if (child != NULL) 510249038Salc goto descend; 511246726Sattilio 512226645Sattilio /* 513246726Sattilio * Look for an available edge or page within the current 514246726Sattilio * bisection node. 515246726Sattilio */ 516246726Sattilio if (slot < (VM_RADIX_COUNT - 1)) { 517246726Sattilio inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 518246726Sattilio index = vm_radix_trimkey(index, rnode->rn_clev); 519249038Salc do { 520249038Salc index += inc; 521249038Salc slot++; 522249038Salc child = rnode->rn_child[slot]; 523249038Salc if (vm_radix_isleaf(child)) { 524249038Salc m = vm_radix_topage(child); 525249038Salc if (m->pindex >= index) 526249038Salc return (m); 527249038Salc } else if (child != NULL) 528249038Salc goto descend; 529249038Salc } while (slot < (VM_RADIX_COUNT - 1)); 530246726Sattilio } 531249038Salc KASSERT(child == NULL || vm_radix_isleaf(child), 532249038Salc ("vm_radix_lookup_ge: child is radix node")); 533230750Sattilio 534246726Sattilio /* 535250259Salc * If a page or edge bigger than the search slot is not found 536250259Salc * in the current node, ascend to the next higher-level node. 537246726Sattilio */ 538250259Salc goto ascend; 539249038Salcdescend: 540250520Salc KASSERT(rnode->rn_clev > 0, 541250259Salc ("vm_radix_lookup_ge: pushing leaf's parent")); 542250259Salc KASSERT(tos < VM_RADIX_LIMIT, 543250259Salc ("vm_radix_lookup_ge: stack overflow")); 544250259Salc stack[tos++] = rnode; 545249038Salc rnode = child; 546226645Sattilio } 547226645Sattilio} 548226645Sattilio 549226645Sattilio/* 550247771Salc * Look up the nearest entry at a position less than or equal to index. 551226645Sattilio */ 552246430Sattiliovm_page_t 553238245Sattiliovm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) 554226645Sattilio{ 555250259Salc struct vm_radix_node *stack[VM_RADIX_LIMIT]; 556246726Sattilio vm_pindex_t inc; 557246726Sattilio vm_page_t m; 558249038Salc struct vm_radix_node *child, *rnode; 559246726Sattilio#ifdef INVARIANTS 560246726Sattilio int loops = 0; 561246726Sattilio#endif 562250259Salc int slot, tos; 563226645Sattilio 564249427Salc rnode = vm_radix_getroot(rtree); 565249427Salc if (rnode == NULL) 566249427Salc return (NULL); 567249427Salc else if (vm_radix_isleaf(rnode)) { 568249427Salc m = vm_radix_topage(rnode); 569249427Salc if (m->pindex <= index) 570249427Salc return (m); 571249427Salc else 572249427Salc return (NULL); 573249427Salc } 574250259Salc tos = 0; 575249427Salc for (;;) { 576226646Sjeff /* 577249986Salc * If the keys differ before the current bisection node, 578249986Salc * then the search key might rollback to the earliest 579249986Salc * available bisection node or to the largest key 580249986Salc * in the current node (if the owner is smaller than the 581246726Sattilio * search key). 582226646Sjeff */ 583247770Salc if (vm_radix_keybarr(rnode, index)) { 584246726Sattilio if (index > rnode->rn_owner) { 585249986Salc index = rnode->rn_owner + VM_RADIX_COUNT * 586250259Salc VM_RADIX_UNITLEVEL(rnode->rn_clev); 587249986Salc } else { 588250259Salcascend: 589250259Salc KASSERT(++loops < 1000, 590250259Salc ("vm_radix_lookup_le: too many loops")); 591250259Salc 592250259Salc /* 593250259Salc * Pop nodes from the stack until either the 594250259Salc * stack is empty or a node that could have a 595250259Salc * matching descendant is found. 596250259Salc */ 597250259Salc do { 598250259Salc if (tos == 0) 599250259Salc return (NULL); 600250259Salc rnode = stack[--tos]; 601250259Salc } while (vm_radix_slot(index, 602250259Salc rnode->rn_clev) == 0); 603250259Salc 604250259Salc /* 605250259Salc * The following computation cannot overflow 606250259Salc * because index's slot at the current level 607250259Salc * is greater than 0. 608250259Salc */ 609250259Salc index = vm_radix_trimkey(index, 610250259Salc rnode->rn_clev); 611249986Salc } 612250259Salc index--; 613250259Salc KASSERT(!vm_radix_keybarr(rnode, index), 614250259Salc ("vm_radix_lookup_le: keybarr failed")); 615246726Sattilio } 616246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 617249038Salc child = rnode->rn_child[slot]; 618249038Salc if (vm_radix_isleaf(child)) { 619249038Salc m = vm_radix_topage(child); 620249038Salc if (m->pindex <= index) 621249038Salc return (m); 622249038Salc } else if (child != NULL) 623249038Salc goto descend; 624246726Sattilio 625246726Sattilio /* 626246726Sattilio * Look for an available edge or page within the current 627246726Sattilio * bisection node. 628246726Sattilio */ 629246726Sattilio if (slot > 0) { 630246726Sattilio inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 631246726Sattilio index |= inc - 1; 632249038Salc do { 633249038Salc index -= inc; 634249038Salc slot--; 635249038Salc child = rnode->rn_child[slot]; 636249038Salc if (vm_radix_isleaf(child)) { 637249038Salc m = vm_radix_topage(child); 638249038Salc if (m->pindex <= index) 639249038Salc return (m); 640249038Salc } else if (child != NULL) 641249038Salc goto descend; 642249038Salc } while (slot > 0); 643226646Sjeff } 644249038Salc KASSERT(child == NULL || vm_radix_isleaf(child), 645249038Salc ("vm_radix_lookup_le: child is radix node")); 646246726Sattilio 647246726Sattilio /* 648250259Salc * If a page or edge smaller than the search slot is not found 649250259Salc * in the current node, ascend to the next higher-level node. 650246726Sattilio */ 651250259Salc goto ascend; 652249038Salcdescend: 653250520Salc KASSERT(rnode->rn_clev > 0, 654250259Salc ("vm_radix_lookup_le: pushing leaf's parent")); 655250259Salc KASSERT(tos < VM_RADIX_LIMIT, 656250259Salc ("vm_radix_lookup_le: stack overflow")); 657250259Salc stack[tos++] = rnode; 658249038Salc rnode = child; 659226646Sjeff } 660226645Sattilio} 661226645Sattilio 662226645Sattilio/* 663318716Smarkj * Remove the specified index from the trie, and return the value stored at 664318716Smarkj * that index. If the index is not present, return NULL. 665226645Sattilio */ 666318716Smarkjvm_page_t 667238245Sattiliovm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 668226645Sattilio{ 669246726Sattilio struct vm_radix_node *rnode, *parent; 670246726Sattilio vm_page_t m; 671246726Sattilio int i, slot; 672226645Sattilio 673249502Salc rnode = vm_radix_getroot(rtree); 674249502Salc if (vm_radix_isleaf(rnode)) { 675249502Salc m = vm_radix_topage(rnode); 676249502Salc if (m->pindex != index) 677318716Smarkj return (NULL); 678249502Salc vm_radix_setroot(rtree, NULL); 679318716Smarkj return (m); 680249502Salc } 681246726Sattilio parent = NULL; 682236760Sattilio for (;;) { 683246726Sattilio if (rnode == NULL) 684318716Smarkj return (NULL); 685246726Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 686249038Salc if (vm_radix_isleaf(rnode->rn_child[slot])) { 687249038Salc m = vm_radix_topage(rnode->rn_child[slot]); 688249038Salc if (m->pindex != index) 689318716Smarkj return (NULL); 690246726Sattilio rnode->rn_child[slot] = NULL; 691246726Sattilio rnode->rn_count--; 692246726Sattilio if (rnode->rn_count > 1) 693318716Smarkj return (m); 694246726Sattilio for (i = 0; i < VM_RADIX_COUNT; i++) 695246726Sattilio if (rnode->rn_child[i] != NULL) 696246726Sattilio break; 697246726Sattilio KASSERT(i != VM_RADIX_COUNT, 698246726Sattilio ("%s: invalid node configuration", __func__)); 699249502Salc if (parent == NULL) 700249502Salc vm_radix_setroot(rtree, rnode->rn_child[i]); 701249502Salc else { 702249502Salc slot = vm_radix_slot(index, parent->rn_clev); 703249502Salc KASSERT(parent->rn_child[slot] == rnode, 704249502Salc ("%s: invalid child value", __func__)); 705249502Salc parent->rn_child[slot] = rnode->rn_child[i]; 706249502Salc } 707246726Sattilio rnode->rn_count--; 708246726Sattilio rnode->rn_child[i] = NULL; 709246726Sattilio vm_radix_node_put(rnode); 710318716Smarkj return (m); 711236760Sattilio } 712246726Sattilio parent = rnode; 713246726Sattilio rnode = rnode->rn_child[slot]; 714236760Sattilio } 715226645Sattilio} 716226645Sattilio 717226645Sattilio/* 718226980Sattilio * Remove and free all the nodes from the radix tree. 719247771Salc * This function is recursive but there is a tight control on it as the 720226980Sattilio * maximum depth of the tree is fixed. 721226980Sattilio */ 722226980Sattiliovoid 723226980Sattiliovm_radix_reclaim_allnodes(struct vm_radix *rtree) 724226980Sattilio{ 725226980Sattilio struct vm_radix_node *root; 726226980Sattilio 727246726Sattilio root = vm_radix_getroot(rtree); 728246726Sattilio if (root == NULL) 729226980Sattilio return; 730248684Salc vm_radix_setroot(rtree, NULL); 731249502Salc if (!vm_radix_isleaf(root)) 732249502Salc vm_radix_reclaim_allnodes_int(root); 733226980Sattilio} 734246726Sattilio 735254141Sattilio/* 736259107Salc * Replace an existing page in the trie with another one. 737259107Salc * Panics if there is not an old page in the trie at the new page's index. 738254141Sattilio */ 739254141Sattiliovm_page_t 740259107Salcvm_radix_replace(struct vm_radix *rtree, vm_page_t newpage) 741254141Sattilio{ 742254141Sattilio struct vm_radix_node *rnode; 743254141Sattilio vm_page_t m; 744259107Salc vm_pindex_t index; 745254141Sattilio int slot; 746254141Sattilio 747259107Salc index = newpage->pindex; 748254141Sattilio rnode = vm_radix_getroot(rtree); 749254141Sattilio if (rnode == NULL) 750254141Sattilio panic("%s: replacing page on an empty trie", __func__); 751254141Sattilio if (vm_radix_isleaf(rnode)) { 752254141Sattilio m = vm_radix_topage(rnode); 753254141Sattilio if (m->pindex != index) 754254141Sattilio panic("%s: original replacing root key not found", 755254141Sattilio __func__); 756254141Sattilio rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF; 757254141Sattilio return (m); 758254141Sattilio } 759254141Sattilio for (;;) { 760254141Sattilio slot = vm_radix_slot(index, rnode->rn_clev); 761254141Sattilio if (vm_radix_isleaf(rnode->rn_child[slot])) { 762254141Sattilio m = vm_radix_topage(rnode->rn_child[slot]); 763254141Sattilio if (m->pindex == index) { 764254141Sattilio rnode->rn_child[slot] = 765254141Sattilio (void *)((uintptr_t)newpage | 766254141Sattilio VM_RADIX_ISLEAF); 767254141Sattilio return (m); 768254141Sattilio } else 769254141Sattilio break; 770254141Sattilio } else if (rnode->rn_child[slot] == NULL || 771254141Sattilio vm_radix_keybarr(rnode->rn_child[slot], index)) 772254141Sattilio break; 773254141Sattilio rnode = rnode->rn_child[slot]; 774254141Sattilio } 775254141Sattilio panic("%s: original replacing page not found", __func__); 776254141Sattilio} 777254141Sattilio 778327785Smarkjvoid 779327785Smarkjvm_radix_wait(void) 780327785Smarkj{ 781327785Smarkj uma_zwait(vm_radix_node_zone); 782327785Smarkj} 783327785Smarkj 784246726Sattilio#ifdef DDB 785246726Sattilio/* 786246837Sattilio * Show details about the given radix node. 787246726Sattilio */ 788246726SattilioDB_SHOW_COMMAND(radixnode, db_show_radixnode) 789246726Sattilio{ 790246726Sattilio struct vm_radix_node *rnode; 791246726Sattilio int i; 792246726Sattilio 793246726Sattilio if (!have_addr) 794246726Sattilio return; 795246726Sattilio rnode = (struct vm_radix_node *)addr; 796246726Sattilio db_printf("radixnode %p, owner %jx, children count %u, level %u:\n", 797246726Sattilio (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count, 798246726Sattilio rnode->rn_clev); 799246726Sattilio for (i = 0; i < VM_RADIX_COUNT; i++) 800246726Sattilio if (rnode->rn_child[i] != NULL) 801246726Sattilio db_printf("slot: %d, val: %p, page: %p, clev: %d\n", 802246726Sattilio i, (void *)rnode->rn_child[i], 803249038Salc vm_radix_isleaf(rnode->rn_child[i]) ? 804249038Salc vm_radix_topage(rnode->rn_child[i]) : NULL, 805246726Sattilio rnode->rn_clev); 806246726Sattilio} 807246726Sattilio#endif /* DDB */ 808