vm_radix.c revision 226873
1/* 2 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 29 30/* 31 * Radix tree implementation. 32 */ 33 34#include <sys/cdefs.h> 35 36#include <sys/param.h> 37#include <sys/conf.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/malloc.h> 41#include <sys/queue.h> 42#include <sys/param.h> 43#include <sys/lock.h> 44#include <sys/mutex.h> 45#include <sys/ktr.h> 46#include <vm/uma.h> 47#include <vm/vm.h> 48#include <vm/vm_extern.h> 49#include <vm/vm_kern.h> 50#include <vm/vm_page.h> 51#include <vm/vm_radix.h> 52#include <vm/vm_object.h> 53 54#include <sys/kdb.h> 55 56CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); 57 58static uma_zone_t vm_radix_node_zone; 59 60#if 0 61static void * 62vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) 63{ 64 vm_offset_t addr; 65 vm_page_t m; 66 int pflags; 67 68 /* Inform UMA that this allocator uses kernel_map. */ 69 *flags = UMA_SLAB_KERNEL; 70 71 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; 72 73 /* 74 * As kmem_alloc_nofault() can however fail, let just assume that 75 * M_NOWAIT is on and act accordingly. 76 */ 77 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : 78 VM_ALLOC_SYSTEM; 79 if ((wait & M_ZERO) != 0) 80 pflags |= VM_ALLOC_ZERO; 81 addr = kmem_alloc_nofault(kernel_map, size); 82 if (addr == 0) 83 return (NULL); 84 85 /* Just one page allocation is assumed here. */ 86 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), 87 pflags); 88 if (m == NULL) { 89 kmem_free(kernel_map, addr, size); 90 return (NULL); 91 } 92 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) 93 pmap_zero_page(m); 94 pmap_qenter(addr, &m, 1); 95 return ((void *)addr); 96} 97 98static void 99vm_radix_node_zone_freef(void *item, int size, uint8_t flags) 100{ 101 vm_page_t m; 102 vm_offset_t voitem; 103 104 MPASS((flags & UMA_SLAB_KERNEL) != 0); 105 106 /* Just one page allocation is assumed here. */ 107 voitem = (vm_offset_t)item; 108 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); 109 pmap_qremove(voitem, 1); 110 vm_page_free(m); 111 kmem_free(kernel_map, voitem, size); 112} 113 114static void 115init_vm_radix_alloc(void *dummy __unused) 116{ 117 118 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); 119 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); 120} 121SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); 122#endif 123 124/* 125 * Radix node zone destructor. 126 */ 127#ifdef INVARIANTS 128static void 129vm_radix_node_zone_dtor(void *mem, int size, void *arg) 130{ 131 struct vm_radix_node *rnode; 132 133 rnode = mem; 134 KASSERT(rnode->rn_count == 0, 135 ("vm_radix_node_put: Freeing a node with %d children\n", 136 rnode->rn_count)); 137} 138#endif 139 140/* 141 * Allocate a radix node. Initializes all elements to 0. 142 */ 143static struct vm_radix_node * 144vm_radix_node_get(void) 145{ 146 147 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); 148} 149 150/* 151 * Free radix node. 152 */ 153static void 154vm_radix_node_put(struct vm_radix_node *rnode) 155{ 156 157 uma_zfree(vm_radix_node_zone, rnode); 158} 159 160/* 161 * Return the position in the array for a given level. 162 */ 163static inline int 164vm_radix_slot(vm_pindex_t index, int level) 165{ 166 167 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); 168} 169 170void 171vm_radix_init(void) 172{ 173 174 vm_radix_node_zone = uma_zcreate("RADIX NODE", 175 sizeof(struct vm_radix_node), NULL, 176#ifdef INVARIANTS 177 vm_radix_node_zone_dtor, 178#else 179 NULL, 180#endif 181 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM); 182} 183 184/* 185 * Inserts the key-value pair in to the radix tree. Returns errno. 186 * Panics if the key already exists. 187 */ 188int 189vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, void *val) 190{ 191 struct vm_radix_node *rnode; 192 int slot; 193 int level; 194 195 CTR3(KTR_VM, 196 "insert: tree %p, index %p, val %p", rtree, (void *)index, val); 197 if (index == -1) 198 panic("vm_radix_insert: -1 is not a valid index.\n"); 199 /* 200 * Increase the height by adding nodes at the root until 201 * there is sufficient space. 202 */ 203 while (rtree->rt_height == 0 || 204 index > VM_RADIX_MAX(rtree->rt_height)) { 205 CTR3(KTR_VM, "insert: expanding %jd > %jd height %d", 206 index, VM_RADIX_MAX(rtree->rt_height), rtree->rt_height); 207 /* 208 * Only allocate tree nodes if they are needed. 209 */ 210 if (rtree->rt_root == NULL || rtree->rt_root->rn_count != 0) { 211 rnode = vm_radix_node_get(); 212 if (rnode == NULL) 213 return (ENOMEM); 214 if (rtree->rt_root) { 215 rnode->rn_child[0] = rtree->rt_root; 216 rnode->rn_count = 1; 217 } 218 rtree->rt_root = rnode; 219 } 220 rtree->rt_height++; 221 KASSERT(rtree->rt_height <= VM_RADIX_LIMIT, 222 ("vm_radix_insert: Tree %p height %d too tall", rtree, 223 rtree->rt_height)); 224 } 225 226 /* Now that the tree is tall enough, fill in the path to the index. */ 227 rnode = rtree->rt_root; 228 for (level = rtree->rt_height - 1; level > 0; level--) { 229 slot = vm_radix_slot(index, level); 230 /* Add the required intermidiate nodes. */ 231 if (rnode->rn_child[slot] == NULL) { 232 rnode->rn_child[slot] = vm_radix_node_get(); 233 if (rnode->rn_child[slot] == NULL) 234 return (ENOMEM); 235 rnode->rn_count++; 236 } 237 CTR5(KTR_VM, 238 "insert: tree %p, index %p, level %d, slot %d, child %p", 239 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 240 rnode = rnode->rn_child[slot]; 241 } 242 243 slot = vm_radix_slot(index, level); 244 CTR5(KTR_VM, "insert: tree %p, index %p, level %d, slot %d, child %p", 245 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 246 KASSERT(rnode->rn_child[slot] == NULL, 247 ("vm_radix_insert: Duplicate value %p at index: %lu\n", 248 rnode->rn_child[slot], (u_long)index)); 249 rnode->rn_child[slot] = val; 250 rnode->rn_count++; 251 252 return 0; 253} 254 255/* 256 * Returns the value stored at the index. If the index is not present 257 * NULL is returned. 258 */ 259void * 260vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 261{ 262 struct vm_radix_node *rnode; 263 int slot; 264 int level; 265 266 if (index > VM_RADIX_MAX(rtree->rt_height)) 267 return NULL; 268 level = rtree->rt_height - 1; 269 rnode = rtree->rt_root; 270 while (rnode) { 271 slot = vm_radix_slot(index, level); 272 CTR5(KTR_VM, 273 "lookup: tree %p, index %p, level %d, slot %d, child %p", 274 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 275 if (level == 0) 276 return rnode->rn_child[slot]; 277 rnode = rnode->rn_child[slot]; 278 level--; 279 } 280 CTR2(KTR_VM, "lookup: tree %p, index %p failed", rtree, (void *)index); 281 282 return NULL; 283} 284 285/* 286 * Looks up as many as cnt values between start and end inclusive, and stores 287 * them in the caller allocated array out. The next index can be used to 288 * restart the scan. This optimizes forward scans in the tree. 289 */ 290int 291vm_radix_lookupn(struct vm_radix *rtree, vm_pindex_t start, 292 vm_pindex_t end, void **out, int cnt, vm_pindex_t *next) 293{ 294 struct vm_radix_node *rnode; 295 struct vm_radix_node *child; 296 vm_pindex_t max; 297 vm_pindex_t inc; 298 int slot; 299 int level; 300 void *val; 301 int outidx; 302 int loops = 0; 303 304 CTR3(KTR_VM, "lookupn: tree %p, start %p, end %p", 305 rtree, (void *)start, (void *)end); 306 outidx = 0; 307 max = VM_RADIX_MAX(rtree->rt_height); 308 if (start > max) 309 return 0; 310 if (end > max || end == 0) 311 end = max; 312restart: 313 loops++; 314 if (loops > 1000) 315 panic("vm_radix_lookupn: looping %d\n", loops); 316 /* 317 * Search the tree from the top for any leaf node holding an index 318 * between start and end. 319 */ 320 level = rtree->rt_height - 1; 321 rnode = rtree->rt_root; 322 while (rnode) { 323 slot = vm_radix_slot(start, level); 324 CTR5(KTR_VM, 325 "lookupn: tree %p, index %p, level %d, slot %d, child %p", 326 rtree, (void *)start, level, slot, rnode->rn_child[slot]); 327 if (level == 0) 328 break; 329 /* 330 * If we don't have an exact match we must start our search 331 * from the next leaf and adjust our index appropriately. 332 */ 333 if ((child = rnode->rn_child[slot]) == NULL) { 334 /* 335 * Calculate how much to increment our index by 336 * based on the tree level. We must truncate the 337 * lower bits to start from the begnning of the next 338 * leaf. 339 */ 340 inc = 1LL << (level * VM_RADIX_WIDTH); 341 start &= ~VM_RADIX_MAX(level); 342 start += inc; 343 slot++; 344 CTR5(KTR_VM, 345 "lookupn: start %p end %p inc %d mask 0x%lX slot %d", 346 (void *)start, (void *)end, inc, ~VM_RADIX_MAX(level), slot); 347 for (; slot < VM_RADIX_COUNT && start <= end; 348 slot++, start += inc) { 349 child = rnode->rn_child[slot]; 350 if (child) 351 break; 352 } 353 } 354 rnode = child; 355 level--; 356 } 357 if (rnode == NULL) { 358 /* 359 * If we still have another range to search, start looking 360 * from the next node. Otherwise, return what we've already 361 * found. The loop above has already adjusted start to the 362 * beginning of the next node. 363 * 364 * Detect start wrapping back to 0 and return in this case. 365 */ 366 if (start <= end && start != 0) 367 goto restart; 368 goto out; 369 } 370 for (; outidx < cnt && slot < VM_RADIX_COUNT && start <= end; 371 slot++, start++) { 372 val = rnode->rn_child[slot]; 373 if (val == NULL) 374 continue; 375 out[outidx++] = val; 376 } 377 /* 378 * Go fetch the next page to fill the requested number of pages 379 * otherwise the caller will simply call us again to fulfill the 380 * same request after the structures are pushed out of cache. 381 */ 382 if (outidx < cnt && start <= end) 383 goto restart; 384 385out: 386 *next = start; 387 388 return (outidx); 389} 390 391/* 392 * Look up any entry at a position less than or equal to index. 393 */ 394void * 395vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) 396{ 397 struct vm_radix_node *rnode; 398 struct vm_radix_node *child; 399 vm_pindex_t max; 400 vm_pindex_t inc; 401 int slot; 402 int level; 403 int loops = 0; 404 405 CTR2(KTR_VM, 406 "lookup_le: tree %p, index %p", rtree, (void *)index); 407 if (rtree->rt_root == NULL) 408 return (NULL); 409 max = VM_RADIX_MAX(rtree->rt_height); 410 if (index > max || index == 0) 411 index = max; 412restart: 413 loops++; 414 if (loops > 1000) 415 panic("vm_radix_looku_le: looping %d\n", loops); 416 /* 417 * Search the tree from the top for any leaf node holding an index 418 * lower than 'index'. 419 */ 420 level = rtree->rt_height - 1; 421 rnode = rtree->rt_root; 422 while (rnode) { 423 slot = vm_radix_slot(index, level); 424 CTR5(KTR_VM, 425 "lookup_le: tree %p, index %p, level %d, slot %d, child %p", 426 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 427 if (level == 0) 428 break; 429 /* 430 * If we don't have an exact match we must start our search 431 * from the next leaf and adjust our index appropriately. 432 */ 433 if ((child = rnode->rn_child[slot]) == NULL) { 434 /* 435 * Calculate how much to decrement our index by 436 * based on the tree level. We must set the 437 * lower bits to start from the end of the next 438 * leaf. 439 */ 440 inc = 1LL << (level * VM_RADIX_WIDTH); 441 index |= VM_RADIX_MAX(level); 442 index -= inc; 443 slot--; 444 CTR4(KTR_VM, 445 "lookup_le: start %p inc %ld mask 0x%lX slot %d", 446 (void *)index, inc, VM_RADIX_MAX(level), slot); 447 for (; slot >= 0; slot--, index -= inc) { 448 child = rnode->rn_child[slot]; 449 if (child) 450 break; 451 } 452 } 453 rnode = child; 454 level--; 455 } 456 if (rnode) { 457 for (; slot >= 0; slot--, index--) { 458 if (rnode->rn_child[slot]) 459 return (rnode->rn_child[slot]); 460 } 461 } 462 if (index != -1) 463 goto restart; 464 return (NULL); 465} 466 467/* 468 * Remove the specified index from the tree. If possible the height of the 469 * tree is adjusted after deletion. The value stored at index is returned 470 * panics if the key is not present. 471 */ 472void * 473vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 474{ 475 struct vm_radix_node *stack[VM_RADIX_LIMIT]; 476 struct vm_radix_node *rnode; 477 void *val; 478 int level; 479 int slot; 480 481 KASSERT(index <= VM_RADIX_MAX(rtree->rt_height), 482 ("vm_radix_remove: %p index %jd out of range %jd.", 483 rtree, index, VM_RADIX_MAX(rtree->rt_height))); 484 val = NULL; 485 rnode = rtree->rt_root; 486 level = rtree->rt_height - 1; 487 /* 488 * Find the node and record the path in stack. 489 */ 490 while (level && rnode) { 491 stack[level] = rnode; 492 slot = vm_radix_slot(index, level); 493 rnode = rnode->rn_child[slot]; 494 CTR5(KTR_VM, 495 "remove: tree %p, index %p, level %d, slot %d, child %p", 496 rtree, (void *)index, level, slot, rnode->rn_child[slot]); 497 level--; 498 } 499 slot = vm_radix_slot(index, 0); 500 KASSERT(rnode != NULL && rnode->rn_child[slot] != NULL, 501 ("vm_radix_remove: index %jd not present in the tree.\n", index)); 502 503 val = rnode->rn_child[slot]; 504 for (;;) { 505 rnode->rn_child[slot] = NULL; 506 rnode->rn_count--; 507 if (rnode->rn_count > 0) 508 break; 509 vm_radix_node_put(rnode); 510 if (rnode == rtree->rt_root) { 511 rtree->rt_root = NULL; 512 rtree->rt_height = 0; 513 break; 514 } 515 rnode = stack[++level]; 516 slot = vm_radix_slot(index, level); 517 518 } 519 return (val); 520} 521 522/* 523 * Attempts to reduce the height of the tree. 524 */ 525void 526vm_radix_shrink(struct vm_radix *rtree) 527{ 528 struct vm_radix_node *tmp; 529 530 if (rtree->rt_root == NULL) 531 return; 532 533 /* Adjust the height of the tree. */ 534 while (rtree->rt_root->rn_count == 1 && 535 rtree->rt_root->rn_child[0] != NULL) { 536 tmp = rtree->rt_root; 537 rtree->rt_root = tmp->rn_child[0]; 538 rtree->rt_height--; 539 tmp->rn_count--; 540 vm_radix_node_put(tmp); 541 } 542 /* Finally see if we have an empty tree. */ 543 if (rtree->rt_root->rn_count == 0) { 544 vm_radix_node_put(rtree->rt_root); 545 rtree->rt_root = NULL; 546 rtree->rt_height = 0; 547 } 548} 549