vm_radix.c (250520) | vm_radix.c (254141) |
---|---|
1/* 2 * Copyright (c) 2013 EMC Corp. 3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions --- 35 unchanged lines hidden (view full) --- 44 * the number of insert and remove operations. This basically implies 45 * that optimizations supposedly helping one operation but hurting the 46 * other might be carefully evaluated. 47 * - On average not many nodes are expected to be fully populated, hence 48 * level compression may just complicate things. 49 */ 50 51#include <sys/cdefs.h> | 1/* 2 * Copyright (c) 2013 EMC Corp. 3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions --- 35 unchanged lines hidden (view full) --- 44 * the number of insert and remove operations. This basically implies 45 * that optimizations supposedly helping one operation but hurting the 46 * other might be carefully evaluated. 47 * - On average not many nodes are expected to be fully populated, hence 48 * level compression may just complicate things. 49 */ 50 51#include <sys/cdefs.h> |
52__FBSDID("$FreeBSD: head/sys/vm/vm_radix.c 250520 2013-05-11 18:01:41Z alc $"); | 52__FBSDID("$FreeBSD: head/sys/vm/vm_radix.c 254141 2013-08-09 11:28:55Z attilio $"); |
53 54#include "opt_ddb.h" 55 56#include <sys/param.h> 57#include <sys/systm.h> 58#include <sys/kernel.h> 59#include <sys/vmmeter.h> 60 --- 37 unchanged lines hidden (view full) --- 98 uint16_t rn_count; /* Valid children. */ 99 uint16_t rn_clev; /* Current level. */ 100 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ 101}; 102 103static uma_zone_t vm_radix_node_zone; 104 105/* | 53 54#include "opt_ddb.h" 55 56#include <sys/param.h> 57#include <sys/systm.h> 58#include <sys/kernel.h> 59#include <sys/vmmeter.h> 60 --- 37 unchanged lines hidden (view full) --- 98 uint16_t rn_count; /* Valid children. */ 99 uint16_t rn_clev; /* Current level. */ 100 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ 101}; 102 103static uma_zone_t vm_radix_node_zone; 104 105/* |
106 * Allocate a radix node. Pre-allocation should ensure that the request 107 * will always be satisfied. | 106 * Allocate a radix node. |
108 */ 109static __inline struct vm_radix_node * 110vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) 111{ 112 struct vm_radix_node *rnode; 113 | 107 */ 108static __inline struct vm_radix_node * 109vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) 110{ 111 struct vm_radix_node *rnode; 112 |
114 rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT); 115 116 /* 117 * The required number of nodes should already be pre-allocated 118 * by vm_radix_prealloc(). However, UMA can hold a few nodes 119 * in per-CPU buckets, which will not be accessible by the 120 * current CPU. Thus, the allocation could return NULL when 121 * the pre-allocated pool is close to exhaustion. Anyway, 122 * in practice this should never occur because a new node 123 * is not always required for insert. Thus, the pre-allocated 124 * pool should have some extra pages that prevent this from 125 * becoming a problem. 126 */ | 113 rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); |
127 if (rnode == NULL) | 114 if (rnode == NULL) |
128 panic("%s: uma_zalloc() returned NULL for a new node", 129 __func__); | 115 return (NULL); |
130 rnode->rn_owner = owner; 131 rnode->rn_count = count; 132 rnode->rn_clev = clevel; 133 return (rnode); 134} 135 136/* 137 * Free radix node. --- 152 unchanged lines hidden (view full) --- 290 ("vm_radix_node_put: rnode %p has %d children", rnode, 291 rnode->rn_count)); 292 for (slot = 0; slot < VM_RADIX_COUNT; slot++) 293 KASSERT(rnode->rn_child[slot] == NULL, 294 ("vm_radix_node_put: rnode %p has a child", rnode)); 295} 296#endif 297 | 116 rnode->rn_owner = owner; 117 rnode->rn_count = count; 118 rnode->rn_clev = clevel; 119 return (rnode); 120} 121 122/* 123 * Free radix node. --- 152 unchanged lines hidden (view full) --- 276 ("vm_radix_node_put: rnode %p has %d children", rnode, 277 rnode->rn_count)); 278 for (slot = 0; slot < VM_RADIX_COUNT; slot++) 279 KASSERT(rnode->rn_child[slot] == NULL, 280 ("vm_radix_node_put: rnode %p has a child", rnode)); 281} 282#endif 283 |
284#ifndef UMA_MD_SMALL_ALLOC |
|
298/* | 285/* |
 299 * Radix node zone initializer. | 286 * Reserve the KVA necessary to satisfy the node allocation. 287 * This is mandatory in architectures not supporting direct 288 * mapping as they will need otherwise to carve into the kernel maps for 289 * every node allocation, resulting in deadlocks for consumers already 290 * working with kernel maps. |
300 */ | 291 */ |
301static int 302vm_radix_node_zone_init(void *mem, int size __unused, int flags __unused) 303{ 304 struct vm_radix_node *rnode; 305 306 rnode = mem; 307 memset(rnode->rn_child, 0, sizeof(rnode->rn_child)); 308 return (0); 309} 310 311/* 312 * Pre-allocate intermediate nodes from the UMA slab zone. 313 */ | |
314static void | 292static void |
315vm_radix_prealloc(void *arg __unused) | 293vm_radix_reserve_kva(void *arg __unused) |
316{ | 294{ |
317 int nodes; | |
318 319 /* 320 * Calculate the number of reserved nodes, discounting the pages that 321 * are needed to store them. 322 */ | 295 296 /* 297 * Calculate the number of reserved nodes, discounting the pages that 298 * are needed to store them. 299 */ |
323 nodes = ((vm_paddr_t)cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE + 324 sizeof(struct vm_radix_node)); 325 if (!uma_zone_reserve_kva(vm_radix_node_zone, nodes)) 326 panic("%s: unable to create new zone", __func__); 327 uma_prealloc(vm_radix_node_zone, nodes); | 300 if (!uma_zone_reserve_kva(vm_radix_node_zone, 301 ((vm_paddr_t)cnt.v_page_count * PAGE_SIZE) / (PAGE_SIZE + 302 sizeof(struct vm_radix_node)))) 303 panic("%s: unable to reserve KVA", __func__); |
328} | 304} |
329SYSINIT(vm_radix_prealloc, SI_SUB_KMEM, SI_ORDER_SECOND, vm_radix_prealloc, 330 NULL); | 305SYSINIT(vm_radix_reserve_kva, SI_SUB_KMEM, SI_ORDER_SECOND, 306 vm_radix_reserve_kva, NULL); 307#endif |
331 332/* 333 * Initialize the UMA slab zone. 334 * Until vm_radix_prealloc() is called, the zone will be served by the 335 * UMA boot-time pre-allocated pool of pages. 336 */ 337void 338vm_radix_init(void) 339{ 340 341 vm_radix_node_zone = uma_zcreate("RADIX NODE", 342 sizeof(struct vm_radix_node), NULL, 343#ifdef INVARIANTS 344 vm_radix_node_zone_dtor, 345#else 346 NULL, 347#endif | 308 309/* 310 * Initialize the UMA slab zone. 311 * Until vm_radix_prealloc() is called, the zone will be served by the 312 * UMA boot-time pre-allocated pool of pages. 313 */ 314void 315vm_radix_init(void) 316{ 317 318 vm_radix_node_zone = uma_zcreate("RADIX NODE", 319 sizeof(struct vm_radix_node), NULL, 320#ifdef INVARIANTS 321 vm_radix_node_zone_dtor, 322#else 323 NULL, 324#endif |
348 vm_radix_node_zone_init, NULL, VM_RADIX_PAD, UMA_ZONE_VM | 349 UMA_ZONE_NOFREE); | 325 NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM); |
350} 351 352/* 353 * Inserts the key-value pair into the trie. 354 * Panics if the key already exists. 355 */ | 326} 327 328/* 329 * Inserts the key-value pair into the trie. 330 * Panics if the key already exists. 331 */ |
356void | 332int |
357vm_radix_insert(struct vm_radix *rtree, vm_page_t page) 358{ 359 vm_pindex_t index, newind; 360 void **parentp; 361 struct vm_radix_node *rnode, *tmp; 362 vm_page_t m; 363 int slot; 364 uint16_t clev; 365 366 index = page->pindex; 367 | 333vm_radix_insert(struct vm_radix *rtree, vm_page_t page) 334{ 335 vm_pindex_t index, newind; 336 void **parentp; 337 struct vm_radix_node *rnode, *tmp; 338 vm_page_t m; 339 int slot; 340 uint16_t clev; 341 342 index = page->pindex; 343 |
344restart: 345 |
|
368 /* 369 * The owner of record for root is not really important because it 370 * will never be used. 371 */ 372 rnode = vm_radix_getroot(rtree); 373 if (rnode == NULL) { 374 rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF; | 346 /* 347 * The owner of record for root is not really important because it 348 * will never be used. 349 */ 350 rnode = vm_radix_getroot(rtree); 351 if (rnode == NULL) { 352 rtree->rt_root = (uintptr_t)page | VM_RADIX_ISLEAF; |
375 return; | 353 return (0); |
376 } 377 parentp = (void **)&rtree->rt_root; 378 for (;;) { 379 if (vm_radix_isleaf(rnode)) { 380 m = vm_radix_topage(rnode); 381 if (m->pindex == index) 382 panic("%s: key %jx is already present", 383 __func__, (uintmax_t)index); 384 clev = vm_radix_keydiff(m->pindex, index); | 354 } 355 parentp = (void **)&rtree->rt_root; 356 for (;;) { 357 if (vm_radix_isleaf(rnode)) { 358 m = vm_radix_topage(rnode); 359 if (m->pindex == index) 360 panic("%s: key %jx is already present", 361 __func__, (uintmax_t)index); 362 clev = vm_radix_keydiff(m->pindex, index); |
 363 364 /* 365 * During node allocation the trie that is being 366 * walked can be modified because of recursing radix 367 * trie operations. 368 * If this is the case, the recursing functions signal 369 * such a situation and the insert operation must 370 * start from scratch again. 371 * The freed radix node will then very likely be in 372 * the UMA caches, preventing the same situation 373 * from happening again. 374 */ 375 rtree->rt_flags |= RT_INSERT_INPROG; |
|
385 tmp = vm_radix_node_get(vm_radix_trimkey(index, 386 clev + 1), 2, clev); | 376 tmp = vm_radix_node_get(vm_radix_trimkey(index, 377 clev + 1), 2, clev); |
378 rtree->rt_flags &= ~RT_INSERT_INPROG; 379 if (tmp == NULL) { 380 rtree->rt_flags &= ~RT_TRIE_MODIFIED; 381 return (ENOMEM); 382 } 383 if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) { 384 rtree->rt_flags &= ~RT_TRIE_MODIFIED; 385 tmp->rn_count = 0; 386 vm_radix_node_put(tmp); 387 goto restart; 388 } |
|
387 *parentp = tmp; 388 vm_radix_addpage(tmp, index, clev, page); 389 vm_radix_addpage(tmp, m->pindex, clev, m); | 389 *parentp = tmp; 390 vm_radix_addpage(tmp, index, clev, page); 391 vm_radix_addpage(tmp, m->pindex, clev, m); |
390 return; | 392 return (0); |
391 } else if (vm_radix_keybarr(rnode, index)) 392 break; 393 slot = vm_radix_slot(index, rnode->rn_clev); 394 if (rnode->rn_child[slot] == NULL) { 395 rnode->rn_count++; 396 vm_radix_addpage(rnode, index, rnode->rn_clev, page); | 393 } else if (vm_radix_keybarr(rnode, index)) 394 break; 395 slot = vm_radix_slot(index, rnode->rn_clev); 396 if (rnode->rn_child[slot] == NULL) { 397 rnode->rn_count++; 398 vm_radix_addpage(rnode, index, rnode->rn_clev, page); |
397 return; | 399 return (0); |
398 } 399 parentp = &rnode->rn_child[slot]; 400 rnode = rnode->rn_child[slot]; 401 } 402 403 /* 404 * A new node is needed because the right insertion level is reached. 405 * Setup the new intermediate node and add the 2 children: the 406 * new object and the older edge. 407 */ 408 newind = rnode->rn_owner; 409 clev = vm_radix_keydiff(newind, index); | 400 } 401 parentp = &rnode->rn_child[slot]; 402 rnode = rnode->rn_child[slot]; 403 } 404 405 /* 406 * A new node is needed because the right insertion level is reached. 407 * Setup the new intermediate node and add the 2 children: the 408 * new object and the older edge. 409 */ 410 newind = rnode->rn_owner; 411 clev = vm_radix_keydiff(newind, index); |
410 tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, 411 clev); | 412 413 /* See the comments above. */ 414 rtree->rt_flags |= RT_INSERT_INPROG; 415 tmp = vm_radix_node_get(vm_radix_trimkey(index, clev + 1), 2, clev); 416 rtree->rt_flags &= ~RT_INSERT_INPROG; 417 if (tmp == NULL) { 418 rtree->rt_flags &= ~RT_TRIE_MODIFIED; 419 return (ENOMEM); 420 } 421 if ((rtree->rt_flags & RT_TRIE_MODIFIED) != 0) { 422 rtree->rt_flags &= ~RT_TRIE_MODIFIED; 423 tmp->rn_count = 0; 424 vm_radix_node_put(tmp); 425 goto restart; 426 } |
412 *parentp = tmp; 413 vm_radix_addpage(tmp, index, clev, page); 414 slot = vm_radix_slot(newind, clev); 415 tmp->rn_child[slot] = rnode; | 427 *parentp = tmp; 428 vm_radix_addpage(tmp, index, clev, page); 429 slot = vm_radix_slot(newind, clev); 430 tmp->rn_child[slot] = rnode; |
431 return (0); |
|
416} 417 418/* 419 * Returns the value stored at the index. If the index is not present, 420 * NULL is returned. 421 */ 422vm_page_t 423vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) --- 248 unchanged lines hidden (view full) --- 672 */ 673void 674vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 675{ 676 struct vm_radix_node *rnode, *parent; 677 vm_page_t m; 678 int i, slot; 679 | 432} 433 434/* 435 * Returns the value stored at the index. If the index is not present, 436 * NULL is returned. 437 */ 438vm_page_t 439vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) --- 248 unchanged lines hidden (view full) --- 688 */ 689void 690vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 691{ 692 struct vm_radix_node *rnode, *parent; 693 vm_page_t m; 694 int i, slot; 695 |
 696 /* 697 * Detect if a page is going to be removed from a trie which is 698 * already undergoing another trie operation. 699 * Right now this is only possible for vm_radix_remove() recursing 700 * into vm_radix_insert(). 701 * If this is the case, the caller must be notified about this 702 * situation. It will also take care to update the RT_TRIE_MODIFIED 703 * flag accordingly. 704 * The RT_TRIE_MODIFIED bit is set here because the remove operation 705 * will always succeed. 706 */ 707 if ((rtree->rt_flags & RT_INSERT_INPROG) != 0) 708 rtree->rt_flags |= RT_TRIE_MODIFIED; 709 |
|
680 rnode = vm_radix_getroot(rtree); 681 if (vm_radix_isleaf(rnode)) { 682 m = vm_radix_topage(rnode); 683 if (m->pindex != index) 684 panic("%s: invalid key found", __func__); 685 vm_radix_setroot(rtree, NULL); 686 return; 687 } --- 38 unchanged lines hidden (view full) --- 726 * This function is recursive but there is a tight control on it as the 727 * maximum depth of the tree is fixed. 728 */ 729void 730vm_radix_reclaim_allnodes(struct vm_radix *rtree) 731{ 732 struct vm_radix_node *root; 733 | 710 rnode = vm_radix_getroot(rtree); 711 if (vm_radix_isleaf(rnode)) { 712 m = vm_radix_topage(rnode); 713 if (m->pindex != index) 714 panic("%s: invalid key found", __func__); 715 vm_radix_setroot(rtree, NULL); 716 return; 717 } --- 38 unchanged lines hidden (view full) --- 756 * This function is recursive but there is a tight control on it as the 757 * maximum depth of the tree is fixed. 758 */ 759void 760vm_radix_reclaim_allnodes(struct vm_radix *rtree) 761{ 762 struct vm_radix_node *root; 763 |
764 KASSERT((rtree->rt_flags & RT_INSERT_INPROG) == 0, 765 ("vm_radix_reclaim_allnodes: unexpected trie recursion")); 766 |
|
734 root = vm_radix_getroot(rtree); 735 if (root == NULL) 736 return; 737 vm_radix_setroot(rtree, NULL); 738 if (!vm_radix_isleaf(root)) 739 vm_radix_reclaim_allnodes_int(root); 740} 741 | 767 root = vm_radix_getroot(rtree); 768 if (root == NULL) 769 return; 770 vm_radix_setroot(rtree, NULL); 771 if (!vm_radix_isleaf(root)) 772 vm_radix_reclaim_allnodes_int(root); 773} 774 |
775/* 776 * Replace an existing page into the trie with another one. 777 * Panics if the replacing page is not present or if the new page has an 778 * invalid key. 779 */ 780vm_page_t 781vm_radix_replace(struct vm_radix *rtree, vm_page_t newpage, vm_pindex_t index) 782{ 783 struct vm_radix_node *rnode; 784 vm_page_t m; 785 int slot; 786 787 KASSERT(newpage->pindex == index, ("%s: newpage index invalid", 788 __func__)); 789 790 rnode = vm_radix_getroot(rtree); 791 if (rnode == NULL) 792 panic("%s: replacing page on an empty trie", __func__); 793 if (vm_radix_isleaf(rnode)) { 794 m = vm_radix_topage(rnode); 795 if (m->pindex != index) 796 panic("%s: original replacing root key not found", 797 __func__); 798 rtree->rt_root = (uintptr_t)newpage | VM_RADIX_ISLEAF; 799 return (m); 800 } 801 for (;;) { 802 slot = vm_radix_slot(index, rnode->rn_clev); 803 if (vm_radix_isleaf(rnode->rn_child[slot])) { 804 m = vm_radix_topage(rnode->rn_child[slot]); 805 if (m->pindex == index) { 806 rnode->rn_child[slot] = 807 (void *)((uintptr_t)newpage | 808 VM_RADIX_ISLEAF); 809 return (m); 810 } else 811 break; 812 } else if (rnode->rn_child[slot] == NULL || 813 vm_radix_keybarr(rnode->rn_child[slot], index)) 814 break; 815 rnode = rnode->rn_child[slot]; 816 } 817 panic("%s: original replacing page not found", __func__); 818} 819 |
|
742#ifdef DDB 743/* 744 * Show details about the given radix node. 745 */ 746DB_SHOW_COMMAND(radixnode, db_show_radixnode) 747{ 748 struct vm_radix_node *rnode; 749 int i; --- 16 unchanged lines hidden --- | 820#ifdef DDB 821/* 822 * Show details about the given radix node. 823 */ 824DB_SHOW_COMMAND(radixnode, db_show_radixnode) 825{ 826 struct vm_radix_node *rnode; 827 int i; --- 16 unchanged lines hidden --- |