vm_radix.c (246430) | vm_radix.c (246726) |
---|---|
1/* | 1/* |
2 * Copyright (c) 2013 EMC Corp. |
|
2 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 3 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright --- 11 unchanged lines hidden (view full) --- 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 */ 28 | 3 * Copyright (c) 2011 Jeffrey Roberson <jeff@freebsd.org> 4 * Copyright (c) 2008 Mayur Shardul <mayur.shardul@gmail.com> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright --- 11 unchanged lines hidden (view full) --- 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 |
29 | |
30/* | 30/* |
31 * Radix tree implementation. | 31 * Path-compressed radix trie implementation. 32 * The following code is not generalized into a general purpose library 33 * because there are way too many parameters embedded that should really 34 * be decided by the library consumers. At the same time, consumers 35 * of this code must achieve highest possible performance. 36 * 37 * The implementation takes into account the following rationale: 38 * - Size of the nodes might be as small as possible. 39 * - There is no bias toward lookup operations over inserts or removes, 40 * and vice-versa. 41 * - In average there are not many complete levels, than level 42 * compression may just complicate things. |
32 */ 33 34#include <sys/cdefs.h> 35 | 43 */ 44 45#include <sys/cdefs.h> 46 |
47#include "opt_ddb.h" 48 |
|
36#include <sys/param.h> 37#include <sys/conf.h> 38#include <sys/systm.h> 39#include <sys/kernel.h> 40#include <sys/malloc.h> 41#include <sys/queue.h> | 49#include <sys/param.h> 50#include <sys/conf.h> 51#include <sys/systm.h> 52#include <sys/kernel.h> 53#include <sys/malloc.h> 54#include <sys/queue.h> |
42#include <sys/param.h> | |
43#include <sys/lock.h> 44#include <sys/mutex.h> | 55#include <sys/lock.h> 56#include <sys/mutex.h> |
45#include <sys/ktr.h> | 57 |
46#include <vm/uma.h> 47#include <vm/vm.h> 48#include <vm/vm_param.h> 49#include <vm/vm_extern.h> 50#include <vm/vm_kern.h> 51#include <vm/vm_page.h> | 58#include <vm/uma.h> 59#include <vm/vm.h> 60#include <vm/vm_param.h> 61#include <vm/vm_extern.h> 62#include <vm/vm_kern.h> 63#include <vm/vm_page.h> |
52#ifndef UMA_MD_SMALL_ALLOC 53#include <vm/vm_map.h> 54#endif | |
55#include <vm/vm_radix.h> | 64#include <vm/vm_radix.h> |
56#include <vm/vm_object.h> | |
57 | 65 |
58#include <sys/kdb.h> | 66#ifdef DDB 67#include <ddb/ddb.h> 68#endif |
59 | 69 |
60#ifndef UMA_MD_SMALL_ALLOC 61#define VM_RADIX_RNODE_MAP_SCALE (1024 * 1024 / 2) 62#define VM_RADIX_WIDTH 4 63 | |
64/* | 70/* |
65 * Bits of height in root. 66 * The mask of smaller power of 2 containing VM_RADIX_LIMIT. | 71 * Such sizes should permit to keep node children contained into a single 72 * cache-line, or to at least not span many of those. 73 * In particular, sparse tries should however be compressed properly and 74 * then make some extra-levels not a big deal. |
67 */ | 75 */ |
68#define VM_RADIX_HEIGHT 0x1f | 76#ifdef __LP64__ 77#define VM_RADIX_WIDTH 4 |
69#else | 78#else |
70#define VM_RADIX_WIDTH 5 71 72/* See the comment above. */ 73#define VM_RADIX_HEIGHT 0xf | 79#define VM_RADIX_WIDTH 3 |
74#endif 75 76#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) 77#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) | 80#endif 81 82#define VM_RADIX_COUNT (1 << VM_RADIX_WIDTH) 83#define VM_RADIX_MASK (VM_RADIX_COUNT - 1) |
78#define VM_RADIX_MAXVAL ((vm_pindex_t)-1) 79#define VM_RADIX_LIMIT howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) | 84#define VM_RADIX_LIMIT \ 85 (howmany((sizeof(vm_pindex_t) * NBBY), VM_RADIX_WIDTH) - 1) |
80 81/* Flag bits stored in node pointers. */ | 86 87/* Flag bits stored in node pointers. */ |
82#define VM_RADIX_FLAGS 0x3 | 88#define VM_RADIX_ISLEAF 0x1 89#define VM_RADIX_FLAGS 0x1 90#define VM_RADIX_PAD VM_RADIX_FLAGS |
83 | 91 |
84/* Calculates maximum value for a tree of height h. */ 85#define VM_RADIX_MAX(h) \ 86 ((h) == VM_RADIX_LIMIT ? VM_RADIX_MAXVAL : \ 87 (((vm_pindex_t)1 << ((h) * VM_RADIX_WIDTH)) - 1)) | 92/* Returns one unit associated with specified level. */ 93#define VM_RADIX_UNITLEVEL(lev) \ 94 ((vm_pindex_t)1 << ((VM_RADIX_LIMIT - (lev)) * VM_RADIX_WIDTH)) |
88 | 95 |
89/* 90 * On 32-bits architectures KTR cannot handle 64-bits values. 91 * Add macros for splitting in 32-bits quantity and provide format strings. 92 * Note that braces are not used because they would break compilation. 93 * Also, note that arguments are cast to u_long in order to follow KTR 94 * convention. 95 */ 96#ifdef KTR 97#define KFRMT64(x) __STRING(x)"l 0x%08lx, "__STRING(x)"h 0x%08lx" 98#define KSPLT64L(x) ((u_long)((x) & 0xFFFFFFFF)) 99#define KSPLT64H(x) ((u_long)(((x) >> 32) & 0xFFFFFFFF)) 100#endif 101 102CTASSERT(VM_RADIX_HEIGHT >= VM_RADIX_LIMIT); 103 | |
104struct vm_radix_node { 105 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ | 96struct vm_radix_node { 97 void *rn_child[VM_RADIX_COUNT]; /* Child nodes. */ |
106 volatile uint32_t rn_count; /* Valid children. */ | 98 vm_pindex_t rn_owner; /* Owner of record. */ 99 uint16_t rn_count; /* Valid children. */ 100 uint16_t rn_clev; /* Current level. */ |
107}; 108 | 101}; 102 |
109CTASSERT(sizeof(struct vm_radix_node) < PAGE_SIZE); 110 | |
111static uma_zone_t vm_radix_node_zone; 112 | 103static uma_zone_t vm_radix_node_zone; 104 |
113#ifndef UMA_MD_SMALL_ALLOC 114static vm_map_t rnode_map; 115static u_long rnode_map_scale; 116 117static void * 118vm_radix_node_zone_allocf(uma_zone_t zone, int size, uint8_t *flags, int wait) | 105#ifdef INVARIANTS 106/* 107 * Radix node zone destructor. 108 */ 109static void 110vm_radix_node_zone_dtor(void *mem, int size __unused, void *arg __unused) |
119{ | 111{ |
120 vm_offset_t addr; 121 vm_page_t m; 122 int pflags; | 112 struct vm_radix_node *rnode; |
123 | 113 |
124 /* Inform UMA that this allocator uses rnode_map. */ 125 *flags = UMA_SLAB_KERNEL; | 114 rnode = mem; 115 KASSERT(rnode->rn_count == 0, 116 ("vm_radix_node_put: Freeing node %p with %d children\n", mem, 117 rnode->rn_count)); 118} 119#endif |
126 | 120 |
127 pflags = VM_ALLOC_WIRED | VM_ALLOC_NOOBJ; | 121/* 122 * Allocate a radix node. Pre-allocation ensures that the request will be 123 * always successfully satisfied. 124 */ 125static __inline struct vm_radix_node * 126vm_radix_node_get(vm_pindex_t owner, uint16_t count, uint16_t clevel) 127{ 128 struct vm_radix_node *rnode; |
128 | 129 |
130 rnode = uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO); 131 |
|
129 /* | 132 /* |
130 * As kmem_alloc_nofault() can however fail, let just assume that 131 * M_NOWAIT is on and act accordingly. | 133 * The required number of nodes might be already correctly 134 * pre-allocated in vm_radix_init(). However, UMA can reserve few 135 * nodes on per-cpu specific buckets, which will not be accessible 136 * from the curcpu. The allocation could then return NULL when the 137 * pre-allocation pool is close to be exhausted. 138 * Anyway, in practice this should never be a problem because a new 139 * node is not always required for insert, thus the pre-allocation 140 * pool should already have some extra-pages that indirectly deal with 141 * this situation. |
132 */ | 142 */ |
133 pflags |= ((wait & M_USE_RESERVE) != 0) ? VM_ALLOC_INTERRUPT : 134 VM_ALLOC_SYSTEM; 135 if ((wait & M_ZERO) != 0) 136 pflags |= VM_ALLOC_ZERO; 137 addr = kmem_alloc_nofault(rnode_map, size); 138 if (addr == 0) 139 return (NULL); 140 141 /* Just one page allocation is assumed here. */ 142 m = vm_page_alloc(NULL, OFF_TO_IDX(addr - VM_MIN_KERNEL_ADDRESS), 143 pflags); 144 if (m == NULL) { 145 kmem_free(rnode_map, addr, size); 146 return (NULL); 147 } 148 if ((wait & M_ZERO) != 0 && (m->flags & PG_ZERO) == 0) 149 pmap_zero_page(m); 150 pmap_qenter(addr, &m, 1); 151 return ((void *)addr); | 143 if (rnode == NULL) 144 panic("%s: uma_zalloc() returned NULL for a new node", 145 __func__); 146 rnode->rn_owner = owner; 147 rnode->rn_count = count; 148 rnode->rn_clev = clevel; 149 return (rnode); |
152} 153 | 150} 151 |
154static void 155vm_radix_node_zone_freef(void *item, int size, uint8_t flags) | 152/* 153 * Free radix node. 154 */ 155static __inline void 156vm_radix_node_put(struct vm_radix_node *rnode) |
156{ | 157{ |
157 vm_page_t m; 158 vm_offset_t voitem; | |
159 | 158 |
160 MPASS((flags & UMA_SLAB_KERNEL) != 0); | 159 uma_zfree(vm_radix_node_zone, rnode); 160} |
161 | 161 |
162 /* Just one page allocation is assumed here. */ 163 voitem = (vm_offset_t)item; 164 m = PHYS_TO_VM_PAGE(pmap_kextract(voitem)); 165 pmap_qremove(voitem, 1); 166 vm_page_lock(m); 167 vm_page_unwire(m, 0); 168 vm_page_free(m); 169 vm_page_unlock(m); 170 kmem_free(rnode_map, voitem, size); | 162/* 163 * Return the position in the array for a given level. 164 */ 165static __inline int 166vm_radix_slot(vm_pindex_t index, uint16_t level) 167{ 168 169 return ((index >> ((VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH)) & 170 VM_RADIX_MASK); |
171} 172 | 171} 172 |
173static void 174init_vm_radix_alloc(void *dummy __unused) | 173/* Trims the key after the specified level. */ 174static __inline vm_pindex_t 175vm_radix_trimkey(vm_pindex_t index, uint16_t level) |
175{ | 176{ |
177 vm_pindex_t ret; |
|
176 | 178 |
177 uma_zone_set_max(vm_radix_node_zone, rnode_map_scale); 178 uma_zone_set_allocf(vm_radix_node_zone, vm_radix_node_zone_allocf); 179 uma_zone_set_freef(vm_radix_node_zone, vm_radix_node_zone_freef); | 179 ret = index; 180 if (level < VM_RADIX_LIMIT) { 181 ret >>= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; 182 ret <<= (VM_RADIX_LIMIT - level) * VM_RADIX_WIDTH; 183 } 184 return (ret); |
180} | 185} |
181SYSINIT(vm_radix, SI_SUB_KMEM, SI_ORDER_SECOND, init_vm_radix_alloc, NULL); 182#endif | |
183 184/* | 186 187/* |
185 * Radix node zone destructor. | 188 * Get the root node for a radix tree. |
186 */ | 189 */ |
187#ifdef INVARIANTS 188static void 189vm_radix_node_zone_dtor(void *mem, int size, void *arg) | 190static __inline struct vm_radix_node * 191vm_radix_getroot(struct vm_radix *rtree) |
190{ | 192{ |
191 struct vm_radix_node *rnode; | |
192 | 193 |
193 rnode = mem; 194 KASSERT(rnode->rn_count == 0, 195 ("vm_radix_node_put: Freeing a node with %d children\n", 196 rnode->rn_count)); | 194 return ((struct vm_radix_node *)(rtree->rt_root & ~VM_RADIX_FLAGS)); |
197} | 195} |
198#endif | |
199 200/* | 196 197/* |
201 * Allocate a radix node. Initializes all elements to 0. | 198 * Set the root node for a radix tree. |
202 */ | 199 */ |
203static __inline struct vm_radix_node * 204vm_radix_node_get(void) | 200static __inline void 201vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode) |
205{ 206 | 202{ 203 |
207 return (uma_zalloc(vm_radix_node_zone, M_NOWAIT | M_ZERO)); | 204 rtree->rt_root = (uintptr_t)rnode; |
208} 209 210/* | 205} 206 207/* |
211 * Free radix node. | 208 * Returns the associated page extracted from rnode if available, 209 * NULL otherwise. |
212 */ | 210 */ |
213static __inline void 214vm_radix_node_put(struct vm_radix_node *rnode) | 211static __inline vm_page_t 212vm_radix_node_page(struct vm_radix_node *rnode) |
215{ 216 | 213{ 214 |
217 uma_zfree(vm_radix_node_zone, rnode); | 215 return ((((uintptr_t)rnode & VM_RADIX_ISLEAF) != 0) ? 216 (vm_page_t)((uintptr_t)rnode & ~VM_RADIX_FLAGS) : NULL); |
218} 219 220/* | 217} 218 219/* |
221 * Return the position in the array for a given level. | 220 * Adds the page as a child of provided node. |
222 */ | 221 */ |
223static __inline int 224vm_radix_slot(vm_pindex_t index, int level) | 222static __inline void 223vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev, 224 vm_page_t page) |
225{ | 225{ |
226 int slot; |
|
226 | 227 |
227 return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK); | 228 slot = vm_radix_slot(index, clev); 229 rnode->rn_child[slot] = (void *)((uintptr_t)page | VM_RADIX_ISLEAF); |
228} 229 230/* | 230} 231 232/* |
231 * Initialize the radix node submap (for architectures not supporting 232 * direct-mapping) and the radix node zone. 233 * 234 * WITNESS reports a lock order reversal, for architectures not 235 * supporting direct-mapping, between the "system map" lock 236 * and the "vm object" lock. This is because the well established ordering 237 * "system map" -> "vm object" is not honoured in this case as allocating 238 * from the radix node submap ends up adding a mapping entry to it, meaning 239 * it is necessary to lock the submap. However, the radix node submap is 240 * a leaf and self-contained, thus a deadlock cannot happen here and 241 * adding MTX_NOWITNESS to all map locks would be largerly sub-optimal. | 233 * Returns the slot where two keys differ. 234 * It cannot accept 2 equal keys. |
242 */ | 235 */ |
243void 244vm_radix_init(void) | 236static __inline uint16_t 237vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2) |
245{ | 238{ |
246#ifndef UMA_MD_SMALL_ALLOC 247 vm_offset_t maxaddr, minaddr; | 239 uint16_t clev; |
248 | 240 |
249 rnode_map_scale = VM_RADIX_RNODE_MAP_SCALE; 250 TUNABLE_ULONG_FETCH("hw.rnode_map_scale", &rnode_map_scale); 251 rnode_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 252 rnode_map_scale * sizeof(struct vm_radix_node), FALSE); 253 rnode_map->system_map = 1; 254#endif | 241 KASSERT(index1 != index2, ("%s: passing the same key value %jx", 242 __func__, (uintmax_t)index1)); |
255 | 243 |
256 vm_radix_node_zone = uma_zcreate("RADIX NODE", 257 sizeof(struct vm_radix_node), NULL, 258#ifdef INVARIANTS 259 vm_radix_node_zone_dtor, 260#else 261 NULL, 262#endif 263 NULL, NULL, VM_RADIX_HEIGHT, UMA_ZONE_VM); | 244 index1 ^= index2; 245 for (clev = 0; clev <= VM_RADIX_LIMIT ; clev++) 246 if (vm_radix_slot(index1, clev)) 247 return (clev); 248 panic("%s: it might have not reached this point", __func__); 249 return (0); |
264} 265 266/* | 250} 251 252/* |
267 * Extract the root node and height from a radix tree with a single load. | 253 * Returns TRUE if it can be determined that key does not belong to the 254 * specified rnode. FALSE otherwise. |
268 */ | 255 */ |
269static __inline int 270vm_radix_height(struct vm_radix *rtree, struct vm_radix_node **rnode) | 256static __inline boolean_t 257vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx) |
271{ | 258{ |
272 uintptr_t root; 273 int height; | |
274 | 259 |
275 root = rtree->rt_root; 276 height = root & VM_RADIX_HEIGHT; 277 *rnode = (struct vm_radix_node *)(root - height); 278 return (height); | 260 if (rnode->rn_clev > 0) { 261 idx = vm_radix_trimkey(idx, rnode->rn_clev - 1); 262 idx -= rnode->rn_owner; 263 if (idx != 0) 264 return (TRUE); 265 } 266 return (FALSE); |
279} 280 | 267} 268 |
281 | |
282/* | 269/* |
283 * Set the root node and height for a radix tree. | 270 * Adjusts the idx key to the first upper level available, based on a valid 271 * initial level and map of available levels. 272 * Returns a value bigger than 0 to signal that there are not valid levels 273 * available. |
284 */ | 274 */ |
285static inline void 286vm_radix_setroot(struct vm_radix *rtree, struct vm_radix_node *rnode, 287 int height) | 275static __inline int 276vm_radix_addlev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) |
288{ | 277{ |
289 uintptr_t root; | 278 vm_pindex_t wrapidx; |
290 | 279 |
291 root = (uintptr_t)rnode | height; 292 rtree->rt_root = root; | 280 for (; levels[ilev] == FALSE || 281 vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1); ilev--) 282 if (ilev == 0) 283 break; 284 KASSERT(ilev > 0 || levels[0] == TRUE, 285 ("%s: levels back-scanning problem", __func__)); 286 if (ilev == 0 && vm_radix_slot(*idx, ilev) == (VM_RADIX_COUNT - 1)) 287 return (1); 288 wrapidx = *idx; 289 *idx = vm_radix_trimkey(*idx, ilev); 290 *idx += VM_RADIX_UNITLEVEL(ilev); 291 if (*idx < wrapidx) 292 return (1); 293 return (0); |
293} 294 | 294} 295 |
295static inline vm_page_t 296vm_radix_match(void *child) | 296/* 297 * Adjusts the idx key to the first lower level available, based on a valid 298 * initial level and map of available levels. 299 * Returns a value bigger than 0 to signal that there are not valid levels 300 * available. 301 */ 302static __inline int 303vm_radix_declev(vm_pindex_t *idx, boolean_t *levels, uint16_t ilev) |
297{ | 304{ |
298 uintptr_t c; | 305 vm_pindex_t wrapidx; |
299 | 306 |
300 c = (uintptr_t)child; 301 302 return ((vm_page_t)(c & ~VM_RADIX_FLAGS)); | 307 for (; levels[ilev] == FALSE || 308 vm_radix_slot(*idx, ilev) == 0; ilev--) 309 if (ilev == 0) 310 break; 311 KASSERT(ilev > 0 || levels[0] == TRUE, 312 ("%s: levels back-scanning problem", __func__)); 313 if (ilev == 0 && vm_radix_slot(*idx, ilev) == 0) 314 return (1); 315 wrapidx = *idx; 316 *idx = vm_radix_trimkey(*idx, ilev); 317 *idx |= VM_RADIX_UNITLEVEL(ilev) - 1; 318 *idx -= VM_RADIX_UNITLEVEL(ilev); 319 if (*idx < wrapidx) 320 return (1); 321 return (0); |
303} 304 | 322} 323 |
324/* 325 * Internal handwork for vm_radix_reclaim_allonodes() primitive. 326 * This function is recrusive. 327 */ |
|
305static void | 328static void |
306vm_radix_reclaim_allnodes_internal(struct vm_radix_node *rnode, int level) | 329vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode) |
307{ 308 int slot; 309 | 330{ 331 int slot; 332 |
310 MPASS(rnode != NULL && level >= 0); 311 312 /* 313 * Level 0 just contains pages as children, thus make it a special 314 * case, free the node and return. 315 */ 316 if (level == 0) { 317 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); 318 rnode->rn_count = 0; 319 vm_radix_node_put(rnode); 320 return; 321 } | |
322 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { 323 if (rnode->rn_child[slot] == NULL) 324 continue; | 333 for (slot = 0; slot < VM_RADIX_COUNT && rnode->rn_count != 0; slot++) { 334 if (rnode->rn_child[slot] == NULL) 335 continue; |
325 CTR3(KTR_VM, 326 "reclaiming: node %p, level %d recursing in slot %d", 327 rnode, level, slot); 328 vm_radix_reclaim_allnodes_internal(rnode->rn_child[slot], 329 level - 1); | 336 if (vm_radix_node_page(rnode->rn_child[slot]) == NULL) 337 vm_radix_reclaim_allnodes_int(rnode->rn_child[slot]); |
330 rnode->rn_count--; 331 } | 338 rnode->rn_count--; 339 } |
332 MPASS(rnode->rn_count == 0); 333 CTR2(KTR_VM, "reclaiming: node %p, level %d", rnode, level); | |
334 vm_radix_node_put(rnode); 335} 336 337/* | 340 vm_radix_node_put(rnode); 341} 342 343/* |
338 * Inserts the key-value pair in to the radix tree. Returns errno. | 344 * Returns the amount of requested memory to satisfy nodes pre-allocation. 345 */ 346size_t 347vm_radix_allocphys_size(size_t nitems) 348{ 349 350 return (nitems * sizeof(struct vm_radix_node)); 351} 352 353/* 354 * Pre-allocate intermediate nodes from the UMA slab zone. 355 */ 356void 357vm_radix_init(void) 358{ 359 360 vm_radix_node_zone = uma_zcreate("RADIX NODE", 361 sizeof(struct vm_radix_node), NULL, 362#ifdef INVARIANTS 363 vm_radix_node_zone_dtor, 364#else 365 NULL, 366#endif 367 NULL, NULL, VM_RADIX_PAD, UMA_ZONE_VM | UMA_ZONE_NOFREE); 368 uma_prealloc(vm_radix_node_zone, vm_page_array_size); 369} 370 371/* 372 * Inserts the key-value pair in to the trie. |
339 * Panics if the key already exists. 340 */ 341void 342vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page) 343{ | 373 * Panics if the key already exists. 374 */ 375void 376vm_radix_insert(struct vm_radix *rtree, vm_pindex_t index, vm_page_t page) 377{ |
344 struct vm_radix_node *rnode; 345 struct vm_radix_node *root; 346 int level; | 378 vm_pindex_t newind; 379 struct vm_radix_node *rnode, *tmp, *tmp2; 380 vm_page_t m; |
347 int slot; | 381 int slot; |
382 uint16_t clev; |
|
348 | 383 |
349 CTR4(KTR_VM, 350 "insert: tree %p, " KFRMT64(index) ", page %p", rtree, 351 KSPLT64L(index), KSPLT64H(index), page); 352 if (index == -1) 353 panic("vm_radix_insert: -1 is not a valid index.\n"); 354 level = vm_radix_height(rtree, &root); | |
355 /* | 384 /* |
356 * Increase the height by adding nodes at the root until 357 * there is sufficient space. | 385 * The owner of record for root is not really important because it 386 * will never be used. |
358 */ | 387 */ |
359 while (level == 0 || index > VM_RADIX_MAX(level)) { 360 CTR5(KTR_VM, 361 "insert: expanding " KFRMT64(index) ">" KFRMT64(mxl) ", height %d", 362 KSPLT64L(index), KSPLT64H(index), 363 KSPLT64L(VM_RADIX_MAX(level)), 364 KSPLT64H(VM_RADIX_MAX(level)), level); 365 level++; 366 KASSERT(level <= VM_RADIX_LIMIT, 367 ("vm_radix_insert: Tree %p height %d too tall", 368 rtree, level)); 369 /* 370 * Only allocate tree nodes if they are needed. 371 */ 372 if (root == NULL || root->rn_count != 0) { 373 rnode = vm_radix_node_get(); 374 if (rnode == NULL) { 375 CTR5(KTR_VM, 376 "insert: tree %p, root %p, " KFRMT64(index) ", level %d ENOMEM", 377 rtree, root, KSPLT64L(index), 378 KSPLT64H(index), level); 379 panic("vm_radix_insert: failed allocation"); 380 } 381 /* 382 * Store the new pointer with a memory barrier so 383 * that it is visible before the new root. 384 */ 385 if (root) { 386 atomic_store_rel_ptr((volatile uintptr_t *) 387 &rnode->rn_child[0], (uintptr_t)root); 388 rnode->rn_count = 1; 389 } 390 root = rnode; 391 } 392 vm_radix_setroot(rtree, root, level); | 388 rnode = vm_radix_getroot(rtree); 389 if (rnode == NULL) { 390 rnode = vm_radix_node_get(0, 1, 0); 391 vm_radix_setroot(rtree, rnode); 392 vm_radix_addpage(rnode, index, 0, page); 393 return; |
393 } | 394 } |
394 395 /* Now that the tree is tall enough, fill in the path to the index. */ 396 rnode = root; 397 for (level = level - 1; level > 0; level--) { 398 slot = vm_radix_slot(index, level); 399 /* Add the required intermidiate nodes. */ | 395 while (rnode) { 396 if (vm_radix_keybarr(rnode, index) == TRUE) 397 break; 398 slot = vm_radix_slot(index, rnode->rn_clev); 399 m = vm_radix_node_page(rnode->rn_child[slot]); 400 if (m != NULL) { 401 if (m->pindex == index) 402 panic("%s: key %jx is already present", 403 __func__, (uintmax_t)index); 404 clev = vm_radix_keydiff(m->pindex, index); 405 tmp = vm_radix_node_get(vm_radix_trimkey(index, 406 clev - 1), 2, clev); 407 rnode->rn_child[slot] = tmp; 408 vm_radix_addpage(tmp, index, clev, page); 409 vm_radix_addpage(tmp, m->pindex, clev, m); 410 return; 411 } |
400 if (rnode->rn_child[slot] == NULL) { | 412 if (rnode->rn_child[slot] == NULL) { |
401 rnode->rn_child[slot] = vm_radix_node_get(); 402 if (rnode->rn_child[slot] == NULL) { 403 CTR6(KTR_VM, 404 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p ENOMEM", 405 rtree, KSPLT64L(index), KSPLT64H(index), 406 level, slot, rnode); 407 CTR4(KTR_VM, 408 "insert: tree %p, rnode %p, child %p, count %u ENOMEM", 409 rtree, rnode, rnode->rn_child[slot], 410 rnode->rn_count); 411 panic("vm_radix_insert: failed allocation"); 412 } | |
413 rnode->rn_count++; | 413 rnode->rn_count++; |
414 } 415 CTR6(KTR_VM, 416 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", 417 rtree, KSPLT64L(index), KSPLT64H(index), level, slot, 418 rnode); 419 CTR4(KTR_VM, 420 "insert: tree %p, rnode %p, child %p, count %u", 421 rtree, rnode, rnode->rn_child[slot], rnode->rn_count); | 414 vm_radix_addpage(rnode, index, rnode->rn_clev, page); 415 return; 416 } |
422 rnode = rnode->rn_child[slot]; 423 } | 417 rnode = rnode->rn_child[slot]; 418 } |
419 if (rnode == NULL) 420 panic("%s: path traversal ended unexpectedly", __func__); |
|
424 | 421 |
425 slot = vm_radix_slot(index, 0); 426 MPASS(rnode != NULL); 427 KASSERT(rnode->rn_child[slot] == NULL, 428 ("vm_radix_insert: Duplicate value %p at index: %lu\n", 429 rnode->rn_child[slot], (u_long)index)); 430 rnode->rn_child[slot] = page; 431 rnode->rn_count++; 432 CTR5(KTR_VM, 433 "insert: tree %p, " KFRMT64(index) ", level %d, slot %d", 434 rtree, KSPLT64L(index), KSPLT64H(index), level, slot); 435 CTR3(KTR_VM, "insert: slot %d, rnode %p, count %u", slot, rnode, 436 rnode->rn_count); | 422 /* 423 * Scan the trie from the top and find the parent to insert 424 * the new object. 425 */ 426 newind = rnode->rn_owner; 427 clev = vm_radix_keydiff(newind, index); 428 slot = VM_RADIX_COUNT; 429 for (rnode = vm_radix_getroot(rtree); ; rnode = tmp) { 430 KASSERT(rnode != NULL, ("%s: edge cannot be NULL in the scan", 431 __func__)); 432 KASSERT(clev >= rnode->rn_clev, 433 ("%s: unexpected trie depth: clev: %d, rnode->rn_clev: %d", 434 __func__, clev, rnode->rn_clev)); 435 slot = vm_radix_slot(index, rnode->rn_clev); 436 tmp = rnode->rn_child[slot]; 437 KASSERT(tmp != NULL && vm_radix_node_page(tmp) == NULL, 438 ("%s: unexpected lookup interruption", __func__)); 439 if (tmp->rn_clev > clev) 440 break; 441 } 442 KASSERT(rnode != NULL && tmp != NULL && slot < VM_RADIX_COUNT, 443 ("%s: invalid scan parameters rnode: %p, tmp: %p, slot: %d", 444 __func__, (void *)rnode, (void *)tmp, slot)); 445 446 /* 447 * A new node is needed because the right insertion level is reached. 448 * Setup the new intermediate node and add the 2 children: the 449 * new object and the older edge. 450 */ 451 tmp2 = vm_radix_node_get(vm_radix_trimkey(page->pindex, clev - 1), 2, 452 clev); 453 rnode->rn_child[slot] = tmp2; 454 vm_radix_addpage(tmp2, index, clev, page); 455 slot = vm_radix_slot(newind, clev); 456 tmp2->rn_child[slot] = tmp; |
437} 438 439/* 440 * Returns the value stored at the index. If the index is not present 441 * NULL is returned. 442 */ 443vm_page_t 444vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 445{ 446 struct vm_radix_node *rnode; | 457} 458 459/* 460 * Returns the value stored at the index. If the index is not present 461 * NULL is returned. 462 */ 463vm_page_t 464vm_radix_lookup(struct vm_radix *rtree, vm_pindex_t index) 465{ 466 struct vm_radix_node *rnode; |
467 vm_page_t m; |
|
447 int slot; | 468 int slot; |
448 int level; | |
449 | 469 |
450 level = vm_radix_height(rtree, &rnode); 451 if (index > VM_RADIX_MAX(level)) 452 return NULL; 453 level--; | 470 rnode = vm_radix_getroot(rtree); |
454 while (rnode) { | 471 while (rnode) { |
455 slot = vm_radix_slot(index, level); 456 CTR6(KTR_VM, 457 "lookup: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", 458 rtree, KSPLT64L(index), KSPLT64H(index), level, slot, 459 rnode); 460 CTR2(KTR_VM, "lookup: rnode %p, child %p", rnode, 461 rnode->rn_child[slot]); 462 if (level == 0) 463 return vm_radix_match(rnode->rn_child[slot]); | 472 if (vm_radix_keybarr(rnode, index) == TRUE) 473 return (NULL); 474 slot = vm_radix_slot(index, rnode->rn_clev); |
464 rnode = rnode->rn_child[slot]; | 475 rnode = rnode->rn_child[slot]; |
465 level--; | 476 m = vm_radix_node_page(rnode); 477 if (m != NULL) { 478 if (m->pindex == index) 479 return (m); 480 else 481 return (NULL); 482 } |
466 } | 483 } |
467 CTR3(KTR_VM, "lookup: tree %p, " KFRMT64(index) " failed", rtree, 468 KSPLT64L(index), KSPLT64H(index)); 469 470 return NULL; | 484 return (NULL); |
471} 472 473/* | 485} 486 487/* |
474 * Find the first leaf with a valid node between *startp and end. Return 475 * the index of the first valid item in the leaf in *startp. | 488 * Look up any entry at a position bigger than or equal to index. |
476 */ | 489 */ |
477static struct vm_radix_node * 478vm_radix_leaf(struct vm_radix *rtree, vm_pindex_t *startp) | 490vm_page_t 491vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) |
479{ | 492{ |
480 struct vm_radix_node *rnode; 481 vm_pindex_t start; | |
482 vm_pindex_t inc; | 493 vm_pindex_t inc; |
494 vm_page_t m; 495 struct vm_radix_node *rnode; |
|
483 int slot; | 496 int slot; |
484 int level; | 497 uint16_t difflev; 498 boolean_t maplevels[VM_RADIX_LIMIT + 1]; 499#ifdef INVARIANTS 500 int loops = 0; 501#endif |
485 | 502 |
486 start = *startp; | |
487restart: | 503restart: |
488 level = vm_radix_height(rtree, &rnode); 489 if (start > VM_RADIX_MAX(level)) { 490 rnode = NULL; 491 goto out; 492 } 493 /* 494 * Search the tree from the top for any leaf node holding an index 495 * between start and maxval. 496 */ 497 for (level--; level; level--) { 498 slot = vm_radix_slot(start, level); 499 CTR6(KTR_VM, 500 "leaf: tree %p, " KFRMT64(start) ", level %d, slot %d, rnode %p", 501 rtree, KSPLT64L(start), KSPLT64H(start), level, slot, 502 rnode); 503 CTR2(KTR_VM, "leaf: rnode %p, child %p", rnode, 504 rnode->rn_child[slot]); 505 if (rnode->rn_child[slot] != NULL) { | 504 KASSERT(++loops < 1000, ("%s: too many loops", __func__)); 505 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) 506 maplevels[difflev] = FALSE; 507 rnode = vm_radix_getroot(rtree); 508 while (rnode) { 509 maplevels[rnode->rn_clev] = TRUE; 510 511 /* 512 * If the keys differ before the current bisection node 513 * the search key might rollback to the earlierst 514 * available bisection node, or to the smaller value 515 * in the current domain (if the owner is bigger than the 516 * search key). 517 * The search for a valid bisection node is helped through 518 * the use of maplevels array which should bring immediately 519 * a lower useful level, skipping holes. 520 */ 521 if (vm_radix_keybarr(rnode, index) == TRUE) { 522 difflev = vm_radix_keydiff(index, rnode->rn_owner); 523 if (index > rnode->rn_owner) { 524 if (vm_radix_addlev(&index, maplevels, 525 difflev) > 0) 526 break; 527 } else 528 index = vm_radix_trimkey(rnode->rn_owner, 529 difflev); 530 goto restart; 531 } 532 slot = vm_radix_slot(index, rnode->rn_clev); 533 m = vm_radix_node_page(rnode->rn_child[slot]); 534 if (m != NULL && m->pindex >= index) 535 return (m); 536 if (rnode->rn_child[slot] != NULL && m == NULL) { |
506 rnode = rnode->rn_child[slot]; 507 continue; 508 } | 537 rnode = rnode->rn_child[slot]; 538 continue; 539 } |
509 /* 510 * Calculate how much to increment our index by 511 * based on the tree level. We must truncate the 512 * lower bits to start from the begnning of the 513 * next leaf. 514 */ 515 inc = 1LL << (level * VM_RADIX_WIDTH); 516 start &= ~VM_RADIX_MAX(level); | |
517 | 540 |
518 /* Avoid start address wrapping up. */ 519 if ((VM_RADIX_MAXVAL - start) < inc) { 520 rnode = NULL; 521 goto out; 522 } 523 start += inc; 524 slot++; 525 CTR4(KTR_VM, 526 "leaf: " KFRMT64(start) ", " KFRMT64(inc), 527 KSPLT64L(start), KSPLT64H(start), KSPLT64L(inc), 528 KSPLT64H(inc)); 529 CTR2(KTR_VM, "leaf: level %d, slot %d", level, slot); 530 for (; slot < VM_RADIX_COUNT; slot++, start += inc) { 531 if (rnode->rn_child[slot]) { 532 rnode = rnode->rn_child[slot]; 533 break; | 541 /* 542 * Look for an available edge or page within the current 543 * bisection node. 544 */ 545 if (slot < (VM_RADIX_COUNT - 1)) { 546 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 547 index = vm_radix_trimkey(index, rnode->rn_clev); 548 index += inc; 549 slot++; 550 for (;; index += inc, slot++) { 551 m = vm_radix_node_page(rnode->rn_child[slot]); 552 if (m != NULL && m->pindex >= index) 553 return (m); 554 if ((rnode->rn_child[slot] != NULL && 555 m == NULL) || slot == (VM_RADIX_COUNT - 1)) 556 break; |
534 } | 557 } |
535 if ((VM_RADIX_MAXVAL - start) < inc) { 536 rnode = NULL; 537 goto out; 538 } | |
539 } | 558 } |
540 if (slot == VM_RADIX_COUNT) | 559 560 /* 561 * If a valid page or edge, bigger than the search slot, is 562 * found in the traversal, skip to the next higher-level key. 563 */ 564 if (slot == (VM_RADIX_COUNT - 1) && 565 (rnode->rn_child[slot] == NULL || m != NULL)) { 566 if (rnode->rn_clev == 0 || vm_radix_addlev(&index, 567 maplevels, rnode->rn_clev - 1) > 0) 568 break; |
541 goto restart; | 569 goto restart; |
570 } 571 rnode = rnode->rn_child[slot]; |
|
542 } | 572 } |
543 544out: 545 *startp = start; 546 return (rnode); 547} 548 549/* 550 * Look up any entry at a position bigger than or equal to index. 551 */ 552vm_page_t 553vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index) 554{ 555 vm_page_t page; 556 struct vm_radix_node *rnode; 557 int slot; 558 559 CTR3(KTR_VM, "lookupn: tree %p, " KFRMT64(index), rtree, 560 KSPLT64L(index), KSPLT64H(index)); 561 if (rtree->rt_root == 0) 562 return (NULL); 563 while ((rnode = vm_radix_leaf(rtree, &index)) != NULL) { 564 slot = vm_radix_slot(index, 0); 565 for (; slot < VM_RADIX_COUNT; slot++, index++) { 566 page = vm_radix_match(rnode->rn_child[slot]); 567 if (page == NULL) { 568 569 /* 570 * The index address can wrap at the 571 * VM_RADIX_MAXVAL value. 572 * We need to make sure that index address 573 * point to the next chunk (even if wrapping) 574 * to stay consistent with default scanning 575 * behaviour. Also, because of the nature 576 * of the wrapping, the wrap up checks must 577 * be done after all the necessary controls 578 * on index are completed. 579 */ 580 if ((VM_RADIX_MAXVAL - index) == 0) 581 return (NULL); 582 continue; 583 } 584 CTR5(KTR_VM, 585 "lookupn: tree %p " KFRMT64(index) " slot %d found child %p", 586 rtree, KSPLT64L(index), KSPLT64H(index), slot, 587 page); 588 return (page); 589 } 590 MPASS((VM_RADIX_MAXVAL - index) != 0); 591 } | |
592 return (NULL); 593} 594 595/* 596 * Look up any entry at a position less than or equal to index. 597 */ 598vm_page_t 599vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) 600{ | 573 return (NULL); 574} 575 576/* 577 * Look up any entry at a position less than or equal to index. 578 */ 579vm_page_t 580vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index) 581{ |
601 struct vm_radix_node *rnode; 602 struct vm_radix_node *child; 603 vm_pindex_t max; | |
604 vm_pindex_t inc; | 582 vm_pindex_t inc; |
605 vm_page_t page; | 583 vm_page_t m; 584 struct vm_radix_node *rnode; |
606 int slot; | 585 int slot; |
607 int level; | 586 uint16_t difflev; 587 boolean_t maplevels[VM_RADIX_LIMIT + 1]; 588#ifdef INVARIANTS 589 int loops = 0; 590#endif |
608 | 591 |
609 CTR3(KTR_VM, "lookup_le: tree %p, " KFRMT64(index), rtree, 610 KSPLT64L(index), KSPLT64H(index)); | |
611restart: | 592restart: |
612 level = vm_radix_height(rtree, &rnode); 613 if (rnode == NULL) 614 return (NULL); 615 max = VM_RADIX_MAX(level); 616 if (index > max || index == 0) 617 index = max; 618 /* 619 * Search the tree from the top for any leaf node holding an index 620 * lower than 'index'. 621 */ 622 level--; | 593 KASSERT(++loops < 1000, ("%s: too many loops", __func__)); 594 for (difflev = 0; difflev < (VM_RADIX_LIMIT + 1); difflev++) 595 maplevels[difflev] = FALSE; 596 rnode = vm_radix_getroot(rtree); |
623 while (rnode) { | 597 while (rnode) { |
624 slot = vm_radix_slot(index, level); 625 CTR6(KTR_VM, 626 "lookup_le: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", 627 rtree, KSPLT64L(index), KSPLT64H(index), level, slot, 628 rnode); 629 CTR2(KTR_VM, "lookup_le: rnode %p, child %p", rnode, 630 rnode->rn_child[slot]); 631 if (level == 0) 632 break; | 598 maplevels[rnode->rn_clev] = TRUE; 599 |
633 /* | 600 /* |
634 * If we don't have an exact match we must start our search 635 * from the next leaf and adjust our index appropriately. | 601 * If the keys differ before the current bisection node 602 * the search key might rollback to the earlierst 603 * available bisection node, or to the higher value 604 * in the current domain (if the owner is smaller than the 605 * search key). 606 * The search for a valid bisection node is helped through 607 * the use of maplevels array which should bring immediately 608 * a lower useful level, skipping holes. |
636 */ | 609 */ |
637 if ((child = rnode->rn_child[slot]) == NULL) { 638 /* 639 * Calculate how much to decrement our index by 640 * based on the tree level. We must set the 641 * lower bits to start from the end of the next 642 * leaf. 643 */ 644 inc = 1LL << (level * VM_RADIX_WIDTH); 645 index |= VM_RADIX_MAX(level); | 610 if (vm_radix_keybarr(rnode, index) == TRUE) { 611 difflev = vm_radix_keydiff(index, rnode->rn_owner); 612 if (index > rnode->rn_owner) { 613 index = vm_radix_trimkey(rnode->rn_owner, 614 difflev); 615 index |= VM_RADIX_UNITLEVEL(difflev) - 1; 616 } else if (vm_radix_declev(&index, maplevels, 617 difflev) > 0) 618 break; 619 goto restart; 620 } 621 slot = vm_radix_slot(index, rnode->rn_clev); 622 m = vm_radix_node_page(rnode->rn_child[slot]); 623 if (m != NULL && m->pindex <= index) 624 return (m); 625 if (rnode->rn_child[slot] != NULL && m == NULL) { 626 rnode = rnode->rn_child[slot]; 627 continue; 628 } 629 630 /* 631 * Look for an available edge or page within the current 632 * bisection node. 633 */ 634 if (slot > 0) { 635 inc = VM_RADIX_UNITLEVEL(rnode->rn_clev); 636 index = vm_radix_trimkey(index, rnode->rn_clev); 637 index |= inc - 1; |
646 index -= inc; 647 slot--; | 638 index -= inc; 639 slot--; |
648 CTR6(KTR_VM, 649 "lookup_le: " KFRMT64(start) ", " KFRMT64(inc) ", level %d slot %d", 650 KSPLT64L(index), KSPLT64H(index), KSPLT64L(inc), 651 KSPLT64H(inc), level, slot); 652 for (; slot >= 0; slot--, index -= inc) { 653 child = rnode->rn_child[slot]; 654 if (child) | 640 for (;; index -= inc, slot--) { 641 m = vm_radix_node_page(rnode->rn_child[slot]); 642 if (m != NULL && m->pindex <= index) 643 return (m); 644 if ((rnode->rn_child[slot] != NULL && 645 m == NULL) || slot == 0) |
655 break; 656 } 657 } | 646 break; 647 } 648 } |
658 rnode = child; 659 level--; 660 } 661 if (rnode) { 662 for (; slot >= 0; slot--, index--) { 663 page = vm_radix_match(rnode->rn_child[slot]); 664 if (page) 665 return (page); | 649 650 /* 651 * If a valid page or edge, smaller then the search slot, is 652 * found in the traversal, skip to the next higher-level key. 653 */ 654 if (slot == 0 && (rnode->rn_child[slot] == NULL || m != NULL)) { 655 if (rnode->rn_clev == 0 || vm_radix_declev(&index, 656 maplevels, rnode->rn_clev - 1) > 0) 657 break; 658 goto restart; |
666 } | 659 } |
660 rnode = rnode->rn_child[slot]; |
|
667 } | 661 } |
668 if (index != -1) 669 goto restart; | |
670 return (NULL); 671} 672 673/* | 662 return (NULL); 663} 664 665/* |
674 * Remove the specified index from the tree. If possible the height of the 675 * tree is adjusted after deletion. The value stored at index is returned 676 * panics if the key is not present. | 666 * Remove the specified index from the tree. 667 * Panics if the key is not present. |
677 */ 678void 679vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 680{ | 668 */ 669void 670vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index) 671{ |
681 struct vm_radix_node *stack[VM_RADIX_LIMIT]; 682 struct vm_radix_node *rnode, *root; 683 int level; 684 int slot; | 672 struct vm_radix_node *rnode, *parent; 673 vm_page_t m; 674 int i, slot; |
685 | 675 |
686 level = vm_radix_height(rtree, &root); 687 KASSERT(index <= VM_RADIX_MAX(level), 688 ("vm_radix_remove: %p index out of range %jd.", rtree, 689 VM_RADIX_MAX(level))); 690 rnode = root; 691 level--; 692 /* 693 * Find the node and record the path in stack. 694 */ 695 while (level && rnode) { 696 stack[level] = rnode; 697 slot = vm_radix_slot(index, level); 698 CTR6(KTR_VM, 699 "remove: tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", 700 rtree, KSPLT64L(index), KSPLT64H(index), level, slot, 701 rnode); 702 CTR4(KTR_VM, "remove: tree %p, rnode %p, child %p, count %u", 703 rtree, rnode, rnode->rn_child[slot], rnode->rn_count); 704 rnode = rnode->rn_child[slot]; 705 level--; 706 } 707 KASSERT(rnode != NULL, 708 ("vm_radix_remove: index not present in the tree.\n")); 709 slot = vm_radix_slot(index, 0); 710 KASSERT(vm_radix_match(rnode->rn_child[slot]) != NULL, 711 ("vm_radix_remove: index not present in the tree.\n")); 712 | 676 parent = NULL; 677 rnode = vm_radix_getroot(rtree); |
713 for (;;) { | 678 for (;;) { |
714 CTR6(KTR_VM, 715"remove: resetting tree %p, " KFRMT64(index) ", level %d, slot %d, rnode %p", 716 rtree, KSPLT64L(index), KSPLT64H(index), level, slot, 717 rnode); 718 CTR4(KTR_VM, 719 "remove: resetting tree %p, rnode %p, child %p, count %u", 720 rtree, rnode, 721 (rnode != NULL) ? rnode->rn_child[slot] : NULL, 722 (rnode != NULL) ? rnode->rn_count : 0); 723 rnode->rn_child[slot] = NULL; 724 /* 725 * Use a write memory barrier here in order to avoid 726 * rn_count reaching 0 before to fetch the actual pointer. 727 * Concurrent node removal, infact, may want to reclaim 728 * the radix node itself before to read it. 729 */ 730 rnode->rn_count--; 731 wmb(); 732 if (rnode->rn_count > 0) | 679 if (rnode == NULL) 680 panic("vm_radix_remove: impossible to locate the key"); 681 slot = vm_radix_slot(index, rnode->rn_clev); 682 m = vm_radix_node_page(rnode->rn_child[slot]); 683 if (m != NULL && m->pindex == index) { 684 rnode->rn_child[slot] = NULL; 685 rnode->rn_count--; 686 if (rnode->rn_count > 1) 687 break; 688 if (parent == NULL) { 689 if (rnode->rn_count == 0) { 690 vm_radix_node_put(rnode); 691 vm_radix_setroot(rtree, NULL); 692 } 693 break; 694 } 695 for (i = 0; i < VM_RADIX_COUNT; i++) 696 if (rnode->rn_child[i] != NULL) 697 break; 698 KASSERT(i != VM_RADIX_COUNT, 699 ("%s: invalid node configuration", __func__)); 700 slot = vm_radix_slot(index, parent->rn_clev); 701 KASSERT(parent->rn_child[slot] == rnode, 702 ("%s: invalid child value", __func__)); 703 parent->rn_child[slot] = rnode->rn_child[i]; 704 rnode->rn_count--; 705 rnode->rn_child[i] = NULL; 706 vm_radix_node_put(rnode); |
733 break; | 707 break; |
734 vm_radix_node_put(rnode); 735 if (rnode == root) { 736 vm_radix_setroot(rtree, NULL, 0); 737 break; | |
738 } | 708 } |
739 rnode = stack[++level]; 740 slot = vm_radix_slot(index, level); 741 | 709 if (m != NULL && m->pindex != index) 710 panic("%s: invalid key found", __func__); 711 parent = rnode; 712 rnode = rnode->rn_child[slot]; |
742 } 743} 744 745/* 746 * Remove and free all the nodes from the radix tree. 747 * This function is recrusive but there is a tight control on it as the 748 * maximum depth of the tree is fixed. 749 */ 750void 751vm_radix_reclaim_allnodes(struct vm_radix *rtree) 752{ 753 struct vm_radix_node *root; | 713 } 714} 715 716/* 717 * Remove and free all the nodes from the radix tree. 718 * This function is recrusive but there is a tight control on it as the 719 * maximum depth of the tree is fixed. 720 */ 721void 722vm_radix_reclaim_allnodes(struct vm_radix *rtree) 723{ 724 struct vm_radix_node *root; |
754 int level; | |
755 | 725 |
756 if (rtree->rt_root == 0) | 726 root = vm_radix_getroot(rtree); 727 if (root == NULL) |
757 return; | 728 return; |
758 level = vm_radix_height(rtree, &root); 759 vm_radix_reclaim_allnodes_internal(root, level - 1); 760 rtree->rt_root = 0; | 729 vm_radix_reclaim_allnodes_int(root); 730 vm_radix_setroot(rtree, NULL); |
761} | 731} |
#ifdef DDB
/*
 * Show details about the given radix node.
 */
DB_SHOW_COMMAND(radixnode, db_show_radixnode)
{
	struct vm_radix_node *rnode;
	int i;

	if (!have_addr)
		return;
	rnode = (struct vm_radix_node *)addr;
	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
	    rnode->rn_clev);
	for (i = 0; i < VM_RADIX_COUNT; i++)
		if (rnode->rn_child[i] != NULL)
			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
			    i, (void *)rnode->rn_child[i],
			    (void *)vm_radix_node_page(rnode->rn_child[i]),
			    rnode->rn_clev);
}
#endif /* DDB */