/* subr_blist.c revision 323665 */
1/*- 2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved. 3 * Redistribution and use in source and binary forms, with or without 4 * modification, are permitted provided that the following conditions 5 * are met: 6 * 1. Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * 2. Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 4. Neither the name of the University nor the names of its contributors 12 * may be used to endorse or promote products derived from this software 13 * without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27/* 28 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 29 * 30 * This module implements a general bitmap allocator/deallocator. The 31 * allocator eats around 2 bits per 'block'. The module does not 32 * try to interpret the meaning of a 'block' other than to return 33 * SWAPBLK_NONE on an allocation failure. 34 * 35 * A radix tree is used to maintain the bitmap. 
Two radix constants are 36 * involved: One for the bitmaps contained in the leaf nodes (typically 37 * 64), and one for the meta nodes (typically 16). Both meta and leaf 38 * nodes have a hint field. This field gives us a hint as to the largest 39 * free contiguous range of blocks under the node. It may contain a 40 * value that is too high, but will never contain a value that is too 41 * low. When the radix tree is searched, allocation failures in subtrees 42 * update the hint. 43 * 44 * The radix tree also implements two collapsed states for meta nodes: 45 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 46 * in either of these two states, all information contained underneath 47 * the node is considered stale. These states are used to optimize 48 * allocation and freeing operations. 49 * 50 * The hinting greatly increases code efficiency for allocations while 51 * the general radix structure optimizes both allocations and frees. The 52 * radix tree should be able to operate well no matter how much 53 * fragmentation there is and no matter how large a bitmap is used. 54 * 55 * The blist code wires all necessary memory at creation time. Neither 56 * allocations nor frees require interaction with the memory subsystem. 57 * The non-blocking features of the blist code are used in the swap code 58 * (vm/swap_pager.c). 59 * 60 * LAYOUT: The radix tree is laid out recursively using a 61 * linear array. Each meta node is immediately followed (laid out 62 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 63 * is a recursive structure but one that can be easily scanned through 64 * a very simple 'skip' calculation. In order to support large radixes, 65 * portions of the tree may reside outside our memory allocation. We 66 * handle this with an early-termination optimization (when bighint is 67 * set to -1) on the scan. 
The memory allocation is only large enough 68 * to cover the number of blocks requested at creation time even if it 69 * must be encompassed in larger root-node radix. 70 * 71 * NOTE: the allocator cannot currently allocate more than 72 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 73 * large' if you try. This is an area that could use improvement. The 74 * radix is large enough that this restriction does not effect the swap 75 * system, though. Currently only the allocation code is affected by 76 * this algorithmic unfeature. The freeing code can handle arbitrary 77 * ranges. 78 * 79 * This code can be compiled stand-alone for debugging. 80 */ 81 82#include <sys/cdefs.h> 83__FBSDID("$FreeBSD: stable/11/sys/kern/subr_blist.c 323665 2017-09-17 04:15:12Z alc $"); 84 85#ifdef _KERNEL 86 87#include <sys/param.h> 88#include <sys/systm.h> 89#include <sys/lock.h> 90#include <sys/kernel.h> 91#include <sys/blist.h> 92#include <sys/malloc.h> 93#include <sys/proc.h> 94#include <sys/mutex.h> 95 96#else 97 98#ifndef BLIST_NO_DEBUG 99#define BLIST_DEBUG 100#endif 101 102#include <sys/types.h> 103#include <sys/malloc.h> 104#include <stdio.h> 105#include <string.h> 106#include <stdlib.h> 107#include <stdarg.h> 108#include <stdbool.h> 109 110#define bitcount64(x) __bitcount64((uint64_t)(x)) 111#define malloc(a,b,c) calloc(a, 1) 112#define free(a,b) free(a) 113 114#include <sys/blist.h> 115 116void panic(const char *ctl, ...); 117 118#endif 119 120/* 121 * static support functions 122 */ 123static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, 124 daddr_t cursor); 125static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, 126 daddr_t radix, daddr_t skip, daddr_t cursor); 127static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 128static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 129 daddr_t radix, daddr_t skip, daddr_t blk); 130static void blst_copy(blmeta_t *scan, daddr_t blk, 
daddr_t radix, 131 daddr_t skip, blist_t dest, daddr_t count); 132static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count); 133static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, 134 daddr_t radix, daddr_t skip, daddr_t blk); 135static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t skip, 136 daddr_t count); 137#ifndef _KERNEL 138static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, 139 daddr_t skip, int tab); 140#endif 141 142#ifdef _KERNEL 143static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 144#endif 145 146/* 147 * blist_create() - create a blist capable of handling up to the specified 148 * number of blocks 149 * 150 * blocks - must be greater than 0 151 * flags - malloc flags 152 * 153 * The smallest blist consists of a single leaf node capable of 154 * managing BLIST_BMAP_RADIX blocks. 155 */ 156blist_t 157blist_create(daddr_t blocks, int flags) 158{ 159 blist_t bl; 160 daddr_t nodes, radix, skip; 161 162 /* 163 * Calculate radix and skip field used for scanning. 
164 */ 165 radix = BLIST_BMAP_RADIX; 166 skip = 0; 167 while (radix < blocks) { 168 radix *= BLIST_META_RADIX; 169 skip = (skip + 1) * BLIST_META_RADIX; 170 } 171 nodes = 1 + blst_radix_init(NULL, radix, skip, blocks); 172 173 bl = malloc(sizeof(struct blist), M_SWAP, flags); 174 if (bl == NULL) 175 return (NULL); 176 177 bl->bl_blocks = blocks; 178 bl->bl_radix = radix; 179 bl->bl_skip = skip; 180 bl->bl_cursor = 0; 181 bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags); 182 if (bl->bl_root == NULL) { 183 free(bl, M_SWAP); 184 return (NULL); 185 } 186 blst_radix_init(bl->bl_root, radix, skip, blocks); 187 188#if defined(BLIST_DEBUG) 189 printf( 190 "BLIST representing %lld blocks (%lld MB of swap)" 191 ", requiring %lldK of ram\n", 192 (long long)bl->bl_blocks, 193 (long long)bl->bl_blocks * 4 / 1024, 194 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024 195 ); 196 printf("BLIST raw radix tree contains %lld records\n", 197 (long long)nodes); 198#endif 199 200 return (bl); 201} 202 203void 204blist_destroy(blist_t bl) 205{ 206 free(bl->bl_root, M_SWAP); 207 free(bl, M_SWAP); 208} 209 210/* 211 * blist_alloc() - reserve space in the block bitmap. Return the base 212 * of a contiguous region or SWAPBLK_NONE if space could 213 * not be allocated. 214 */ 215daddr_t 216blist_alloc(blist_t bl, daddr_t count) 217{ 218 daddr_t blk; 219 220 /* 221 * This loop iterates at most twice. An allocation failure in the 222 * first iteration leads to a second iteration only if the cursor was 223 * non-zero. When the cursor is zero, an allocation failure will 224 * reduce the hint, stopping further iterations. 
225 */ 226 while (count <= bl->bl_root->bm_bighint) { 227 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, 228 bl->bl_skip, bl->bl_cursor); 229 if (blk != SWAPBLK_NONE) { 230 bl->bl_cursor = blk + count; 231 return (blk); 232 } else if (bl->bl_cursor != 0) 233 bl->bl_cursor = 0; 234 } 235 return (SWAPBLK_NONE); 236} 237 238/* 239 * blist_avail() - return the number of free blocks. 240 */ 241daddr_t 242blist_avail(blist_t bl) 243{ 244 245 if (bl->bl_radix == BLIST_BMAP_RADIX) 246 return (bitcount64(bl->bl_root->u.bmu_bitmap)); 247 else 248 return (bl->bl_root->u.bmu_avail); 249} 250 251/* 252 * blist_free() - free up space in the block bitmap. Return the base 253 * of a contiguous region. Panic if an inconsistancy is 254 * found. 255 */ 256void 257blist_free(blist_t bl, daddr_t blkno, daddr_t count) 258{ 259 260 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0); 261} 262 263/* 264 * blist_fill() - mark a region in the block bitmap as off-limits 265 * to the allocator (i.e. allocate it), ignoring any 266 * existing allocations. Return the number of blocks 267 * actually filled that were free before the call. 268 */ 269daddr_t 270blist_fill(blist_t bl, daddr_t blkno, daddr_t count) 271{ 272 273 return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix, 274 bl->bl_skip, 0)); 275} 276 277/* 278 * blist_resize() - resize an existing radix tree to handle the 279 * specified number of blocks. This will reallocate 280 * the tree and transfer the previous bitmap to the new 281 * one. When extending the tree you can specify whether 282 * the new blocks are to left allocated or freed. 
283 */ 284void 285blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) 286{ 287 blist_t newbl = blist_create(count, flags); 288 blist_t save = *pbl; 289 290 *pbl = newbl; 291 if (count > save->bl_blocks) 292 count = save->bl_blocks; 293 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 294 295 /* 296 * If resizing upwards, should we free the new space or not? 297 */ 298 if (freenew && count < newbl->bl_blocks) { 299 blist_free(newbl, count, newbl->bl_blocks - count); 300 } 301 blist_destroy(save); 302} 303 304#ifdef BLIST_DEBUG 305 306/* 307 * blist_print() - dump radix tree 308 */ 309void 310blist_print(blist_t bl) 311{ 312 printf("BLIST {\n"); 313 blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 314 printf("}\n"); 315} 316 317#endif 318 319/************************************************************************ 320 * ALLOCATION SUPPORT FUNCTIONS * 321 ************************************************************************ 322 * 323 * These support functions do all the actual work. They may seem 324 * rather longish, but that's because I've commented them up. The 325 * actual code is straight forward. 326 * 327 */ 328 329/* 330 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 331 * 332 * This is the core of the allocator and is optimized for the 333 * BLIST_BMAP_RADIX block allocation case. Otherwise, execution 334 * time is proportional to log2(count) + log2(BLIST_BMAP_RADIX). 335 */ 336static daddr_t 337blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor) 338{ 339 u_daddr_t mask; 340 int count1, hi, lo, mid, num_shifts, range1, range_ext; 341 342 if (count == BLIST_BMAP_RADIX) { 343 /* 344 * Optimize allocation of BLIST_BMAP_RADIX bits. If this wasn't 345 * a special case, then forming the final value of 'mask' below 346 * would require special handling to avoid an invalid left shift 347 * when count equals the number of bits in mask. 
348 */ 349 if (~scan->u.bmu_bitmap != 0) { 350 scan->bm_bighint = BLIST_BMAP_RADIX - 1; 351 return (SWAPBLK_NONE); 352 } 353 if (cursor != blk) 354 return (SWAPBLK_NONE); 355 scan->u.bmu_bitmap = 0; 356 scan->bm_bighint = 0; 357 return (blk); 358 } 359 range1 = 0; 360 count1 = count - 1; 361 num_shifts = fls(count1); 362 mask = scan->u.bmu_bitmap; 363 while (mask != 0 && num_shifts > 0) { 364 /* 365 * If bit i is set in mask, then bits in [i, i+range1] are set 366 * in scan->u.bmu_bitmap. The value of range1 is equal to 367 * count1 >> num_shifts. Grow range and reduce num_shifts to 0, 368 * while preserving these invariants. The updates to mask leave 369 * fewer bits set, but each bit that remains set represents a 370 * longer string of consecutive bits set in scan->u.bmu_bitmap. 371 */ 372 num_shifts--; 373 range_ext = range1 + ((count1 >> num_shifts) & 1); 374 mask &= mask >> range_ext; 375 range1 += range_ext; 376 } 377 if (mask == 0) { 378 /* 379 * Update bighint. There is no allocation bigger than range1 380 * available in this leaf. 381 */ 382 scan->bm_bighint = range1; 383 return (SWAPBLK_NONE); 384 } 385 386 /* 387 * Discard any candidates that appear before the cursor. 388 */ 389 lo = cursor - blk; 390 mask &= ~(u_daddr_t)0 << lo; 391 392 if (mask == 0) 393 return (SWAPBLK_NONE); 394 395 /* 396 * The least significant set bit in mask marks the start of the first 397 * available range of sufficient size. Clear all the bits but that one, 398 * and then perform a binary search to find its position. 399 */ 400 mask &= -mask; 401 hi = BLIST_BMAP_RADIX - count1; 402 while (lo + 1 < hi) { 403 mid = (lo + hi) >> 1; 404 if ((mask >> mid) != 0) 405 lo = mid; 406 else 407 hi = mid; 408 } 409 410 /* 411 * Set in mask exactly the bits being allocated, and clear them from 412 * the set of available bits. 
413 */ 414 mask = (mask << count) - mask; 415 scan->u.bmu_bitmap &= ~mask; 416 return (blk + lo); 417} 418 419/* 420 * blist_meta_alloc() - allocate at a meta in the radix tree. 421 * 422 * Attempt to allocate at a meta node. If we can't, we update 423 * bighint and return a failure. Updating bighint optimize future 424 * calls that hit this node. We have to check for our collapse cases 425 * and we have a few optimizations strewn in as well. 426 */ 427static daddr_t 428blst_meta_alloc(blmeta_t *scan, daddr_t blk, daddr_t count, daddr_t radix, 429 daddr_t skip, daddr_t cursor) 430{ 431 daddr_t i, next_skip, r; 432 int child; 433 bool scan_from_start; 434 435 if (radix == BLIST_BMAP_RADIX) 436 return (blst_leaf_alloc(scan, blk, count, cursor)); 437 if (scan->u.bmu_avail < count) { 438 /* 439 * The meta node's hint must be too large if the allocation 440 * exceeds the number of free blocks. Reduce the hint, and 441 * return failure. 442 */ 443 scan->bm_bighint = scan->u.bmu_avail; 444 return (SWAPBLK_NONE); 445 } 446 next_skip = skip / BLIST_META_RADIX; 447 448 /* 449 * An ALL-FREE meta node requires special handling before allocating 450 * any of its blocks. 451 */ 452 if (scan->u.bmu_avail == radix) { 453 radix /= BLIST_META_RADIX; 454 455 /* 456 * Reinitialize each of the meta node's children. An ALL-FREE 457 * meta node cannot have a terminator in any subtree. 458 */ 459 for (i = 1; i <= skip; i += next_skip) { 460 if (next_skip == 1) 461 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 462 else 463 scan[i].u.bmu_avail = radix; 464 scan[i].bm_bighint = radix; 465 } 466 } else { 467 radix /= BLIST_META_RADIX; 468 } 469 470 if (count > radix) { 471 /* 472 * The allocation exceeds the number of blocks that are 473 * managed by a subtree of this meta node. 
474 */ 475 panic("allocation too large"); 476 } 477 scan_from_start = cursor == blk; 478 child = (cursor - blk) / radix; 479 blk += child * radix; 480 for (i = 1 + child * next_skip; i <= skip; i += next_skip) { 481 if (count <= scan[i].bm_bighint) { 482 /* 483 * The allocation might fit in the i'th subtree. 484 */ 485 r = blst_meta_alloc(&scan[i], blk, count, radix, 486 next_skip - 1, cursor > blk ? cursor : blk); 487 if (r != SWAPBLK_NONE) { 488 scan->u.bmu_avail -= count; 489 return (r); 490 } 491 } else if (scan[i].bm_bighint == (daddr_t)-1) { 492 /* 493 * Terminator 494 */ 495 break; 496 } 497 blk += radix; 498 } 499 500 /* 501 * We couldn't allocate count in this subtree, update bighint. 502 */ 503 if (scan_from_start && scan->bm_bighint >= count) 504 scan->bm_bighint = count - 1; 505 506 return (SWAPBLK_NONE); 507} 508 509/* 510 * BLST_LEAF_FREE() - free allocated block from leaf bitmap 511 * 512 */ 513static void 514blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) 515{ 516 /* 517 * free some data in this bitmap 518 * 519 * e.g. 520 * 0000111111111110000 521 * \_________/\__/ 522 * v n 523 */ 524 int n = blk & (BLIST_BMAP_RADIX - 1); 525 u_daddr_t mask; 526 527 mask = ((u_daddr_t)-1 << n) & 528 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 529 530 if (scan->u.bmu_bitmap & mask) 531 panic("blst_radix_free: freeing free block"); 532 scan->u.bmu_bitmap |= mask; 533 534 /* 535 * We could probably do a better job here. We are required to make 536 * bighint at least as large as the biggest contiguous block of 537 * data. If we just shoehorn it, a little extra overhead will 538 * be incured on the next allocation (but only that one typically). 539 */ 540 scan->bm_bighint = BLIST_BMAP_RADIX; 541} 542 543/* 544 * BLST_META_FREE() - free allocated blocks from radix tree meta info 545 * 546 * This support routine frees a range of blocks from the bitmap. 547 * The range must be entirely enclosed by this radix node. 
If a 548 * meta node, we break the range down recursively to free blocks 549 * in subnodes (which means that this code can free an arbitrary 550 * range whereas the allocation code cannot allocate an arbitrary 551 * range). 552 */ 553static void 554blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, daddr_t radix, 555 daddr_t skip, daddr_t blk) 556{ 557 daddr_t i, next_skip, v; 558 int child; 559 560 if (scan->bm_bighint == (daddr_t)-1) 561 panic("freeing invalid range"); 562 if (radix == BLIST_BMAP_RADIX) 563 return (blst_leaf_free(scan, freeBlk, count)); 564 next_skip = skip / BLIST_META_RADIX; 565 566 if (scan->u.bmu_avail == 0) { 567 /* 568 * ALL-ALLOCATED special case, with possible 569 * shortcut to ALL-FREE special case. 570 */ 571 scan->u.bmu_avail = count; 572 scan->bm_bighint = count; 573 574 if (count != radix) { 575 for (i = 1; i <= skip; i += next_skip) { 576 if (scan[i].bm_bighint == (daddr_t)-1) 577 break; 578 scan[i].bm_bighint = 0; 579 if (next_skip == 1) { 580 scan[i].u.bmu_bitmap = 0; 581 } else { 582 scan[i].u.bmu_avail = 0; 583 } 584 } 585 /* fall through */ 586 } 587 } else { 588 scan->u.bmu_avail += count; 589 /* scan->bm_bighint = radix; */ 590 } 591 592 /* 593 * ALL-FREE special case. 
594 */ 595 596 if (scan->u.bmu_avail == radix) 597 return; 598 if (scan->u.bmu_avail > radix) 599 panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", 600 (long long)count, (long long)scan->u.bmu_avail, 601 (long long)radix); 602 603 /* 604 * Break the free down into its components 605 */ 606 607 radix /= BLIST_META_RADIX; 608 609 child = (freeBlk - blk) / radix; 610 blk += child * radix; 611 i = 1 + child * next_skip; 612 while (i <= skip && blk < freeBlk + count) { 613 v = blk + radix - freeBlk; 614 if (v > count) 615 v = count; 616 blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 617 if (scan->bm_bighint < scan[i].bm_bighint) 618 scan->bm_bighint = scan[i].bm_bighint; 619 count -= v; 620 freeBlk += v; 621 blk += radix; 622 i += next_skip; 623 } 624} 625 626/* 627 * BLIST_RADIX_COPY() - copy one radix tree to another 628 * 629 * Locates free space in the source tree and frees it in the destination 630 * tree. The space may not already be free in the destination. 
631 */ 632static void 633blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, daddr_t skip, 634 blist_t dest, daddr_t count) 635{ 636 daddr_t i, next_skip; 637 638 /* 639 * Leaf node 640 */ 641 642 if (radix == BLIST_BMAP_RADIX) { 643 u_daddr_t v = scan->u.bmu_bitmap; 644 645 if (v == (u_daddr_t)-1) { 646 blist_free(dest, blk, count); 647 } else if (v != 0) { 648 int i; 649 650 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 651 if (v & ((u_daddr_t)1 << i)) 652 blist_free(dest, blk + i, 1); 653 } 654 } 655 return; 656 } 657 658 /* 659 * Meta node 660 */ 661 662 if (scan->u.bmu_avail == 0) { 663 /* 664 * Source all allocated, leave dest allocated 665 */ 666 return; 667 } 668 if (scan->u.bmu_avail == radix) { 669 /* 670 * Source all free, free entire dest 671 */ 672 if (count < radix) 673 blist_free(dest, blk, count); 674 else 675 blist_free(dest, blk, radix); 676 return; 677 } 678 679 680 radix /= BLIST_META_RADIX; 681 next_skip = skip / BLIST_META_RADIX; 682 683 for (i = 1; count && i <= skip; i += next_skip) { 684 if (scan[i].bm_bighint == (daddr_t)-1) 685 break; 686 687 if (count >= radix) { 688 blst_copy(&scan[i], blk, radix, next_skip - 1, dest, 689 radix); 690 count -= radix; 691 } else { 692 if (count) { 693 blst_copy(&scan[i], blk, radix, next_skip - 1, 694 dest, count); 695 } 696 count = 0; 697 } 698 blk += radix; 699 } 700} 701 702/* 703 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 704 * 705 * This routine allocates all blocks in the specified range 706 * regardless of any existing allocations in that range. Returns 707 * the number of blocks allocated by the call. 708 */ 709static daddr_t 710blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) 711{ 712 int n = blk & (BLIST_BMAP_RADIX - 1); 713 daddr_t nblks; 714 u_daddr_t mask; 715 716 mask = ((u_daddr_t)-1 << n) & 717 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 718 719 /* Count the number of blocks that we are allocating. 
*/ 720 nblks = bitcount64(scan->u.bmu_bitmap & mask); 721 722 scan->u.bmu_bitmap &= ~mask; 723 return (nblks); 724} 725 726/* 727 * BLIST_META_FILL() - allocate specific blocks at a meta node 728 * 729 * This routine allocates the specified range of blocks, 730 * regardless of any existing allocations in the range. The 731 * range must be within the extent of this node. Returns the 732 * number of blocks allocated by the call. 733 */ 734static daddr_t 735blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, daddr_t radix, 736 daddr_t skip, daddr_t blk) 737{ 738 daddr_t i, nblks, next_skip, v; 739 int child; 740 741 if (scan->bm_bighint == (daddr_t)-1) 742 panic("filling invalid range"); 743 if (count > radix) { 744 /* 745 * The allocation exceeds the number of blocks that are 746 * managed by this node. 747 */ 748 panic("fill too large"); 749 } 750 if (radix == BLIST_BMAP_RADIX) 751 return (blst_leaf_fill(scan, allocBlk, count)); 752 if (count == radix || scan->u.bmu_avail == 0) { 753 /* 754 * ALL-ALLOCATED special case 755 */ 756 nblks = scan->u.bmu_avail; 757 scan->u.bmu_avail = 0; 758 scan->bm_bighint = 0; 759 return (nblks); 760 } 761 next_skip = skip / BLIST_META_RADIX; 762 763 /* 764 * An ALL-FREE meta node requires special handling before allocating 765 * any of its blocks. 766 */ 767 if (scan->u.bmu_avail == radix) { 768 radix /= BLIST_META_RADIX; 769 770 /* 771 * Reinitialize each of the meta node's children. An ALL-FREE 772 * meta node cannot have a terminator in any subtree. 
773 */ 774 for (i = 1; i <= skip; i += next_skip) { 775 if (next_skip == 1) 776 scan[i].u.bmu_bitmap = (u_daddr_t)-1; 777 else 778 scan[i].u.bmu_avail = radix; 779 scan[i].bm_bighint = radix; 780 } 781 } else { 782 radix /= BLIST_META_RADIX; 783 } 784 785 nblks = 0; 786 child = (allocBlk - blk) / radix; 787 blk += child * radix; 788 i = 1 + child * next_skip; 789 while (i <= skip && blk < allocBlk + count) { 790 v = blk + radix - allocBlk; 791 if (v > count) 792 v = count; 793 nblks += blst_meta_fill(&scan[i], allocBlk, v, radix, 794 next_skip - 1, blk); 795 count -= v; 796 allocBlk += v; 797 blk += radix; 798 i += next_skip; 799 } 800 scan->u.bmu_avail -= nblks; 801 return (nblks); 802} 803 804/* 805 * BLST_RADIX_INIT() - initialize radix tree 806 * 807 * Initialize our meta structures and bitmaps and calculate the exact 808 * amount of space required to manage 'count' blocks - this space may 809 * be considerably less than the calculated radix due to the large 810 * RADIX values we use. 811 */ 812static daddr_t 813blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t skip, daddr_t count) 814{ 815 daddr_t i, memindex, next_skip; 816 817 memindex = 0; 818 819 /* 820 * Leaf node 821 */ 822 823 if (radix == BLIST_BMAP_RADIX) { 824 if (scan) { 825 scan->bm_bighint = 0; 826 scan->u.bmu_bitmap = 0; 827 } 828 return (memindex); 829 } 830 831 /* 832 * Meta node. If allocating the entire object we can special 833 * case it. However, we need to figure out how much memory 834 * is required to manage 'count' blocks, so we continue on anyway. 835 */ 836 837 if (scan) { 838 scan->bm_bighint = 0; 839 scan->u.bmu_avail = 0; 840 } 841 842 radix /= BLIST_META_RADIX; 843 next_skip = skip / BLIST_META_RADIX; 844 845 for (i = 1; i <= skip; i += next_skip) { 846 if (count >= radix) { 847 /* 848 * Allocate the entire object 849 */ 850 memindex = i + 851 blst_radix_init(((scan) ? 
&scan[i] : NULL), radix, 852 next_skip - 1, radix); 853 count -= radix; 854 } else if (count > 0) { 855 /* 856 * Allocate a partial object 857 */ 858 memindex = i + 859 blst_radix_init(((scan) ? &scan[i] : NULL), radix, 860 next_skip - 1, count); 861 count = 0; 862 } else { 863 /* 864 * Add terminator and break out 865 */ 866 if (scan) 867 scan[i].bm_bighint = (daddr_t)-1; 868 break; 869 } 870 } 871 if (memindex < i) 872 memindex = i; 873 return (memindex); 874} 875 876#ifdef BLIST_DEBUG 877 878static void 879blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, daddr_t skip, 880 int tab) 881{ 882 daddr_t i, next_skip; 883 884 if (radix == BLIST_BMAP_RADIX) { 885 printf( 886 "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", 887 tab, tab, "", 888 (long long)blk, (long long)radix, 889 (long long)scan->u.bmu_bitmap, 890 (long long)scan->bm_bighint 891 ); 892 return; 893 } 894 895 if (scan->u.bmu_avail == 0) { 896 printf( 897 "%*.*s(%08llx,%lld) ALL ALLOCATED\n", 898 tab, tab, "", 899 (long long)blk, 900 (long long)radix 901 ); 902 return; 903 } 904 if (scan->u.bmu_avail == radix) { 905 printf( 906 "%*.*s(%08llx,%lld) ALL FREE\n", 907 tab, tab, "", 908 (long long)blk, 909 (long long)radix 910 ); 911 return; 912 } 913 914 printf( 915 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", 916 tab, tab, "", 917 (long long)blk, (long long)radix, 918 (long long)scan->u.bmu_avail, 919 (long long)radix, 920 (long long)scan->bm_bighint 921 ); 922 923 radix /= BLIST_META_RADIX; 924 next_skip = skip / BLIST_META_RADIX; 925 tab += 4; 926 927 for (i = 1; i <= skip; i += next_skip) { 928 if (scan[i].bm_bighint == (daddr_t)-1) { 929 printf( 930 "%*.*s(%08llx,%lld): Terminator\n", 931 tab, tab, "", 932 (long long)blk, (long long)radix 933 ); 934 break; 935 } 936 blst_radix_print(&scan[i], blk, radix, next_skip - 1, tab); 937 blk += radix; 938 } 939 tab -= 4; 940 941 printf( 942 "%*.*s}\n", 943 tab, tab, "" 944 ); 945} 946 947#endif 948 949#ifdef BLIST_DEBUG 950 951int 
952main(int ac, char **av) 953{ 954 int size = 1024; 955 int i; 956 blist_t bl; 957 958 for (i = 1; i < ac; ++i) { 959 const char *ptr = av[i]; 960 if (*ptr != '-') { 961 size = strtol(ptr, NULL, 0); 962 continue; 963 } 964 ptr += 2; 965 fprintf(stderr, "Bad option: %s\n", ptr - 2); 966 exit(1); 967 } 968 bl = blist_create(size, M_WAITOK); 969 blist_free(bl, 0, size); 970 971 for (;;) { 972 char buf[1024]; 973 long long da = 0; 974 long long count = 0; 975 976 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl), 977 (long long)size, (long long)bl->bl_radix); 978 fflush(stdout); 979 if (fgets(buf, sizeof(buf), stdin) == NULL) 980 break; 981 switch(buf[0]) { 982 case 'r': 983 if (sscanf(buf + 1, "%lld", &count) == 1) { 984 blist_resize(&bl, count, 1, M_WAITOK); 985 } else { 986 printf("?\n"); 987 } 988 case 'p': 989 blist_print(bl); 990 break; 991 case 'a': 992 if (sscanf(buf + 1, "%lld", &count) == 1) { 993 daddr_t blk = blist_alloc(bl, count); 994 printf(" R=%08llx\n", (long long)blk); 995 } else { 996 printf("?\n"); 997 } 998 break; 999 case 'f': 1000 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1001 blist_free(bl, da, count); 1002 } else { 1003 printf("?\n"); 1004 } 1005 break; 1006 case 'l': 1007 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) { 1008 printf(" n=%jd\n", 1009 (intmax_t)blist_fill(bl, da, count)); 1010 } else { 1011 printf("?\n"); 1012 } 1013 break; 1014 case '?': 1015 case 'h': 1016 puts( 1017 "p -print\n" 1018 "a %d -allocate\n" 1019 "f %x %d -free\n" 1020 "l %x %d -fill\n" 1021 "r %d -resize\n" 1022 "h/? -help" 1023 ); 1024 break; 1025 default: 1026 printf("?\n"); 1027 break; 1028 } 1029 } 1030 return(0); 1031} 1032 1033void 1034panic(const char *ctl, ...) 1035{ 1036 va_list va; 1037 1038 va_start(va, ctl); 1039 vfprintf(stderr, ctl, va); 1040 fprintf(stderr, "\n"); 1041 va_end(va); 1042 exit(1); 1043} 1044 1045#endif 1046