/* subr_blist.c — FreeBSD revision 107913 (CVS annotation residue stripped below) */
142956Sdillon 242956Sdillon/* 342956Sdillon * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 442956Sdillon * 542956Sdillon * (c)Copyright 1998, Matthew Dillon. Terms for use and redistribution 642956Sdillon * are covered by the BSD Copyright as found in /usr/src/COPYRIGHT. 742956Sdillon * 842956Sdillon * This module implements a general bitmap allocator/deallocator. The 942956Sdillon * allocator eats around 2 bits per 'block'. The module does not 1042956Sdillon * try to interpret the meaning of a 'block' other then to return 1142956Sdillon * SWAPBLK_NONE on an allocation failure. 1242956Sdillon * 1342956Sdillon * A radix tree is used to maintain the bitmap. Two radix constants are 1442956Sdillon * involved: One for the bitmaps contained in the leaf nodes (typically 1542956Sdillon * 32), and one for the meta nodes (typically 16). Both meta and leaf 1642956Sdillon * nodes have a hint field. This field gives us a hint as to the largest 1742956Sdillon * free contiguous range of blocks under the node. It may contain a 1842956Sdillon * value that is too high, but will never contain a value that is too 1942956Sdillon * low. When the radix tree is searched, allocation failures in subtrees 2042956Sdillon * update the hint. 2142956Sdillon * 2242956Sdillon * The radix tree also implements two collapsed states for meta nodes: 2342956Sdillon * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 2442956Sdillon * in either of these two states, all information contained underneath 2542956Sdillon * the node is considered stale. These states are used to optimize 2642956Sdillon * allocation and freeing operations. 2742956Sdillon * 2842956Sdillon * The hinting greatly increases code efficiency for allocations while 2942956Sdillon * the general radix structure optimizes both allocations and frees. 
The 3042956Sdillon * radix tree should be able to operate well no matter how much 3142956Sdillon * fragmentation there is and no matter how large a bitmap is used. 3242956Sdillon * 3342956Sdillon * Unlike the rlist code, the blist code wires all necessary memory at 3442956Sdillon * creation time. Neither allocations nor frees require interaction with 3542956Sdillon * the memory subsystem. In contrast, the rlist code may allocate memory 3642956Sdillon * on an rlist_free() call. The non-blocking features of the blist code 3742956Sdillon * are used to great advantage in the swap code (vm/nswap_pager.c). The 3842956Sdillon * rlist code uses a little less overall memory then the blist code (but 3942956Sdillon * due to swap interleaving not all that much less), but the blist code 4042956Sdillon * scales much, much better. 4142956Sdillon * 4242956Sdillon * LAYOUT: The radix tree is layed out recursively using a 4342956Sdillon * linear array. Each meta node is immediately followed (layed out 4442956Sdillon * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 4542956Sdillon * is a recursive structure but one that can be easily scanned through 4642956Sdillon * a very simple 'skip' calculation. In order to support large radixes, 4742956Sdillon * portions of the tree may reside outside our memory allocation. We 4842956Sdillon * handle this with an early-termination optimization (when bighint is 4942956Sdillon * set to -1) on the scan. The memory allocation is only large enough 5042956Sdillon * to cover the number of blocks requested at creation time even if it 5142956Sdillon * must be encompassed in larger root-node radix. 5242956Sdillon * 5342956Sdillon * NOTE: the allocator cannot currently allocate more then 5442956Sdillon * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 5542956Sdillon * large' if you try. This is an area that could use improvement. 
The 5642956Sdillon * radix is large enough that this restriction does not effect the swap 5742956Sdillon * system, though. Currently only the allocation code is effected by 5842956Sdillon * this algorithmic unfeature. The freeing code can handle arbitrary 5942956Sdillon * ranges. 6042956Sdillon * 6142956Sdillon * This code can be compiled stand-alone for debugging. 6247989Sgpalmer * 6350477Speter * $FreeBSD: head/sys/kern/subr_blist.c 107913 2002-12-15 19:17:57Z dillon $ 6442956Sdillon */ 6542956Sdillon 6655206Speter#ifdef _KERNEL 6742956Sdillon 6842956Sdillon#include <sys/param.h> 6942956Sdillon#include <sys/systm.h> 7042956Sdillon#include <sys/lock.h> 7142956Sdillon#include <sys/kernel.h> 7242956Sdillon#include <sys/blist.h> 7342956Sdillon#include <sys/malloc.h> 7479224Sdillon#include <sys/proc.h> 7576827Salfred#include <sys/mutex.h> 7642956Sdillon#include <vm/vm.h> 7742956Sdillon#include <vm/vm_object.h> 7842956Sdillon#include <vm/vm_kern.h> 7942956Sdillon#include <vm/vm_extern.h> 8042956Sdillon#include <vm/vm_page.h> 8142956Sdillon 8242956Sdillon#else 8342956Sdillon 8442956Sdillon#ifndef BLIST_NO_DEBUG 8542956Sdillon#define BLIST_DEBUG 8642956Sdillon#endif 8742956Sdillon 8842956Sdillon#define SWAPBLK_NONE ((daddr_t)-1) 8942956Sdillon 9042956Sdillon#include <sys/types.h> 9142956Sdillon#include <stdio.h> 9242956Sdillon#include <string.h> 9342956Sdillon#include <stdlib.h> 9442956Sdillon#include <stdarg.h> 9542956Sdillon 96107913Sdillon#define malloc(a,b,c) calloc(a, 1) 9742956Sdillon#define free(a,b) free(a) 9842956Sdillon 9942956Sdillontypedef unsigned int u_daddr_t; 10042956Sdillon 10142956Sdillon#include <sys/blist.h> 10242956Sdillon 10342956Sdillonvoid panic(const char *ctl, ...); 10442956Sdillon 10542956Sdillon#endif 10642956Sdillon 10742956Sdillon/* 10842956Sdillon * static support functions 10942956Sdillon */ 11042956Sdillon 11142956Sdillonstatic daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); 11242956Sdillonstatic daddr_t 
blst_meta_alloc(blmeta_t *scan, daddr_t blk, 11342956Sdillon daddr_t count, daddr_t radix, int skip); 11442956Sdillonstatic void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 11542956Sdillonstatic void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 11642956Sdillon daddr_t radix, int skip, daddr_t blk); 11742956Sdillonstatic void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 11842956Sdillon daddr_t skip, blist_t dest, daddr_t count); 119107913Sdillonstatic int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count); 120107913Sdillonstatic int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, 121107913Sdillon daddr_t radix, int skip, daddr_t blk); 12242956Sdillonstatic daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, 12342956Sdillon int skip, daddr_t count); 12455206Speter#ifndef _KERNEL 12542956Sdillonstatic void blst_radix_print(blmeta_t *scan, daddr_t blk, 12642956Sdillon daddr_t radix, int skip, int tab); 12742956Sdillon#endif 12842956Sdillon 12955206Speter#ifdef _KERNEL 13042956Sdillonstatic MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 13142956Sdillon#endif 13242956Sdillon 13342956Sdillon/* 13442956Sdillon * blist_create() - create a blist capable of handling up to the specified 13542956Sdillon * number of blocks 13642956Sdillon * 13742956Sdillon * blocks must be greater then 0 13842956Sdillon * 13942956Sdillon * The smallest blist consists of a single leaf node capable of 14042956Sdillon * managing BLIST_BMAP_RADIX blocks. 14142956Sdillon */ 14242956Sdillon 14342956Sdillonblist_t 14442956Sdillonblist_create(daddr_t blocks) 14542956Sdillon{ 14642956Sdillon blist_t bl; 14742956Sdillon int radix; 14842956Sdillon int skip = 0; 14942956Sdillon 15042956Sdillon /* 15142956Sdillon * Calculate radix and skip field used for scanning. 
15242956Sdillon */ 15342956Sdillon radix = BLIST_BMAP_RADIX; 15442956Sdillon 15542956Sdillon while (radix < blocks) { 15642956Sdillon radix <<= BLIST_META_RADIX_SHIFT; 15742956Sdillon skip = (skip + 1) << BLIST_META_RADIX_SHIFT; 15842956Sdillon } 15942956Sdillon 16069781Sdwmalone bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO); 16142956Sdillon 16242956Sdillon bl->bl_blocks = blocks; 16342956Sdillon bl->bl_radix = radix; 16442956Sdillon bl->bl_skip = skip; 16542956Sdillon bl->bl_rootblks = 1 + 16642956Sdillon blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks); 16742956Sdillon bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK); 16842956Sdillon 16942956Sdillon#if defined(BLIST_DEBUG) 17042956Sdillon printf( 171107913Sdillon "BLIST representing %lld blocks (%lld MB of swap)" 172107913Sdillon ", requiring %lldK of ram\n", 173107913Sdillon (long long)bl->bl_blocks, 174107913Sdillon (long long)bl->bl_blocks * 4 / 1024, 175107913Sdillon (long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024 17642956Sdillon ); 177107913Sdillon printf("BLIST raw radix tree contains %lld records\n", 178107913Sdillon (long long)bl->bl_rootblks); 17942956Sdillon#endif 18042956Sdillon blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks); 18142956Sdillon 18242956Sdillon return(bl); 18342956Sdillon} 18442956Sdillon 18542956Sdillonvoid 18642956Sdillonblist_destroy(blist_t bl) 18742956Sdillon{ 18842956Sdillon free(bl->bl_root, M_SWAP); 18942956Sdillon free(bl, M_SWAP); 19042956Sdillon} 19142956Sdillon 19242956Sdillon/* 19342956Sdillon * blist_alloc() - reserve space in the block bitmap. Return the base 19442956Sdillon * of a contiguous region or SWAPBLK_NONE if space could 19542956Sdillon * not be allocated. 
19642956Sdillon */ 19742956Sdillon 19842956Sdillondaddr_t 19942956Sdillonblist_alloc(blist_t bl, daddr_t count) 20042956Sdillon{ 20142956Sdillon daddr_t blk = SWAPBLK_NONE; 20242956Sdillon 20342956Sdillon if (bl) { 20442956Sdillon if (bl->bl_radix == BLIST_BMAP_RADIX) 20542956Sdillon blk = blst_leaf_alloc(bl->bl_root, 0, count); 20642956Sdillon else 20742956Sdillon blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip); 20842956Sdillon if (blk != SWAPBLK_NONE) 20942956Sdillon bl->bl_free -= count; 21042956Sdillon } 21142956Sdillon return(blk); 21242956Sdillon} 21342956Sdillon 21442956Sdillon/* 21542956Sdillon * blist_free() - free up space in the block bitmap. Return the base 21642956Sdillon * of a contiguous region. Panic if an inconsistancy is 21742956Sdillon * found. 21842956Sdillon */ 21942956Sdillon 22042956Sdillonvoid 22142956Sdillonblist_free(blist_t bl, daddr_t blkno, daddr_t count) 22242956Sdillon{ 22342956Sdillon if (bl) { 22442956Sdillon if (bl->bl_radix == BLIST_BMAP_RADIX) 22542956Sdillon blst_leaf_free(bl->bl_root, blkno, count); 22642956Sdillon else 22742956Sdillon blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0); 22842956Sdillon bl->bl_free += count; 22942956Sdillon } 23042956Sdillon} 23142956Sdillon 23242956Sdillon/* 233107913Sdillon * blist_fill() - mark a region in the block bitmap as off-limits 234107913Sdillon * to the allocator (i.e. allocate it), ignoring any 235107913Sdillon * existing allocations. Return the number of blocks 236107913Sdillon * actually filled that were free before the call. 
237107913Sdillon */ 238107913Sdillon 239107913Sdillonint 240107913Sdillonblist_fill(blist_t bl, daddr_t blkno, daddr_t count) 241107913Sdillon{ 242107913Sdillon int filled; 243107913Sdillon 244107913Sdillon if (bl) { 245107913Sdillon if (bl->bl_radix == BLIST_BMAP_RADIX) 246107913Sdillon filled = blst_leaf_fill(bl->bl_root, blkno, count); 247107913Sdillon else 248107913Sdillon filled = blst_meta_fill(bl->bl_root, blkno, count, 249107913Sdillon bl->bl_radix, bl->bl_skip, 0); 250107913Sdillon bl->bl_free -= filled; 251107913Sdillon return filled; 252107913Sdillon } else 253107913Sdillon return 0; 254107913Sdillon} 255107913Sdillon 256107913Sdillon/* 25742956Sdillon * blist_resize() - resize an existing radix tree to handle the 25842956Sdillon * specified number of blocks. This will reallocate 25942956Sdillon * the tree and transfer the previous bitmap to the new 26042956Sdillon * one. When extending the tree you can specify whether 26142956Sdillon * the new blocks are to left allocated or freed. 26242956Sdillon */ 26342956Sdillon 26442956Sdillonvoid 26542956Sdillonblist_resize(blist_t *pbl, daddr_t count, int freenew) 26642956Sdillon{ 26742956Sdillon blist_t newbl = blist_create(count); 26842956Sdillon blist_t save = *pbl; 26942956Sdillon 27042956Sdillon *pbl = newbl; 27142956Sdillon if (count > save->bl_blocks) 27242956Sdillon count = save->bl_blocks; 27342956Sdillon blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 27442956Sdillon 27542956Sdillon /* 27642956Sdillon * If resizing upwards, should we free the new space or not? 
27742956Sdillon */ 27842956Sdillon if (freenew && count < newbl->bl_blocks) { 27942956Sdillon blist_free(newbl, count, newbl->bl_blocks - count); 28042956Sdillon } 28142956Sdillon blist_destroy(save); 28242956Sdillon} 28342956Sdillon 28442956Sdillon#ifdef BLIST_DEBUG 28542956Sdillon 28642956Sdillon/* 28742956Sdillon * blist_print() - dump radix tree 28842956Sdillon */ 28942956Sdillon 29042956Sdillonvoid 29142956Sdillonblist_print(blist_t bl) 29242956Sdillon{ 29342956Sdillon printf("BLIST {\n"); 29442956Sdillon blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 29542956Sdillon printf("}\n"); 29642956Sdillon} 29742956Sdillon 29842956Sdillon#endif 29942956Sdillon 30042956Sdillon/************************************************************************ 30142956Sdillon * ALLOCATION SUPPORT FUNCTIONS * 30242956Sdillon ************************************************************************ 30342956Sdillon * 30442956Sdillon * These support functions do all the actual work. They may seem 30542956Sdillon * rather longish, but that's because I've commented them up. The 30642956Sdillon * actual code is straight forward. 30742956Sdillon * 30842956Sdillon */ 30942956Sdillon 31042956Sdillon/* 31142956Sdillon * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 31242956Sdillon * 31342956Sdillon * This is the core of the allocator and is optimized for the 1 block 31442956Sdillon * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 31542956Sdillon * somewhat slower. The 1 block allocation case is log2 and extremely 31642956Sdillon * quick. 31742956Sdillon */ 31842956Sdillon 31942956Sdillonstatic daddr_t 32042956Sdillonblst_leaf_alloc( 32142956Sdillon blmeta_t *scan, 32242956Sdillon daddr_t blk, 32342956Sdillon int count 32442956Sdillon) { 32542956Sdillon u_daddr_t orig = scan->u.bmu_bitmap; 32642956Sdillon 32742956Sdillon if (orig == 0) { 32842956Sdillon /* 32942956Sdillon * Optimize bitmap all-allocated case. 
Also, count = 1 33042956Sdillon * case assumes at least 1 bit is free in the bitmap, so 33142956Sdillon * we have to take care of this case here. 33242956Sdillon */ 33342956Sdillon scan->bm_bighint = 0; 33442956Sdillon return(SWAPBLK_NONE); 33542956Sdillon } 33642956Sdillon if (count == 1) { 33742956Sdillon /* 33842956Sdillon * Optimized code to allocate one bit out of the bitmap 33942956Sdillon */ 34042956Sdillon u_daddr_t mask; 34142956Sdillon int j = BLIST_BMAP_RADIX/2; 34242956Sdillon int r = 0; 34342956Sdillon 34442956Sdillon mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2); 34542956Sdillon 34642956Sdillon while (j) { 34742956Sdillon if ((orig & mask) == 0) { 34842956Sdillon r += j; 34942956Sdillon orig >>= j; 35042956Sdillon } 35142956Sdillon j >>= 1; 35242956Sdillon mask >>= j; 35342956Sdillon } 35442956Sdillon scan->u.bmu_bitmap &= ~(1 << r); 35542956Sdillon return(blk + r); 35642956Sdillon } 35742956Sdillon if (count <= BLIST_BMAP_RADIX) { 35842956Sdillon /* 35942956Sdillon * non-optimized code to allocate N bits out of the bitmap. 36042956Sdillon * The more bits, the faster the code runs. It will run 36142956Sdillon * the slowest allocating 2 bits, but since there aren't any 36242956Sdillon * memory ops in the core loop (or shouldn't be, anyway), 36342956Sdillon * you probably won't notice the difference. 36442956Sdillon */ 36542956Sdillon int j; 36642956Sdillon int n = BLIST_BMAP_RADIX - count; 36742956Sdillon u_daddr_t mask; 36842956Sdillon 36942956Sdillon mask = (u_daddr_t)-1 >> n; 37042956Sdillon 37142956Sdillon for (j = 0; j <= n; ++j) { 37242956Sdillon if ((orig & mask) == mask) { 37342956Sdillon scan->u.bmu_bitmap &= ~mask; 37442956Sdillon return(blk + j); 37542956Sdillon } 37642956Sdillon mask = (mask << 1); 37742956Sdillon } 37842956Sdillon } 37942956Sdillon /* 38042956Sdillon * We couldn't allocate count in this subtree, update bighint. 
38142956Sdillon */ 38242956Sdillon scan->bm_bighint = count - 1; 38342956Sdillon return(SWAPBLK_NONE); 38442956Sdillon} 38542956Sdillon 38642956Sdillon/* 38742956Sdillon * blist_meta_alloc() - allocate at a meta in the radix tree. 38842956Sdillon * 38942956Sdillon * Attempt to allocate at a meta node. If we can't, we update 39042956Sdillon * bighint and return a failure. Updating bighint optimize future 39142956Sdillon * calls that hit this node. We have to check for our collapse cases 39242956Sdillon * and we have a few optimizations strewn in as well. 39342956Sdillon */ 39442956Sdillon 39542956Sdillonstatic daddr_t 39642956Sdillonblst_meta_alloc( 39742956Sdillon blmeta_t *scan, 39842956Sdillon daddr_t blk, 39942956Sdillon daddr_t count, 40042956Sdillon daddr_t radix, 40142956Sdillon int skip 40242956Sdillon) { 40342956Sdillon int i; 40442956Sdillon int next_skip = (skip >> BLIST_META_RADIX_SHIFT); 40542956Sdillon 40642956Sdillon if (scan->u.bmu_avail == 0) { 40742956Sdillon /* 40842956Sdillon * ALL-ALLOCATED special case 40942956Sdillon */ 41042956Sdillon scan->bm_bighint = count; 41142956Sdillon return(SWAPBLK_NONE); 41242956Sdillon } 41342956Sdillon 41442956Sdillon if (scan->u.bmu_avail == radix) { 41542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 41642956Sdillon 41742956Sdillon /* 41842956Sdillon * ALL-FREE special case, initialize uninitialize 41942956Sdillon * sublevel. 
42042956Sdillon */ 42142956Sdillon for (i = 1; i <= skip; i += next_skip) { 42242956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 42342956Sdillon break; 42442956Sdillon if (next_skip == 1) { 42542956Sdillon scan[i].u.bmu_bitmap = (u_daddr_t)-1; 42642956Sdillon scan[i].bm_bighint = BLIST_BMAP_RADIX; 42742956Sdillon } else { 42842956Sdillon scan[i].bm_bighint = radix; 42942956Sdillon scan[i].u.bmu_avail = radix; 43042956Sdillon } 43142956Sdillon } 43242956Sdillon } else { 43342956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 43442956Sdillon } 43542956Sdillon 43642956Sdillon for (i = 1; i <= skip; i += next_skip) { 43742956Sdillon if (count <= scan[i].bm_bighint) { 43842956Sdillon /* 43942956Sdillon * count fits in object 44042956Sdillon */ 44142956Sdillon daddr_t r; 44242956Sdillon if (next_skip == 1) { 44342956Sdillon r = blst_leaf_alloc(&scan[i], blk, count); 44442956Sdillon } else { 44542956Sdillon r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1); 44642956Sdillon } 44742956Sdillon if (r != SWAPBLK_NONE) { 44842956Sdillon scan->u.bmu_avail -= count; 44942956Sdillon if (scan->bm_bighint > scan->u.bmu_avail) 45042956Sdillon scan->bm_bighint = scan->u.bmu_avail; 45142956Sdillon return(r); 45242956Sdillon } 45342956Sdillon } else if (scan[i].bm_bighint == (daddr_t)-1) { 45442956Sdillon /* 45542956Sdillon * Terminator 45642956Sdillon */ 45742956Sdillon break; 45842956Sdillon } else if (count > radix) { 45942956Sdillon /* 46042956Sdillon * count does not fit in object even if it were 46142956Sdillon * complete free. 46242956Sdillon */ 46342956Sdillon panic("blist_meta_alloc: allocation too large"); 46442956Sdillon } 46542956Sdillon blk += radix; 46642956Sdillon } 46742956Sdillon 46842956Sdillon /* 46942956Sdillon * We couldn't allocate count in this subtree, update bighint. 
47042956Sdillon */ 47142956Sdillon if (scan->bm_bighint >= count) 47242956Sdillon scan->bm_bighint = count - 1; 47342956Sdillon return(SWAPBLK_NONE); 47442956Sdillon} 47542956Sdillon 47642956Sdillon/* 47742956Sdillon * BLST_LEAF_FREE() - free allocated block from leaf bitmap 47842956Sdillon * 47942956Sdillon */ 48042956Sdillon 48142956Sdillonstatic void 48242956Sdillonblst_leaf_free( 48342956Sdillon blmeta_t *scan, 48442956Sdillon daddr_t blk, 48542956Sdillon int count 48642956Sdillon) { 48742956Sdillon /* 48842956Sdillon * free some data in this bitmap 48942956Sdillon * 49042956Sdillon * e.g. 49142956Sdillon * 0000111111111110000 49242956Sdillon * \_________/\__/ 49342956Sdillon * v n 49442956Sdillon */ 49542956Sdillon int n = blk & (BLIST_BMAP_RADIX - 1); 49642956Sdillon u_daddr_t mask; 49742956Sdillon 49842956Sdillon mask = ((u_daddr_t)-1 << n) & 49942956Sdillon ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 50042956Sdillon 50142956Sdillon if (scan->u.bmu_bitmap & mask) 50242956Sdillon panic("blst_radix_free: freeing free block"); 50342956Sdillon scan->u.bmu_bitmap |= mask; 50442956Sdillon 50542956Sdillon /* 50642956Sdillon * We could probably do a better job here. We are required to make 50742956Sdillon * bighint at least as large as the biggest contiguous block of 50842956Sdillon * data. If we just shoehorn it, a little extra overhead will 50942956Sdillon * be incured on the next allocation (but only that one typically). 51042956Sdillon */ 51142956Sdillon scan->bm_bighint = BLIST_BMAP_RADIX; 51242956Sdillon} 51342956Sdillon 51442956Sdillon/* 51542956Sdillon * BLST_META_FREE() - free allocated blocks from radix tree meta info 51642956Sdillon * 51742956Sdillon * This support routine frees a range of blocks from the bitmap. 51842956Sdillon * The range must be entirely enclosed by this radix node. 
If a 51942956Sdillon * meta node, we break the range down recursively to free blocks 52042956Sdillon * in subnodes (which means that this code can free an arbitrary 52142956Sdillon * range whereas the allocation code cannot allocate an arbitrary 52242956Sdillon * range). 52342956Sdillon */ 52442956Sdillon 52542956Sdillonstatic void 52642956Sdillonblst_meta_free( 52742956Sdillon blmeta_t *scan, 52842956Sdillon daddr_t freeBlk, 52942956Sdillon daddr_t count, 53042956Sdillon daddr_t radix, 53142956Sdillon int skip, 53242956Sdillon daddr_t blk 53342956Sdillon) { 53442956Sdillon int i; 53542956Sdillon int next_skip = (skip >> BLIST_META_RADIX_SHIFT); 53642956Sdillon 53742956Sdillon#if 0 538107913Sdillon printf("FREE (%llx,%lld) FROM (%llx,%lld)\n", 539107913Sdillon (long long)freeBlk, (long long)count, 540107913Sdillon (long long)blk, (long long)radix 54142956Sdillon ); 54242956Sdillon#endif 54342956Sdillon 54442956Sdillon if (scan->u.bmu_avail == 0) { 54542956Sdillon /* 54642956Sdillon * ALL-ALLOCATED special case, with possible 54742956Sdillon * shortcut to ALL-FREE special case. 54842956Sdillon */ 54942956Sdillon scan->u.bmu_avail = count; 55042956Sdillon scan->bm_bighint = count; 55142956Sdillon 55242956Sdillon if (count != radix) { 55342956Sdillon for (i = 1; i <= skip; i += next_skip) { 55442956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 55542956Sdillon break; 55642956Sdillon scan[i].bm_bighint = 0; 55742956Sdillon if (next_skip == 1) { 55842956Sdillon scan[i].u.bmu_bitmap = 0; 55942956Sdillon } else { 56042956Sdillon scan[i].u.bmu_avail = 0; 56142956Sdillon } 56242956Sdillon } 56342956Sdillon /* fall through */ 56442956Sdillon } 56542956Sdillon } else { 56642956Sdillon scan->u.bmu_avail += count; 56742956Sdillon /* scan->bm_bighint = radix; */ 56842956Sdillon } 56942956Sdillon 57042956Sdillon /* 57142956Sdillon * ALL-FREE special case. 
57242956Sdillon */ 57342956Sdillon 57442956Sdillon if (scan->u.bmu_avail == radix) 57542956Sdillon return; 57642956Sdillon if (scan->u.bmu_avail > radix) 57796882Sjhb panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", 57896882Sjhb (long long)count, (long long)scan->u.bmu_avail, 57996882Sjhb (long long)radix); 58042956Sdillon 58142956Sdillon /* 58242956Sdillon * Break the free down into its components 58342956Sdillon */ 58442956Sdillon 58542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 58642956Sdillon 58742956Sdillon i = (freeBlk - blk) / radix; 58842956Sdillon blk += i * radix; 58942956Sdillon i = i * next_skip + 1; 59042956Sdillon 59142956Sdillon while (i <= skip && blk < freeBlk + count) { 59242956Sdillon daddr_t v; 59342956Sdillon 59442956Sdillon v = blk + radix - freeBlk; 59542956Sdillon if (v > count) 59642956Sdillon v = count; 59742956Sdillon 59842956Sdillon if (scan->bm_bighint == (daddr_t)-1) 59942956Sdillon panic("blst_meta_free: freeing unexpected range"); 60042956Sdillon 60142956Sdillon if (next_skip == 1) { 60242956Sdillon blst_leaf_free(&scan[i], freeBlk, v); 60342956Sdillon } else { 60442956Sdillon blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 60542956Sdillon } 60642956Sdillon if (scan->bm_bighint < scan[i].bm_bighint) 60742956Sdillon scan->bm_bighint = scan[i].bm_bighint; 60842956Sdillon count -= v; 60942956Sdillon freeBlk += v; 61042956Sdillon blk += radix; 61142956Sdillon i += next_skip; 61242956Sdillon } 61342956Sdillon} 61442956Sdillon 61542956Sdillon/* 61642956Sdillon * BLIST_RADIX_COPY() - copy one radix tree to another 61742956Sdillon * 61842956Sdillon * Locates free space in the source tree and frees it in the destination 61942956Sdillon * tree. The space may not already be free in the destination. 
62042956Sdillon */ 62142956Sdillon 62242956Sdillonstatic void blst_copy( 62342956Sdillon blmeta_t *scan, 62442956Sdillon daddr_t blk, 62542956Sdillon daddr_t radix, 62642956Sdillon daddr_t skip, 62742956Sdillon blist_t dest, 62842956Sdillon daddr_t count 62942956Sdillon) { 63042956Sdillon int next_skip; 63142956Sdillon int i; 63242956Sdillon 63342956Sdillon /* 63442956Sdillon * Leaf node 63542956Sdillon */ 63642956Sdillon 63742956Sdillon if (radix == BLIST_BMAP_RADIX) { 63842956Sdillon u_daddr_t v = scan->u.bmu_bitmap; 63942956Sdillon 64042956Sdillon if (v == (u_daddr_t)-1) { 64142956Sdillon blist_free(dest, blk, count); 64242956Sdillon } else if (v != 0) { 64342956Sdillon int i; 64442956Sdillon 64542956Sdillon for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 64642956Sdillon if (v & (1 << i)) 64742956Sdillon blist_free(dest, blk + i, 1); 64842956Sdillon } 64942956Sdillon } 65042956Sdillon return; 65142956Sdillon } 65242956Sdillon 65342956Sdillon /* 65442956Sdillon * Meta node 65542956Sdillon */ 65642956Sdillon 65742956Sdillon if (scan->u.bmu_avail == 0) { 65842956Sdillon /* 65942956Sdillon * Source all allocated, leave dest allocated 66042956Sdillon */ 66142956Sdillon return; 66242956Sdillon } 66342956Sdillon if (scan->u.bmu_avail == radix) { 66442956Sdillon /* 66542956Sdillon * Source all free, free entire dest 66642956Sdillon */ 66742956Sdillon if (count < radix) 66842956Sdillon blist_free(dest, blk, count); 66942956Sdillon else 67042956Sdillon blist_free(dest, blk, radix); 67142956Sdillon return; 67242956Sdillon } 67342956Sdillon 67442956Sdillon 67542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 67642956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 67742956Sdillon 67842956Sdillon for (i = 1; count && i <= skip; i += next_skip) { 67942956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 68042956Sdillon break; 68142956Sdillon 68242956Sdillon if (count >= radix) { 68342956Sdillon blst_copy( 68442956Sdillon &scan[i], 68542956Sdillon blk, 68642956Sdillon 
radix, 68742956Sdillon next_skip - 1, 68842956Sdillon dest, 68942956Sdillon radix 69042956Sdillon ); 69142956Sdillon count -= radix; 69242956Sdillon } else { 69342956Sdillon if (count) { 69442956Sdillon blst_copy( 69542956Sdillon &scan[i], 69642956Sdillon blk, 69742956Sdillon radix, 69842956Sdillon next_skip - 1, 69942956Sdillon dest, 70042956Sdillon count 70142956Sdillon ); 70242956Sdillon } 70342956Sdillon count = 0; 70442956Sdillon } 70542956Sdillon blk += radix; 70642956Sdillon } 70742956Sdillon} 70842956Sdillon 70942956Sdillon/* 710107913Sdillon * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 711107913Sdillon * 712107913Sdillon * This routine allocates all blocks in the specified range 713107913Sdillon * regardless of any existing allocations in that range. Returns 714107913Sdillon * the number of blocks allocated by the call. 715107913Sdillon */ 716107913Sdillon 717107913Sdillonstatic int 718107913Sdillonblst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) 719107913Sdillon{ 720107913Sdillon int n = blk & (BLIST_BMAP_RADIX - 1); 721107913Sdillon int nblks; 722107913Sdillon u_daddr_t mask, bitmap; 723107913Sdillon 724107913Sdillon mask = ((u_daddr_t)-1 << n) & 725107913Sdillon ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 726107913Sdillon 727107913Sdillon /* Count the number of blocks we're about to allocate */ 728107913Sdillon bitmap = scan->u.bmu_bitmap & mask; 729107913Sdillon for (nblks = 0; bitmap != 0; nblks++) 730107913Sdillon bitmap &= bitmap - 1; 731107913Sdillon 732107913Sdillon scan->u.bmu_bitmap &= ~mask; 733107913Sdillon return nblks; 734107913Sdillon} 735107913Sdillon 736107913Sdillon/* 737107913Sdillon * BLIST_META_FILL() - allocate specific blocks at a meta node 738107913Sdillon * 739107913Sdillon * This routine allocates the specified range of blocks, 740107913Sdillon * regardless of any existing allocations in the range. The 741107913Sdillon * range must be within the extent of this node. 
Returns the 742107913Sdillon * number of blocks allocated by the call. 743107913Sdillon */ 744107913Sdillonstatic int 745107913Sdillonblst_meta_fill( 746107913Sdillon blmeta_t *scan, 747107913Sdillon daddr_t allocBlk, 748107913Sdillon daddr_t count, 749107913Sdillon daddr_t radix, 750107913Sdillon int skip, 751107913Sdillon daddr_t blk 752107913Sdillon) { 753107913Sdillon int i; 754107913Sdillon int next_skip = (skip >> BLIST_META_RADIX_SHIFT); 755107913Sdillon int nblks = 0; 756107913Sdillon 757107913Sdillon if (count == radix || scan->u.bmu_avail == 0) { 758107913Sdillon /* 759107913Sdillon * ALL-ALLOCATED special case 760107913Sdillon */ 761107913Sdillon nblks = scan->u.bmu_avail; 762107913Sdillon scan->u.bmu_avail = 0; 763107913Sdillon scan->bm_bighint = count; 764107913Sdillon return nblks; 765107913Sdillon } 766107913Sdillon 767107913Sdillon if (scan->u.bmu_avail == radix) { 768107913Sdillon radix >>= BLIST_META_RADIX_SHIFT; 769107913Sdillon 770107913Sdillon /* 771107913Sdillon * ALL-FREE special case, initialize sublevel 772107913Sdillon */ 773107913Sdillon for (i = 1; i <= skip; i += next_skip) { 774107913Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 775107913Sdillon break; 776107913Sdillon if (next_skip == 1) { 777107913Sdillon scan[i].u.bmu_bitmap = (u_daddr_t)-1; 778107913Sdillon scan[i].bm_bighint = BLIST_BMAP_RADIX; 779107913Sdillon } else { 780107913Sdillon scan[i].bm_bighint = radix; 781107913Sdillon scan[i].u.bmu_avail = radix; 782107913Sdillon } 783107913Sdillon } 784107913Sdillon } else { 785107913Sdillon radix >>= BLIST_META_RADIX_SHIFT; 786107913Sdillon } 787107913Sdillon 788107913Sdillon if (count > radix) 789107913Sdillon panic("blist_meta_fill: allocation too large"); 790107913Sdillon 791107913Sdillon i = (allocBlk - blk) / radix; 792107913Sdillon blk += i * radix; 793107913Sdillon i = i * next_skip + 1; 794107913Sdillon 795107913Sdillon while (i <= skip && blk < allocBlk + count) { 796107913Sdillon daddr_t v; 797107913Sdillon 
798107913Sdillon v = blk + radix - allocBlk; 799107913Sdillon if (v > count) 800107913Sdillon v = count; 801107913Sdillon 802107913Sdillon if (scan->bm_bighint == (daddr_t)-1) 803107913Sdillon panic("blst_meta_fill: filling unexpected range"); 804107913Sdillon 805107913Sdillon if (next_skip == 1) { 806107913Sdillon nblks += blst_leaf_fill(&scan[i], allocBlk, v); 807107913Sdillon } else { 808107913Sdillon nblks += blst_meta_fill(&scan[i], allocBlk, v, 809107913Sdillon radix, next_skip - 1, blk); 810107913Sdillon } 811107913Sdillon count -= v; 812107913Sdillon allocBlk += v; 813107913Sdillon blk += radix; 814107913Sdillon i += next_skip; 815107913Sdillon } 816107913Sdillon scan->u.bmu_avail -= nblks; 817107913Sdillon return nblks; 818107913Sdillon} 819107913Sdillon 820107913Sdillon/* 82142956Sdillon * BLST_RADIX_INIT() - initialize radix tree 82242956Sdillon * 82342956Sdillon * Initialize our meta structures and bitmaps and calculate the exact 82442956Sdillon * amount of space required to manage 'count' blocks - this space may 82542956Sdillon * be considerably less then the calculated radix due to the large 82642956Sdillon * RADIX values we use. 82742956Sdillon */ 82842956Sdillon 82942956Sdillonstatic daddr_t 83042956Sdillonblst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count) 83142956Sdillon{ 83242956Sdillon int i; 83342956Sdillon int next_skip; 83442956Sdillon daddr_t memindex = 0; 83542956Sdillon 83642956Sdillon /* 83742956Sdillon * Leaf node 83842956Sdillon */ 83942956Sdillon 84042956Sdillon if (radix == BLIST_BMAP_RADIX) { 84142956Sdillon if (scan) { 84242956Sdillon scan->bm_bighint = 0; 84342956Sdillon scan->u.bmu_bitmap = 0; 84442956Sdillon } 84542956Sdillon return(memindex); 84642956Sdillon } 84742956Sdillon 84842956Sdillon /* 84942956Sdillon * Meta node. If allocating the entire object we can special 85042956Sdillon * case it. 
However, we need to figure out how much memory 85142956Sdillon * is required to manage 'count' blocks, so we continue on anyway. 85242956Sdillon */ 85342956Sdillon 85442956Sdillon if (scan) { 85542956Sdillon scan->bm_bighint = 0; 85642956Sdillon scan->u.bmu_avail = 0; 85742956Sdillon } 85842956Sdillon 85942956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 86042956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 86142956Sdillon 86242956Sdillon for (i = 1; i <= skip; i += next_skip) { 86342956Sdillon if (count >= radix) { 86442956Sdillon /* 86542956Sdillon * Allocate the entire object 86642956Sdillon */ 86742956Sdillon memindex = i + blst_radix_init( 86842956Sdillon ((scan) ? &scan[i] : NULL), 86942956Sdillon radix, 87042956Sdillon next_skip - 1, 87142956Sdillon radix 87242956Sdillon ); 87342956Sdillon count -= radix; 87442956Sdillon } else if (count > 0) { 87542956Sdillon /* 87642956Sdillon * Allocate a partial object 87742956Sdillon */ 87842956Sdillon memindex = i + blst_radix_init( 87942956Sdillon ((scan) ? 
&scan[i] : NULL), 88042956Sdillon radix, 88142956Sdillon next_skip - 1, 88242956Sdillon count 88342956Sdillon ); 88442956Sdillon count = 0; 88542956Sdillon } else { 88642956Sdillon /* 88742956Sdillon * Add terminator and break out 88842956Sdillon */ 88942956Sdillon if (scan) 89042956Sdillon scan[i].bm_bighint = (daddr_t)-1; 89142956Sdillon break; 89242956Sdillon } 89342956Sdillon } 89442956Sdillon if (memindex < i) 89542956Sdillon memindex = i; 89642956Sdillon return(memindex); 89742956Sdillon} 89842956Sdillon 89942956Sdillon#ifdef BLIST_DEBUG 90042956Sdillon 90142956Sdillonstatic void 90242956Sdillonblst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab) 90342956Sdillon{ 90442956Sdillon int i; 90542956Sdillon int next_skip; 90642956Sdillon int lastState = 0; 90742956Sdillon 90842956Sdillon if (radix == BLIST_BMAP_RADIX) { 90942956Sdillon printf( 910107913Sdillon "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n", 91142956Sdillon tab, tab, "", 912107913Sdillon (long long)blk, (long long)radix, 913107913Sdillon (long long)scan->u.bmu_bitmap, 914107913Sdillon (long long)scan->bm_bighint 91542956Sdillon ); 91642956Sdillon return; 91742956Sdillon } 91842956Sdillon 91942956Sdillon if (scan->u.bmu_avail == 0) { 92042956Sdillon printf( 921107913Sdillon "%*.*s(%08llx,%lld) ALL ALLOCATED\n", 92242956Sdillon tab, tab, "", 923107913Sdillon (long long)blk, 924107913Sdillon (long long)radix 92542956Sdillon ); 92642956Sdillon return; 92742956Sdillon } 92842956Sdillon if (scan->u.bmu_avail == radix) { 92942956Sdillon printf( 930107913Sdillon "%*.*s(%08llx,%lld) ALL FREE\n", 93142956Sdillon tab, tab, "", 932107913Sdillon (long long)blk, 933107913Sdillon (long long)radix 93442956Sdillon ); 93542956Sdillon return; 93642956Sdillon } 93742956Sdillon 93842956Sdillon printf( 939107913Sdillon "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", 94042956Sdillon tab, tab, "", 941107913Sdillon (long long)blk, (long long)radix, 942107913Sdillon (long 
long)scan->u.bmu_avail, 943107913Sdillon (long long)radix, 944107913Sdillon (long long)scan->bm_bighint 94542956Sdillon ); 94642956Sdillon 94742956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 94842956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 94942956Sdillon tab += 4; 95042956Sdillon 95142956Sdillon for (i = 1; i <= skip; i += next_skip) { 95242956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) { 95342956Sdillon printf( 954107913Sdillon "%*.*s(%08llx,%lld): Terminator\n", 95542956Sdillon tab, tab, "", 956107913Sdillon (long long)blk, (long long)radix 95742956Sdillon ); 95842956Sdillon lastState = 0; 95942956Sdillon break; 96042956Sdillon } 96142956Sdillon blst_radix_print( 96242956Sdillon &scan[i], 96342956Sdillon blk, 96442956Sdillon radix, 96542956Sdillon next_skip - 1, 96642956Sdillon tab 96742956Sdillon ); 96842956Sdillon blk += radix; 96942956Sdillon } 97042956Sdillon tab -= 4; 97142956Sdillon 97242956Sdillon printf( 97342956Sdillon "%*.*s}\n", 97442956Sdillon tab, tab, "" 97542956Sdillon ); 97642956Sdillon} 97742956Sdillon 97842956Sdillon#endif 97942956Sdillon 98042956Sdillon#ifdef BLIST_DEBUG 98142956Sdillon 98242956Sdillonint 98342956Sdillonmain(int ac, char **av) 98442956Sdillon{ 98542956Sdillon int size = 1024; 98642956Sdillon int i; 98742956Sdillon blist_t bl; 98842956Sdillon 98942956Sdillon for (i = 1; i < ac; ++i) { 99042956Sdillon const char *ptr = av[i]; 99142956Sdillon if (*ptr != '-') { 99242956Sdillon size = strtol(ptr, NULL, 0); 99342956Sdillon continue; 99442956Sdillon } 99542956Sdillon ptr += 2; 99642956Sdillon fprintf(stderr, "Bad option: %s\n", ptr - 2); 99742956Sdillon exit(1); 99842956Sdillon } 99942956Sdillon bl = blist_create(size); 100042956Sdillon blist_free(bl, 0, size); 100142956Sdillon 100242956Sdillon for (;;) { 100342956Sdillon char buf[1024]; 100442956Sdillon daddr_t da = 0; 100542956Sdillon daddr_t count = 0; 100642956Sdillon 100742956Sdillon 1008107913Sdillon printf("%lld/%lld/%lld> ", (long long)bl->bl_free, 
1009107913Sdillon (long long)size, (long long)bl->bl_radix); 101042956Sdillon fflush(stdout); 101142956Sdillon if (fgets(buf, sizeof(buf), stdin) == NULL) 101242956Sdillon break; 101342956Sdillon switch(buf[0]) { 101442956Sdillon case 'r': 1015107913Sdillon if (sscanf(buf + 1, "%lld", &count) == 1) { 101642956Sdillon blist_resize(&bl, count, 1); 101742956Sdillon } else { 101842956Sdillon printf("?\n"); 101942956Sdillon } 102042956Sdillon case 'p': 102142956Sdillon blist_print(bl); 102242956Sdillon break; 102342956Sdillon case 'a': 1024107913Sdillon if (sscanf(buf + 1, "%lld", &count) == 1) { 102542956Sdillon daddr_t blk = blist_alloc(bl, count); 1026107913Sdillon printf(" R=%08llx\n", (long long)blk); 102742956Sdillon } else { 102842956Sdillon printf("?\n"); 102942956Sdillon } 103042956Sdillon break; 103142956Sdillon case 'f': 1032107913Sdillon if (sscanf(buf + 1, "%llx %lld", 1033107913Sdillon (long long *)&da, (long long *)&count) == 2) { 103442956Sdillon blist_free(bl, da, count); 103542956Sdillon } else { 103642956Sdillon printf("?\n"); 103742956Sdillon } 103842956Sdillon break; 1039107913Sdillon case 'l': 1040107913Sdillon if (sscanf(buf + 1, "%llx %lld", 1041107913Sdillon (long long *)&da, (long long *)&count) == 2) { 1042107913Sdillon printf(" n=%d\n", 1043107913Sdillon blist_fill(bl, da, count)); 1044107913Sdillon } else { 1045107913Sdillon printf("?\n"); 1046107913Sdillon } 1047107913Sdillon break; 104842956Sdillon case '?': 104942956Sdillon case 'h': 105042956Sdillon puts( 105142956Sdillon "p -print\n" 105242956Sdillon "a %d -allocate\n" 105342956Sdillon "f %x %d -free\n" 1054107913Sdillon "l %x %d -fill\n" 105542956Sdillon "r %d -resize\n" 105642956Sdillon "h/? -help" 105742956Sdillon ); 105842956Sdillon break; 105942956Sdillon default: 106042956Sdillon printf("?\n"); 106142956Sdillon break; 106242956Sdillon } 106342956Sdillon } 106442956Sdillon return(0); 106542956Sdillon} 106642956Sdillon 106742956Sdillonvoid 106842956Sdillonpanic(const char *ctl, ...) 
106942956Sdillon{ 107042956Sdillon va_list va; 107142956Sdillon 107242956Sdillon va_start(va, ctl); 107342956Sdillon vfprintf(stderr, ctl, va); 107442956Sdillon fprintf(stderr, "\n"); 107542956Sdillon va_end(va); 107642956Sdillon exit(1); 107742956Sdillon} 107842956Sdillon 107942956Sdillon#endif 108042956Sdillon 1081