/* subr_blist.c revision 79224 */
/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	(c)Copyright 1998, Matthew Dillon.  Terms for use and redistribution
 *	are covered by the BSD Copyright as found in /usr/src/COPYRIGHT.
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other then to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants are
 *	involved:  One for the bitmaps contained in the leaf nodes (typically
 *	32), and one for the meta nodes (typically 16).  Both meta and leaf
 *	nodes have a hint field.  This field gives us a hint as to the largest
 *	free contiguous range of blocks under the node.  It may contain a
 *	value that is too high, but will never contain a value that is too
 *	low.  When the radix tree is searched, allocation failures in subtrees
 *	update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 *	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.  The
 *	radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	Unlike the rlist code, the blist code wires all necessary memory at
 *	creation time.  Neither allocations nor frees require interaction with
 *	the memory subsystem.  In contrast, the rlist code may allocate memory
 *	on an rlist_free() call.  The non-blocking features of the blist code
 *	are used to great advantage in the swap code (vm/nswap_pager.c).  The
 *	rlist code uses a little less overall memory then the blist code (but
 *	due to swap interleaving not all that much less), but the blist code
 *	scales much, much better.
 *
 *	LAYOUT: The radix tree is layed out recursively using a
 *	linear array.  Each meta node is immediately followed (layed out
 *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
 *	is a recursive structure but one that can be easily scanned through
 *	a very simple 'skip' calculation.  In order to support large radixes,
 *	portions of the tree may reside outside our memory allocation.  We
 *	handle this with an early-termination optimization (when bighint is
 *	set to -1) on the scan.  The memory allocation is only large enough
 *	to cover the number of blocks requested at creation time even if it
 *	must be encompassed in larger root-node radix.
 *
 *	NOTE: the allocator cannot currently allocate more then
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 *	large' if you try.  This is an area that could use improvement.
The 5642956Sdillon * radix is large enough that this restriction does not effect the swap 5742956Sdillon * system, though. Currently only the allocation code is effected by 5842956Sdillon * this algorithmic unfeature. The freeing code can handle arbitrary 5942956Sdillon * ranges. 6042956Sdillon * 6142956Sdillon * This code can be compiled stand-alone for debugging. 6247989Sgpalmer * 6350477Speter * $FreeBSD: head/sys/kern/subr_blist.c 79224 2001-07-04 16:20:28Z dillon $ 6442956Sdillon */ 6542956Sdillon 6655206Speter#ifdef _KERNEL 6742956Sdillon 6842956Sdillon#include <sys/param.h> 6942956Sdillon#include <sys/systm.h> 7042956Sdillon#include <sys/lock.h> 7142956Sdillon#include <sys/kernel.h> 7242956Sdillon#include <sys/blist.h> 7342956Sdillon#include <sys/malloc.h> 7479224Sdillon#include <sys/proc.h> 7576827Salfred#include <sys/mutex.h> 7642956Sdillon#include <vm/vm.h> 7742956Sdillon#include <vm/vm_object.h> 7842956Sdillon#include <vm/vm_kern.h> 7942956Sdillon#include <vm/vm_extern.h> 8042956Sdillon#include <vm/vm_page.h> 8142956Sdillon 8242956Sdillon#else 8342956Sdillon 8442956Sdillon#ifndef BLIST_NO_DEBUG 8542956Sdillon#define BLIST_DEBUG 8642956Sdillon#endif 8742956Sdillon 8842956Sdillon#define SWAPBLK_NONE ((daddr_t)-1) 8942956Sdillon 9042956Sdillon#include <sys/types.h> 9142956Sdillon#include <stdio.h> 9242956Sdillon#include <string.h> 9342956Sdillon#include <stdlib.h> 9442956Sdillon#include <stdarg.h> 9542956Sdillon 9642956Sdillon#define malloc(a,b,c) malloc(a) 9742956Sdillon#define free(a,b) free(a) 9842956Sdillon 9942956Sdillontypedef unsigned int u_daddr_t; 10042956Sdillon 10142956Sdillon#include <sys/blist.h> 10242956Sdillon 10342956Sdillonvoid panic(const char *ctl, ...); 10442956Sdillon 10542956Sdillon#endif 10642956Sdillon 10742956Sdillon/* 10842956Sdillon * static support functions 10942956Sdillon */ 11042956Sdillon 11142956Sdillonstatic daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count); 11242956Sdillonstatic daddr_t 
blst_meta_alloc(blmeta_t *scan, daddr_t blk, 11342956Sdillon daddr_t count, daddr_t radix, int skip); 11442956Sdillonstatic void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count); 11542956Sdillonstatic void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, 11642956Sdillon daddr_t radix, int skip, daddr_t blk); 11742956Sdillonstatic void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, 11842956Sdillon daddr_t skip, blist_t dest, daddr_t count); 11942956Sdillonstatic daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix, 12042956Sdillon int skip, daddr_t count); 12155206Speter#ifndef _KERNEL 12242956Sdillonstatic void blst_radix_print(blmeta_t *scan, daddr_t blk, 12342956Sdillon daddr_t radix, int skip, int tab); 12442956Sdillon#endif 12542956Sdillon 12655206Speter#ifdef _KERNEL 12742956Sdillonstatic MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 12842956Sdillon#endif 12942956Sdillon 13042956Sdillon/* 13142956Sdillon * blist_create() - create a blist capable of handling up to the specified 13242956Sdillon * number of blocks 13342956Sdillon * 13442956Sdillon * blocks must be greater then 0 13542956Sdillon * 13642956Sdillon * The smallest blist consists of a single leaf node capable of 13742956Sdillon * managing BLIST_BMAP_RADIX blocks. 13842956Sdillon */ 13942956Sdillon 14042956Sdillonblist_t 14142956Sdillonblist_create(daddr_t blocks) 14242956Sdillon{ 14342956Sdillon blist_t bl; 14442956Sdillon int radix; 14542956Sdillon int skip = 0; 14642956Sdillon 14742956Sdillon /* 14842956Sdillon * Calculate radix and skip field used for scanning. 
14942956Sdillon */ 15042956Sdillon radix = BLIST_BMAP_RADIX; 15142956Sdillon 15242956Sdillon while (radix < blocks) { 15342956Sdillon radix <<= BLIST_META_RADIX_SHIFT; 15442956Sdillon skip = (skip + 1) << BLIST_META_RADIX_SHIFT; 15542956Sdillon } 15642956Sdillon 15769781Sdwmalone bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO); 15842956Sdillon 15942956Sdillon bl->bl_blocks = blocks; 16042956Sdillon bl->bl_radix = radix; 16142956Sdillon bl->bl_skip = skip; 16242956Sdillon bl->bl_rootblks = 1 + 16342956Sdillon blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks); 16442956Sdillon bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK); 16542956Sdillon 16642956Sdillon#if defined(BLIST_DEBUG) 16742956Sdillon printf( 16842956Sdillon "BLIST representing %d blocks (%d MB of swap)" 16942956Sdillon ", requiring %dK of ram\n", 17042956Sdillon bl->bl_blocks, 17142956Sdillon bl->bl_blocks * 4 / 1024, 17242956Sdillon (bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024 17342956Sdillon ); 17442956Sdillon printf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks); 17542956Sdillon#endif 17642956Sdillon blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks); 17742956Sdillon 17842956Sdillon return(bl); 17942956Sdillon} 18042956Sdillon 18142956Sdillonvoid 18242956Sdillonblist_destroy(blist_t bl) 18342956Sdillon{ 18442956Sdillon free(bl->bl_root, M_SWAP); 18542956Sdillon free(bl, M_SWAP); 18642956Sdillon} 18742956Sdillon 18842956Sdillon/* 18942956Sdillon * blist_alloc() - reserve space in the block bitmap. Return the base 19042956Sdillon * of a contiguous region or SWAPBLK_NONE if space could 19142956Sdillon * not be allocated. 
19242956Sdillon */ 19342956Sdillon 19442956Sdillondaddr_t 19542956Sdillonblist_alloc(blist_t bl, daddr_t count) 19642956Sdillon{ 19742956Sdillon daddr_t blk = SWAPBLK_NONE; 19842956Sdillon 19942956Sdillon if (bl) { 20042956Sdillon if (bl->bl_radix == BLIST_BMAP_RADIX) 20142956Sdillon blk = blst_leaf_alloc(bl->bl_root, 0, count); 20242956Sdillon else 20342956Sdillon blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip); 20442956Sdillon if (blk != SWAPBLK_NONE) 20542956Sdillon bl->bl_free -= count; 20642956Sdillon } 20742956Sdillon return(blk); 20842956Sdillon} 20942956Sdillon 21042956Sdillon/* 21142956Sdillon * blist_free() - free up space in the block bitmap. Return the base 21242956Sdillon * of a contiguous region. Panic if an inconsistancy is 21342956Sdillon * found. 21442956Sdillon */ 21542956Sdillon 21642956Sdillonvoid 21742956Sdillonblist_free(blist_t bl, daddr_t blkno, daddr_t count) 21842956Sdillon{ 21942956Sdillon if (bl) { 22042956Sdillon if (bl->bl_radix == BLIST_BMAP_RADIX) 22142956Sdillon blst_leaf_free(bl->bl_root, blkno, count); 22242956Sdillon else 22342956Sdillon blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0); 22442956Sdillon bl->bl_free += count; 22542956Sdillon } 22642956Sdillon} 22742956Sdillon 22842956Sdillon/* 22942956Sdillon * blist_resize() - resize an existing radix tree to handle the 23042956Sdillon * specified number of blocks. This will reallocate 23142956Sdillon * the tree and transfer the previous bitmap to the new 23242956Sdillon * one. When extending the tree you can specify whether 23342956Sdillon * the new blocks are to left allocated or freed. 
23442956Sdillon */ 23542956Sdillon 23642956Sdillonvoid 23742956Sdillonblist_resize(blist_t *pbl, daddr_t count, int freenew) 23842956Sdillon{ 23942956Sdillon blist_t newbl = blist_create(count); 24042956Sdillon blist_t save = *pbl; 24142956Sdillon 24242956Sdillon *pbl = newbl; 24342956Sdillon if (count > save->bl_blocks) 24442956Sdillon count = save->bl_blocks; 24542956Sdillon blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 24642956Sdillon 24742956Sdillon /* 24842956Sdillon * If resizing upwards, should we free the new space or not? 24942956Sdillon */ 25042956Sdillon if (freenew && count < newbl->bl_blocks) { 25142956Sdillon blist_free(newbl, count, newbl->bl_blocks - count); 25242956Sdillon } 25342956Sdillon blist_destroy(save); 25442956Sdillon} 25542956Sdillon 25642956Sdillon#ifdef BLIST_DEBUG 25742956Sdillon 25842956Sdillon/* 25942956Sdillon * blist_print() - dump radix tree 26042956Sdillon */ 26142956Sdillon 26242956Sdillonvoid 26342956Sdillonblist_print(blist_t bl) 26442956Sdillon{ 26542956Sdillon printf("BLIST {\n"); 26642956Sdillon blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 26742956Sdillon printf("}\n"); 26842956Sdillon} 26942956Sdillon 27042956Sdillon#endif 27142956Sdillon 27242956Sdillon/************************************************************************ 27342956Sdillon * ALLOCATION SUPPORT FUNCTIONS * 27442956Sdillon ************************************************************************ 27542956Sdillon * 27642956Sdillon * These support functions do all the actual work. They may seem 27742956Sdillon * rather longish, but that's because I've commented them up. The 27842956Sdillon * actual code is straight forward. 27942956Sdillon * 28042956Sdillon */ 28142956Sdillon 28242956Sdillon/* 28342956Sdillon * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 
28442956Sdillon * 28542956Sdillon * This is the core of the allocator and is optimized for the 1 block 28642956Sdillon * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 28742956Sdillon * somewhat slower. The 1 block allocation case is log2 and extremely 28842956Sdillon * quick. 28942956Sdillon */ 29042956Sdillon 29142956Sdillonstatic daddr_t 29242956Sdillonblst_leaf_alloc( 29342956Sdillon blmeta_t *scan, 29442956Sdillon daddr_t blk, 29542956Sdillon int count 29642956Sdillon) { 29742956Sdillon u_daddr_t orig = scan->u.bmu_bitmap; 29842956Sdillon 29942956Sdillon if (orig == 0) { 30042956Sdillon /* 30142956Sdillon * Optimize bitmap all-allocated case. Also, count = 1 30242956Sdillon * case assumes at least 1 bit is free in the bitmap, so 30342956Sdillon * we have to take care of this case here. 30442956Sdillon */ 30542956Sdillon scan->bm_bighint = 0; 30642956Sdillon return(SWAPBLK_NONE); 30742956Sdillon } 30842956Sdillon if (count == 1) { 30942956Sdillon /* 31042956Sdillon * Optimized code to allocate one bit out of the bitmap 31142956Sdillon */ 31242956Sdillon u_daddr_t mask; 31342956Sdillon int j = BLIST_BMAP_RADIX/2; 31442956Sdillon int r = 0; 31542956Sdillon 31642956Sdillon mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2); 31742956Sdillon 31842956Sdillon while (j) { 31942956Sdillon if ((orig & mask) == 0) { 32042956Sdillon r += j; 32142956Sdillon orig >>= j; 32242956Sdillon } 32342956Sdillon j >>= 1; 32442956Sdillon mask >>= j; 32542956Sdillon } 32642956Sdillon scan->u.bmu_bitmap &= ~(1 << r); 32742956Sdillon return(blk + r); 32842956Sdillon } 32942956Sdillon if (count <= BLIST_BMAP_RADIX) { 33042956Sdillon /* 33142956Sdillon * non-optimized code to allocate N bits out of the bitmap. 33242956Sdillon * The more bits, the faster the code runs. 
It will run 33342956Sdillon * the slowest allocating 2 bits, but since there aren't any 33442956Sdillon * memory ops in the core loop (or shouldn't be, anyway), 33542956Sdillon * you probably won't notice the difference. 33642956Sdillon */ 33742956Sdillon int j; 33842956Sdillon int n = BLIST_BMAP_RADIX - count; 33942956Sdillon u_daddr_t mask; 34042956Sdillon 34142956Sdillon mask = (u_daddr_t)-1 >> n; 34242956Sdillon 34342956Sdillon for (j = 0; j <= n; ++j) { 34442956Sdillon if ((orig & mask) == mask) { 34542956Sdillon scan->u.bmu_bitmap &= ~mask; 34642956Sdillon return(blk + j); 34742956Sdillon } 34842956Sdillon mask = (mask << 1); 34942956Sdillon } 35042956Sdillon } 35142956Sdillon /* 35242956Sdillon * We couldn't allocate count in this subtree, update bighint. 35342956Sdillon */ 35442956Sdillon scan->bm_bighint = count - 1; 35542956Sdillon return(SWAPBLK_NONE); 35642956Sdillon} 35742956Sdillon 35842956Sdillon/* 35942956Sdillon * blist_meta_alloc() - allocate at a meta in the radix tree. 36042956Sdillon * 36142956Sdillon * Attempt to allocate at a meta node. If we can't, we update 36242956Sdillon * bighint and return a failure. Updating bighint optimize future 36342956Sdillon * calls that hit this node. We have to check for our collapse cases 36442956Sdillon * and we have a few optimizations strewn in as well. 
36542956Sdillon */ 36642956Sdillon 36742956Sdillonstatic daddr_t 36842956Sdillonblst_meta_alloc( 36942956Sdillon blmeta_t *scan, 37042956Sdillon daddr_t blk, 37142956Sdillon daddr_t count, 37242956Sdillon daddr_t radix, 37342956Sdillon int skip 37442956Sdillon) { 37542956Sdillon int i; 37642956Sdillon int next_skip = (skip >> BLIST_META_RADIX_SHIFT); 37742956Sdillon 37842956Sdillon if (scan->u.bmu_avail == 0) { 37942956Sdillon /* 38042956Sdillon * ALL-ALLOCATED special case 38142956Sdillon */ 38242956Sdillon scan->bm_bighint = count; 38342956Sdillon return(SWAPBLK_NONE); 38442956Sdillon } 38542956Sdillon 38642956Sdillon if (scan->u.bmu_avail == radix) { 38742956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 38842956Sdillon 38942956Sdillon /* 39042956Sdillon * ALL-FREE special case, initialize uninitialize 39142956Sdillon * sublevel. 39242956Sdillon */ 39342956Sdillon for (i = 1; i <= skip; i += next_skip) { 39442956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 39542956Sdillon break; 39642956Sdillon if (next_skip == 1) { 39742956Sdillon scan[i].u.bmu_bitmap = (u_daddr_t)-1; 39842956Sdillon scan[i].bm_bighint = BLIST_BMAP_RADIX; 39942956Sdillon } else { 40042956Sdillon scan[i].bm_bighint = radix; 40142956Sdillon scan[i].u.bmu_avail = radix; 40242956Sdillon } 40342956Sdillon } 40442956Sdillon } else { 40542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 40642956Sdillon } 40742956Sdillon 40842956Sdillon for (i = 1; i <= skip; i += next_skip) { 40942956Sdillon if (count <= scan[i].bm_bighint) { 41042956Sdillon /* 41142956Sdillon * count fits in object 41242956Sdillon */ 41342956Sdillon daddr_t r; 41442956Sdillon if (next_skip == 1) { 41542956Sdillon r = blst_leaf_alloc(&scan[i], blk, count); 41642956Sdillon } else { 41742956Sdillon r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1); 41842956Sdillon } 41942956Sdillon if (r != SWAPBLK_NONE) { 42042956Sdillon scan->u.bmu_avail -= count; 42142956Sdillon if (scan->bm_bighint > scan->u.bmu_avail) 42242956Sdillon 
scan->bm_bighint = scan->u.bmu_avail; 42342956Sdillon return(r); 42442956Sdillon } 42542956Sdillon } else if (scan[i].bm_bighint == (daddr_t)-1) { 42642956Sdillon /* 42742956Sdillon * Terminator 42842956Sdillon */ 42942956Sdillon break; 43042956Sdillon } else if (count > radix) { 43142956Sdillon /* 43242956Sdillon * count does not fit in object even if it were 43342956Sdillon * complete free. 43442956Sdillon */ 43542956Sdillon panic("blist_meta_alloc: allocation too large"); 43642956Sdillon } 43742956Sdillon blk += radix; 43842956Sdillon } 43942956Sdillon 44042956Sdillon /* 44142956Sdillon * We couldn't allocate count in this subtree, update bighint. 44242956Sdillon */ 44342956Sdillon if (scan->bm_bighint >= count) 44442956Sdillon scan->bm_bighint = count - 1; 44542956Sdillon return(SWAPBLK_NONE); 44642956Sdillon} 44742956Sdillon 44842956Sdillon/* 44942956Sdillon * BLST_LEAF_FREE() - free allocated block from leaf bitmap 45042956Sdillon * 45142956Sdillon */ 45242956Sdillon 45342956Sdillonstatic void 45442956Sdillonblst_leaf_free( 45542956Sdillon blmeta_t *scan, 45642956Sdillon daddr_t blk, 45742956Sdillon int count 45842956Sdillon) { 45942956Sdillon /* 46042956Sdillon * free some data in this bitmap 46142956Sdillon * 46242956Sdillon * e.g. 46342956Sdillon * 0000111111111110000 46442956Sdillon * \_________/\__/ 46542956Sdillon * v n 46642956Sdillon */ 46742956Sdillon int n = blk & (BLIST_BMAP_RADIX - 1); 46842956Sdillon u_daddr_t mask; 46942956Sdillon 47042956Sdillon mask = ((u_daddr_t)-1 << n) & 47142956Sdillon ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 47242956Sdillon 47342956Sdillon if (scan->u.bmu_bitmap & mask) 47442956Sdillon panic("blst_radix_free: freeing free block"); 47542956Sdillon scan->u.bmu_bitmap |= mask; 47642956Sdillon 47742956Sdillon /* 47842956Sdillon * We could probably do a better job here. We are required to make 47942956Sdillon * bighint at least as large as the biggest contiguous block of 48042956Sdillon * data. 
If we just shoehorn it, a little extra overhead will 48142956Sdillon * be incured on the next allocation (but only that one typically). 48242956Sdillon */ 48342956Sdillon scan->bm_bighint = BLIST_BMAP_RADIX; 48442956Sdillon} 48542956Sdillon 48642956Sdillon/* 48742956Sdillon * BLST_META_FREE() - free allocated blocks from radix tree meta info 48842956Sdillon * 48942956Sdillon * This support routine frees a range of blocks from the bitmap. 49042956Sdillon * The range must be entirely enclosed by this radix node. If a 49142956Sdillon * meta node, we break the range down recursively to free blocks 49242956Sdillon * in subnodes (which means that this code can free an arbitrary 49342956Sdillon * range whereas the allocation code cannot allocate an arbitrary 49442956Sdillon * range). 49542956Sdillon */ 49642956Sdillon 49742956Sdillonstatic void 49842956Sdillonblst_meta_free( 49942956Sdillon blmeta_t *scan, 50042956Sdillon daddr_t freeBlk, 50142956Sdillon daddr_t count, 50242956Sdillon daddr_t radix, 50342956Sdillon int skip, 50442956Sdillon daddr_t blk 50542956Sdillon) { 50642956Sdillon int i; 50742956Sdillon int next_skip = (skip >> BLIST_META_RADIX_SHIFT); 50842956Sdillon 50942956Sdillon#if 0 51042956Sdillon printf("FREE (%x,%d) FROM (%x,%d)\n", 51142956Sdillon freeBlk, count, 51242956Sdillon blk, radix 51342956Sdillon ); 51442956Sdillon#endif 51542956Sdillon 51642956Sdillon if (scan->u.bmu_avail == 0) { 51742956Sdillon /* 51842956Sdillon * ALL-ALLOCATED special case, with possible 51942956Sdillon * shortcut to ALL-FREE special case. 
52042956Sdillon */ 52142956Sdillon scan->u.bmu_avail = count; 52242956Sdillon scan->bm_bighint = count; 52342956Sdillon 52442956Sdillon if (count != radix) { 52542956Sdillon for (i = 1; i <= skip; i += next_skip) { 52642956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 52742956Sdillon break; 52842956Sdillon scan[i].bm_bighint = 0; 52942956Sdillon if (next_skip == 1) { 53042956Sdillon scan[i].u.bmu_bitmap = 0; 53142956Sdillon } else { 53242956Sdillon scan[i].u.bmu_avail = 0; 53342956Sdillon } 53442956Sdillon } 53542956Sdillon /* fall through */ 53642956Sdillon } 53742956Sdillon } else { 53842956Sdillon scan->u.bmu_avail += count; 53942956Sdillon /* scan->bm_bighint = radix; */ 54042956Sdillon } 54142956Sdillon 54242956Sdillon /* 54342956Sdillon * ALL-FREE special case. 54442956Sdillon */ 54542956Sdillon 54642956Sdillon if (scan->u.bmu_avail == radix) 54742956Sdillon return; 54842956Sdillon if (scan->u.bmu_avail > radix) 54942956Sdillon panic("blst_meta_free: freeing already free blocks (%d) %d/%d", count, scan->u.bmu_avail, radix); 55042956Sdillon 55142956Sdillon /* 55242956Sdillon * Break the free down into its components 55342956Sdillon */ 55442956Sdillon 55542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 55642956Sdillon 55742956Sdillon i = (freeBlk - blk) / radix; 55842956Sdillon blk += i * radix; 55942956Sdillon i = i * next_skip + 1; 56042956Sdillon 56142956Sdillon while (i <= skip && blk < freeBlk + count) { 56242956Sdillon daddr_t v; 56342956Sdillon 56442956Sdillon v = blk + radix - freeBlk; 56542956Sdillon if (v > count) 56642956Sdillon v = count; 56742956Sdillon 56842956Sdillon if (scan->bm_bighint == (daddr_t)-1) 56942956Sdillon panic("blst_meta_free: freeing unexpected range"); 57042956Sdillon 57142956Sdillon if (next_skip == 1) { 57242956Sdillon blst_leaf_free(&scan[i], freeBlk, v); 57342956Sdillon } else { 57442956Sdillon blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk); 57542956Sdillon } 57642956Sdillon if (scan->bm_bighint < 
scan[i].bm_bighint) 57742956Sdillon scan->bm_bighint = scan[i].bm_bighint; 57842956Sdillon count -= v; 57942956Sdillon freeBlk += v; 58042956Sdillon blk += radix; 58142956Sdillon i += next_skip; 58242956Sdillon } 58342956Sdillon} 58442956Sdillon 58542956Sdillon/* 58642956Sdillon * BLIST_RADIX_COPY() - copy one radix tree to another 58742956Sdillon * 58842956Sdillon * Locates free space in the source tree and frees it in the destination 58942956Sdillon * tree. The space may not already be free in the destination. 59042956Sdillon */ 59142956Sdillon 59242956Sdillonstatic void blst_copy( 59342956Sdillon blmeta_t *scan, 59442956Sdillon daddr_t blk, 59542956Sdillon daddr_t radix, 59642956Sdillon daddr_t skip, 59742956Sdillon blist_t dest, 59842956Sdillon daddr_t count 59942956Sdillon) { 60042956Sdillon int next_skip; 60142956Sdillon int i; 60242956Sdillon 60342956Sdillon /* 60442956Sdillon * Leaf node 60542956Sdillon */ 60642956Sdillon 60742956Sdillon if (radix == BLIST_BMAP_RADIX) { 60842956Sdillon u_daddr_t v = scan->u.bmu_bitmap; 60942956Sdillon 61042956Sdillon if (v == (u_daddr_t)-1) { 61142956Sdillon blist_free(dest, blk, count); 61242956Sdillon } else if (v != 0) { 61342956Sdillon int i; 61442956Sdillon 61542956Sdillon for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 61642956Sdillon if (v & (1 << i)) 61742956Sdillon blist_free(dest, blk + i, 1); 61842956Sdillon } 61942956Sdillon } 62042956Sdillon return; 62142956Sdillon } 62242956Sdillon 62342956Sdillon /* 62442956Sdillon * Meta node 62542956Sdillon */ 62642956Sdillon 62742956Sdillon if (scan->u.bmu_avail == 0) { 62842956Sdillon /* 62942956Sdillon * Source all allocated, leave dest allocated 63042956Sdillon */ 63142956Sdillon return; 63242956Sdillon } 63342956Sdillon if (scan->u.bmu_avail == radix) { 63442956Sdillon /* 63542956Sdillon * Source all free, free entire dest 63642956Sdillon */ 63742956Sdillon if (count < radix) 63842956Sdillon blist_free(dest, blk, count); 63942956Sdillon else 64042956Sdillon 
blist_free(dest, blk, radix); 64142956Sdillon return; 64242956Sdillon } 64342956Sdillon 64442956Sdillon 64542956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 64642956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 64742956Sdillon 64842956Sdillon for (i = 1; count && i <= skip; i += next_skip) { 64942956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) 65042956Sdillon break; 65142956Sdillon 65242956Sdillon if (count >= radix) { 65342956Sdillon blst_copy( 65442956Sdillon &scan[i], 65542956Sdillon blk, 65642956Sdillon radix, 65742956Sdillon next_skip - 1, 65842956Sdillon dest, 65942956Sdillon radix 66042956Sdillon ); 66142956Sdillon count -= radix; 66242956Sdillon } else { 66342956Sdillon if (count) { 66442956Sdillon blst_copy( 66542956Sdillon &scan[i], 66642956Sdillon blk, 66742956Sdillon radix, 66842956Sdillon next_skip - 1, 66942956Sdillon dest, 67042956Sdillon count 67142956Sdillon ); 67242956Sdillon } 67342956Sdillon count = 0; 67442956Sdillon } 67542956Sdillon blk += radix; 67642956Sdillon } 67742956Sdillon} 67842956Sdillon 67942956Sdillon/* 68042956Sdillon * BLST_RADIX_INIT() - initialize radix tree 68142956Sdillon * 68242956Sdillon * Initialize our meta structures and bitmaps and calculate the exact 68342956Sdillon * amount of space required to manage 'count' blocks - this space may 68442956Sdillon * be considerably less then the calculated radix due to the large 68542956Sdillon * RADIX values we use. 
68642956Sdillon */ 68742956Sdillon 68842956Sdillonstatic daddr_t 68942956Sdillonblst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count) 69042956Sdillon{ 69142956Sdillon int i; 69242956Sdillon int next_skip; 69342956Sdillon daddr_t memindex = 0; 69442956Sdillon 69542956Sdillon /* 69642956Sdillon * Leaf node 69742956Sdillon */ 69842956Sdillon 69942956Sdillon if (radix == BLIST_BMAP_RADIX) { 70042956Sdillon if (scan) { 70142956Sdillon scan->bm_bighint = 0; 70242956Sdillon scan->u.bmu_bitmap = 0; 70342956Sdillon } 70442956Sdillon return(memindex); 70542956Sdillon } 70642956Sdillon 70742956Sdillon /* 70842956Sdillon * Meta node. If allocating the entire object we can special 70942956Sdillon * case it. However, we need to figure out how much memory 71042956Sdillon * is required to manage 'count' blocks, so we continue on anyway. 71142956Sdillon */ 71242956Sdillon 71342956Sdillon if (scan) { 71442956Sdillon scan->bm_bighint = 0; 71542956Sdillon scan->u.bmu_avail = 0; 71642956Sdillon } 71742956Sdillon 71842956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 71942956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 72042956Sdillon 72142956Sdillon for (i = 1; i <= skip; i += next_skip) { 72242956Sdillon if (count >= radix) { 72342956Sdillon /* 72442956Sdillon * Allocate the entire object 72542956Sdillon */ 72642956Sdillon memindex = i + blst_radix_init( 72742956Sdillon ((scan) ? &scan[i] : NULL), 72842956Sdillon radix, 72942956Sdillon next_skip - 1, 73042956Sdillon radix 73142956Sdillon ); 73242956Sdillon count -= radix; 73342956Sdillon } else if (count > 0) { 73442956Sdillon /* 73542956Sdillon * Allocate a partial object 73642956Sdillon */ 73742956Sdillon memindex = i + blst_radix_init( 73842956Sdillon ((scan) ? 
&scan[i] : NULL), 73942956Sdillon radix, 74042956Sdillon next_skip - 1, 74142956Sdillon count 74242956Sdillon ); 74342956Sdillon count = 0; 74442956Sdillon } else { 74542956Sdillon /* 74642956Sdillon * Add terminator and break out 74742956Sdillon */ 74842956Sdillon if (scan) 74942956Sdillon scan[i].bm_bighint = (daddr_t)-1; 75042956Sdillon break; 75142956Sdillon } 75242956Sdillon } 75342956Sdillon if (memindex < i) 75442956Sdillon memindex = i; 75542956Sdillon return(memindex); 75642956Sdillon} 75742956Sdillon 75842956Sdillon#ifdef BLIST_DEBUG 75942956Sdillon 76042956Sdillonstatic void 76142956Sdillonblst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab) 76242956Sdillon{ 76342956Sdillon int i; 76442956Sdillon int next_skip; 76542956Sdillon int lastState = 0; 76642956Sdillon 76742956Sdillon if (radix == BLIST_BMAP_RADIX) { 76842956Sdillon printf( 76942956Sdillon "%*.*s(%04x,%d): bitmap %08x big=%d\n", 77042956Sdillon tab, tab, "", 77142956Sdillon blk, radix, 77242956Sdillon scan->u.bmu_bitmap, 77342956Sdillon scan->bm_bighint 77442956Sdillon ); 77542956Sdillon return; 77642956Sdillon } 77742956Sdillon 77842956Sdillon if (scan->u.bmu_avail == 0) { 77942956Sdillon printf( 78042956Sdillon "%*.*s(%04x,%d) ALL ALLOCATED\n", 78142956Sdillon tab, tab, "", 78242956Sdillon blk, 78342956Sdillon radix 78442956Sdillon ); 78542956Sdillon return; 78642956Sdillon } 78742956Sdillon if (scan->u.bmu_avail == radix) { 78842956Sdillon printf( 78942956Sdillon "%*.*s(%04x,%d) ALL FREE\n", 79042956Sdillon tab, tab, "", 79142956Sdillon blk, 79242956Sdillon radix 79342956Sdillon ); 79442956Sdillon return; 79542956Sdillon } 79642956Sdillon 79742956Sdillon printf( 79842956Sdillon "%*.*s(%04x,%d): subtree (%d/%d) big=%d {\n", 79942956Sdillon tab, tab, "", 80042956Sdillon blk, radix, 80142956Sdillon scan->u.bmu_avail, 80242956Sdillon radix, 80342956Sdillon scan->bm_bighint 80442956Sdillon ); 80542956Sdillon 80642956Sdillon radix >>= BLIST_META_RADIX_SHIFT; 
80742956Sdillon next_skip = (skip >> BLIST_META_RADIX_SHIFT); 80842956Sdillon tab += 4; 80942956Sdillon 81042956Sdillon for (i = 1; i <= skip; i += next_skip) { 81142956Sdillon if (scan[i].bm_bighint == (daddr_t)-1) { 81242956Sdillon printf( 81342956Sdillon "%*.*s(%04x,%d): Terminator\n", 81442956Sdillon tab, tab, "", 81542956Sdillon blk, radix 81642956Sdillon ); 81742956Sdillon lastState = 0; 81842956Sdillon break; 81942956Sdillon } 82042956Sdillon blst_radix_print( 82142956Sdillon &scan[i], 82242956Sdillon blk, 82342956Sdillon radix, 82442956Sdillon next_skip - 1, 82542956Sdillon tab 82642956Sdillon ); 82742956Sdillon blk += radix; 82842956Sdillon } 82942956Sdillon tab -= 4; 83042956Sdillon 83142956Sdillon printf( 83242956Sdillon "%*.*s}\n", 83342956Sdillon tab, tab, "" 83442956Sdillon ); 83542956Sdillon} 83642956Sdillon 83742956Sdillon#endif 83842956Sdillon 83942956Sdillon#ifdef BLIST_DEBUG 84042956Sdillon 84142956Sdillonint 84242956Sdillonmain(int ac, char **av) 84342956Sdillon{ 84442956Sdillon int size = 1024; 84542956Sdillon int i; 84642956Sdillon blist_t bl; 84742956Sdillon 84842956Sdillon for (i = 1; i < ac; ++i) { 84942956Sdillon const char *ptr = av[i]; 85042956Sdillon if (*ptr != '-') { 85142956Sdillon size = strtol(ptr, NULL, 0); 85242956Sdillon continue; 85342956Sdillon } 85442956Sdillon ptr += 2; 85542956Sdillon fprintf(stderr, "Bad option: %s\n", ptr - 2); 85642956Sdillon exit(1); 85742956Sdillon } 85842956Sdillon bl = blist_create(size); 85942956Sdillon blist_free(bl, 0, size); 86042956Sdillon 86142956Sdillon for (;;) { 86242956Sdillon char buf[1024]; 86342956Sdillon daddr_t da = 0; 86442956Sdillon daddr_t count = 0; 86542956Sdillon 86642956Sdillon 86742956Sdillon printf("%d/%d/%d> ", bl->bl_free, size, bl->bl_radix); 86842956Sdillon fflush(stdout); 86942956Sdillon if (fgets(buf, sizeof(buf), stdin) == NULL) 87042956Sdillon break; 87142956Sdillon switch(buf[0]) { 87242956Sdillon case 'r': 87342956Sdillon if (sscanf(buf + 1, "%d", &count) == 1) { 
87442956Sdillon blist_resize(&bl, count, 1); 87542956Sdillon } else { 87642956Sdillon printf("?\n"); 87742956Sdillon } 87842956Sdillon case 'p': 87942956Sdillon blist_print(bl); 88042956Sdillon break; 88142956Sdillon case 'a': 88242956Sdillon if (sscanf(buf + 1, "%d", &count) == 1) { 88342956Sdillon daddr_t blk = blist_alloc(bl, count); 88442956Sdillon printf(" R=%04x\n", blk); 88542956Sdillon } else { 88642956Sdillon printf("?\n"); 88742956Sdillon } 88842956Sdillon break; 88942956Sdillon case 'f': 89042956Sdillon if (sscanf(buf + 1, "%x %d", &da, &count) == 2) { 89142956Sdillon blist_free(bl, da, count); 89242956Sdillon } else { 89342956Sdillon printf("?\n"); 89442956Sdillon } 89542956Sdillon break; 89642956Sdillon case '?': 89742956Sdillon case 'h': 89842956Sdillon puts( 89942956Sdillon "p -print\n" 90042956Sdillon "a %d -allocate\n" 90142956Sdillon "f %x %d -free\n" 90242956Sdillon "r %d -resize\n" 90342956Sdillon "h/? -help" 90442956Sdillon ); 90542956Sdillon break; 90642956Sdillon default: 90742956Sdillon printf("?\n"); 90842956Sdillon break; 90942956Sdillon } 91042956Sdillon } 91142956Sdillon return(0); 91242956Sdillon} 91342956Sdillon 91442956Sdillonvoid 91542956Sdillonpanic(const char *ctl, ...) 91642956Sdillon{ 91742956Sdillon va_list va; 91842956Sdillon 91942956Sdillon va_start(va, ctl); 92042956Sdillon vfprintf(stderr, ctl, va); 92142956Sdillon fprintf(stderr, "\n"); 92242956Sdillon va_end(va); 92342956Sdillon exit(1); 92442956Sdillon} 92542956Sdillon 92642956Sdillon#endif 92742956Sdillon 928