subr_blist.c revision 79224
142956Sdillon
242956Sdillon/*
342956Sdillon * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
442956Sdillon *
542956Sdillon *	(c)Copyright 1998, Matthew Dillon.  Terms for use and redistribution
642956Sdillon *	are covered by the BSD Copyright as found in /usr/src/COPYRIGHT.
742956Sdillon *
842956Sdillon *	This module implements a general bitmap allocator/deallocator.  The
942956Sdillon *	allocator eats around 2 bits per 'block'.  The module does not
1042956Sdillon *	try to interpret the meaning of a 'block' other then to return
1142956Sdillon *	SWAPBLK_NONE on an allocation failure.
1242956Sdillon *
1342956Sdillon *	A radix tree is used to maintain the bitmap.  Two radix constants are
1442956Sdillon *	involved:  One for the bitmaps contained in the leaf nodes (typically
1542956Sdillon *	32), and one for the meta nodes (typically 16).  Both meta and leaf
1642956Sdillon *	nodes have a hint field.  This field gives us a hint as to the largest
1742956Sdillon *	free contiguous range of blocks under the node.  It may contain a
1842956Sdillon *	value that is too high, but will never contain a value that is too
1942956Sdillon *	low.  When the radix tree is searched, allocation failures in subtrees
2042956Sdillon *	update the hint.
2142956Sdillon *
2242956Sdillon *	The radix tree also implements two collapsed states for meta nodes:
2342956Sdillon *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
2442956Sdillon *	in either of these two states, all information contained underneath
2542956Sdillon *	the node is considered stale.  These states are used to optimize
2642956Sdillon *	allocation and freeing operations.
2742956Sdillon *
2842956Sdillon * 	The hinting greatly increases code efficiency for allocations while
2942956Sdillon *	the general radix structure optimizes both allocations and frees.  The
3042956Sdillon *	radix tree should be able to operate well no matter how much
3142956Sdillon *	fragmentation there is and no matter how large a bitmap is used.
3242956Sdillon *
3342956Sdillon *	Unlike the rlist code, the blist code wires all necessary memory at
3442956Sdillon *	creation time.  Neither allocations nor frees require interaction with
3542956Sdillon *	the memory subsystem.  In contrast, the rlist code may allocate memory
3642956Sdillon *	on an rlist_free() call.  The non-blocking features of the blist code
3742956Sdillon *	are used to great advantage in the swap code (vm/nswap_pager.c).  The
3842956Sdillon *	rlist code uses a little less overall memory then the blist code (but
3942956Sdillon *	due to swap interleaving not all that much less), but the blist code
4042956Sdillon *	scales much, much better.
4142956Sdillon *
4242956Sdillon *	LAYOUT: The radix tree is layed out recursively using a
4342956Sdillon *	linear array.  Each meta node is immediately followed (layed out
4442956Sdillon *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
4542956Sdillon *	is a recursive structure but one that can be easily scanned through
4642956Sdillon *	a very simple 'skip' calculation.  In order to support large radixes,
4742956Sdillon *	portions of the tree may reside outside our memory allocation.  We
4842956Sdillon *	handle this with an early-termination optimization (when bighint is
4942956Sdillon *	set to -1) on the scan.  The memory allocation is only large enough
5042956Sdillon *	to cover the number of blocks requested at creation time even if it
5142956Sdillon *	must be encompassed in larger root-node radix.
5242956Sdillon *
5342956Sdillon *	NOTE: the allocator cannot currently allocate more then
5442956Sdillon *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
5542956Sdillon *	large' if you try.  This is an area that could use improvement.  The
5642956Sdillon *	radix is large enough that this restriction does not effect the swap
5742956Sdillon *	system, though.  Currently only the allocation code is effected by
5842956Sdillon *	this algorithmic unfeature.  The freeing code can handle arbitrary
5942956Sdillon *	ranges.
6042956Sdillon *
6142956Sdillon *	This code can be compiled stand-alone for debugging.
6247989Sgpalmer *
6350477Speter * $FreeBSD: head/sys/kern/subr_blist.c 79224 2001-07-04 16:20:28Z dillon $
6442956Sdillon */
6542956Sdillon
6655206Speter#ifdef _KERNEL
6742956Sdillon
6842956Sdillon#include <sys/param.h>
6942956Sdillon#include <sys/systm.h>
7042956Sdillon#include <sys/lock.h>
7142956Sdillon#include <sys/kernel.h>
7242956Sdillon#include <sys/blist.h>
7342956Sdillon#include <sys/malloc.h>
7479224Sdillon#include <sys/proc.h>
7576827Salfred#include <sys/mutex.h>
7642956Sdillon#include <vm/vm.h>
7742956Sdillon#include <vm/vm_object.h>
7842956Sdillon#include <vm/vm_kern.h>
7942956Sdillon#include <vm/vm_extern.h>
8042956Sdillon#include <vm/vm_page.h>
8142956Sdillon
8242956Sdillon#else
8342956Sdillon
8442956Sdillon#ifndef BLIST_NO_DEBUG
8542956Sdillon#define BLIST_DEBUG
8642956Sdillon#endif
8742956Sdillon
8842956Sdillon#define SWAPBLK_NONE ((daddr_t)-1)
8942956Sdillon
9042956Sdillon#include <sys/types.h>
9142956Sdillon#include <stdio.h>
9242956Sdillon#include <string.h>
9342956Sdillon#include <stdlib.h>
9442956Sdillon#include <stdarg.h>
9542956Sdillon
9642956Sdillon#define malloc(a,b,c)	malloc(a)
9742956Sdillon#define free(a,b)	free(a)
9842956Sdillon
9942956Sdillontypedef unsigned int u_daddr_t;
10042956Sdillon
10142956Sdillon#include <sys/blist.h>
10242956Sdillon
10342956Sdillonvoid panic(const char *ctl, ...);
10442956Sdillon
10542956Sdillon#endif
10642956Sdillon
10742956Sdillon/*
10842956Sdillon * static support functions
10942956Sdillon */
11042956Sdillon
11142956Sdillonstatic daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
11242956Sdillonstatic daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
11342956Sdillon				daddr_t count, daddr_t radix, int skip);
11442956Sdillonstatic void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
11542956Sdillonstatic void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
11642956Sdillon					daddr_t radix, int skip, daddr_t blk);
11742956Sdillonstatic void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
11842956Sdillon				daddr_t skip, blist_t dest, daddr_t count);
11942956Sdillonstatic daddr_t	blst_radix_init(blmeta_t *scan, daddr_t radix,
12042956Sdillon						int skip, daddr_t count);
12155206Speter#ifndef _KERNEL
12242956Sdillonstatic void	blst_radix_print(blmeta_t *scan, daddr_t blk,
12342956Sdillon					daddr_t radix, int skip, int tab);
12442956Sdillon#endif
12542956Sdillon
12655206Speter#ifdef _KERNEL
12742956Sdillonstatic MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
12842956Sdillon#endif
12942956Sdillon
13042956Sdillon/*
13142956Sdillon * blist_create() - create a blist capable of handling up to the specified
13242956Sdillon *		    number of blocks
13342956Sdillon *
13442956Sdillon *	blocks must be greater then 0
13542956Sdillon *
13642956Sdillon *	The smallest blist consists of a single leaf node capable of
13742956Sdillon *	managing BLIST_BMAP_RADIX blocks.
13842956Sdillon */
13942956Sdillon
14042956Sdillonblist_t
14142956Sdillonblist_create(daddr_t blocks)
14242956Sdillon{
14342956Sdillon	blist_t bl;
14442956Sdillon	int radix;
14542956Sdillon	int skip = 0;
14642956Sdillon
14742956Sdillon	/*
14842956Sdillon	 * Calculate radix and skip field used for scanning.
14942956Sdillon	 */
15042956Sdillon	radix = BLIST_BMAP_RADIX;
15142956Sdillon
15242956Sdillon	while (radix < blocks) {
15342956Sdillon		radix <<= BLIST_META_RADIX_SHIFT;
15442956Sdillon		skip = (skip + 1) << BLIST_META_RADIX_SHIFT;
15542956Sdillon	}
15642956Sdillon
15769781Sdwmalone	bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);
15842956Sdillon
15942956Sdillon	bl->bl_blocks = blocks;
16042956Sdillon	bl->bl_radix = radix;
16142956Sdillon	bl->bl_skip = skip;
16242956Sdillon	bl->bl_rootblks = 1 +
16342956Sdillon	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
16442956Sdillon	bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK);
16542956Sdillon
16642956Sdillon#if defined(BLIST_DEBUG)
16742956Sdillon	printf(
16842956Sdillon		"BLIST representing %d blocks (%d MB of swap)"
16942956Sdillon		", requiring %dK of ram\n",
17042956Sdillon		bl->bl_blocks,
17142956Sdillon		bl->bl_blocks * 4 / 1024,
17242956Sdillon		(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
17342956Sdillon	);
17442956Sdillon	printf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks);
17542956Sdillon#endif
17642956Sdillon	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
17742956Sdillon
17842956Sdillon	return(bl);
17942956Sdillon}
18042956Sdillon
18142956Sdillonvoid
18242956Sdillonblist_destroy(blist_t bl)
18342956Sdillon{
18442956Sdillon	free(bl->bl_root, M_SWAP);
18542956Sdillon	free(bl, M_SWAP);
18642956Sdillon}
18742956Sdillon
18842956Sdillon/*
18942956Sdillon * blist_alloc() - reserve space in the block bitmap.  Return the base
19042956Sdillon *		     of a contiguous region or SWAPBLK_NONE if space could
19142956Sdillon *		     not be allocated.
19242956Sdillon */
19342956Sdillon
19442956Sdillondaddr_t
19542956Sdillonblist_alloc(blist_t bl, daddr_t count)
19642956Sdillon{
19742956Sdillon	daddr_t blk = SWAPBLK_NONE;
19842956Sdillon
19942956Sdillon	if (bl) {
20042956Sdillon		if (bl->bl_radix == BLIST_BMAP_RADIX)
20142956Sdillon			blk = blst_leaf_alloc(bl->bl_root, 0, count);
20242956Sdillon		else
20342956Sdillon			blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
20442956Sdillon		if (blk != SWAPBLK_NONE)
20542956Sdillon			bl->bl_free -= count;
20642956Sdillon	}
20742956Sdillon	return(blk);
20842956Sdillon}
20942956Sdillon
21042956Sdillon/*
21142956Sdillon * blist_free() -	free up space in the block bitmap.  Return the base
21242956Sdillon *		     	of a contiguous region.  Panic if an inconsistancy is
21342956Sdillon *			found.
21442956Sdillon */
21542956Sdillon
21642956Sdillonvoid
21742956Sdillonblist_free(blist_t bl, daddr_t blkno, daddr_t count)
21842956Sdillon{
21942956Sdillon	if (bl) {
22042956Sdillon		if (bl->bl_radix == BLIST_BMAP_RADIX)
22142956Sdillon			blst_leaf_free(bl->bl_root, blkno, count);
22242956Sdillon		else
22342956Sdillon			blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
22442956Sdillon		bl->bl_free += count;
22542956Sdillon	}
22642956Sdillon}
22742956Sdillon
22842956Sdillon/*
22942956Sdillon * blist_resize() -	resize an existing radix tree to handle the
23042956Sdillon *			specified number of blocks.  This will reallocate
23142956Sdillon *			the tree and transfer the previous bitmap to the new
23242956Sdillon *			one.  When extending the tree you can specify whether
23342956Sdillon *			the new blocks are to left allocated or freed.
23442956Sdillon */
23542956Sdillon
23642956Sdillonvoid
23742956Sdillonblist_resize(blist_t *pbl, daddr_t count, int freenew)
23842956Sdillon{
23942956Sdillon    blist_t newbl = blist_create(count);
24042956Sdillon    blist_t save = *pbl;
24142956Sdillon
24242956Sdillon    *pbl = newbl;
24342956Sdillon    if (count > save->bl_blocks)
24442956Sdillon	    count = save->bl_blocks;
24542956Sdillon    blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
24642956Sdillon
24742956Sdillon    /*
24842956Sdillon     * If resizing upwards, should we free the new space or not?
24942956Sdillon     */
25042956Sdillon    if (freenew && count < newbl->bl_blocks) {
25142956Sdillon	    blist_free(newbl, count, newbl->bl_blocks - count);
25242956Sdillon    }
25342956Sdillon    blist_destroy(save);
25442956Sdillon}
25542956Sdillon
25642956Sdillon#ifdef BLIST_DEBUG
25742956Sdillon
25842956Sdillon/*
25942956Sdillon * blist_print()    - dump radix tree
26042956Sdillon */
26142956Sdillon
26242956Sdillonvoid
26342956Sdillonblist_print(blist_t bl)
26442956Sdillon{
26542956Sdillon	printf("BLIST {\n");
26642956Sdillon	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
26742956Sdillon	printf("}\n");
26842956Sdillon}
26942956Sdillon
27042956Sdillon#endif
27142956Sdillon
27242956Sdillon/************************************************************************
27342956Sdillon *			  ALLOCATION SUPPORT FUNCTIONS			*
27442956Sdillon ************************************************************************
27542956Sdillon *
27642956Sdillon *	These support functions do all the actual work.  They may seem
27742956Sdillon *	rather longish, but that's because I've commented them up.  The
27842956Sdillon *	actual code is straight forward.
27942956Sdillon *
28042956Sdillon */
28142956Sdillon
28242956Sdillon/*
28342956Sdillon * blist_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
28442956Sdillon *
28542956Sdillon *	This is the core of the allocator and is optimized for the 1 block
28642956Sdillon *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
28742956Sdillon *	somewhat slower.  The 1 block allocation case is log2 and extremely
28842956Sdillon *	quick.
28942956Sdillon */
29042956Sdillon
29142956Sdillonstatic daddr_t
29242956Sdillonblst_leaf_alloc(
29342956Sdillon	blmeta_t *scan,
29442956Sdillon	daddr_t blk,
29542956Sdillon	int count
29642956Sdillon) {
29742956Sdillon	u_daddr_t orig = scan->u.bmu_bitmap;
29842956Sdillon
29942956Sdillon	if (orig == 0) {
30042956Sdillon		/*
30142956Sdillon		 * Optimize bitmap all-allocated case.  Also, count = 1
30242956Sdillon		 * case assumes at least 1 bit is free in the bitmap, so
30342956Sdillon		 * we have to take care of this case here.
30442956Sdillon		 */
30542956Sdillon		scan->bm_bighint = 0;
30642956Sdillon		return(SWAPBLK_NONE);
30742956Sdillon	}
30842956Sdillon	if (count == 1) {
30942956Sdillon		/*
31042956Sdillon		 * Optimized code to allocate one bit out of the bitmap
31142956Sdillon		 */
31242956Sdillon		u_daddr_t mask;
31342956Sdillon		int j = BLIST_BMAP_RADIX/2;
31442956Sdillon		int r = 0;
31542956Sdillon
31642956Sdillon		mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
31742956Sdillon
31842956Sdillon		while (j) {
31942956Sdillon			if ((orig & mask) == 0) {
32042956Sdillon			    r += j;
32142956Sdillon			    orig >>= j;
32242956Sdillon			}
32342956Sdillon			j >>= 1;
32442956Sdillon			mask >>= j;
32542956Sdillon		}
32642956Sdillon		scan->u.bmu_bitmap &= ~(1 << r);
32742956Sdillon		return(blk + r);
32842956Sdillon	}
32942956Sdillon	if (count <= BLIST_BMAP_RADIX) {
33042956Sdillon		/*
33142956Sdillon		 * non-optimized code to allocate N bits out of the bitmap.
33242956Sdillon		 * The more bits, the faster the code runs.  It will run
33342956Sdillon		 * the slowest allocating 2 bits, but since there aren't any
33442956Sdillon		 * memory ops in the core loop (or shouldn't be, anyway),
33542956Sdillon		 * you probably won't notice the difference.
33642956Sdillon		 */
33742956Sdillon		int j;
33842956Sdillon		int n = BLIST_BMAP_RADIX - count;
33942956Sdillon		u_daddr_t mask;
34042956Sdillon
34142956Sdillon		mask = (u_daddr_t)-1 >> n;
34242956Sdillon
34342956Sdillon		for (j = 0; j <= n; ++j) {
34442956Sdillon			if ((orig & mask) == mask) {
34542956Sdillon				scan->u.bmu_bitmap &= ~mask;
34642956Sdillon				return(blk + j);
34742956Sdillon			}
34842956Sdillon			mask = (mask << 1);
34942956Sdillon		}
35042956Sdillon	}
35142956Sdillon	/*
35242956Sdillon	 * We couldn't allocate count in this subtree, update bighint.
35342956Sdillon	 */
35442956Sdillon	scan->bm_bighint = count - 1;
35542956Sdillon	return(SWAPBLK_NONE);
35642956Sdillon}
35742956Sdillon
35842956Sdillon/*
35942956Sdillon * blist_meta_alloc() -	allocate at a meta in the radix tree.
36042956Sdillon *
36142956Sdillon *	Attempt to allocate at a meta node.  If we can't, we update
36242956Sdillon *	bighint and return a failure.  Updating bighint optimize future
36342956Sdillon *	calls that hit this node.  We have to check for our collapse cases
36442956Sdillon *	and we have a few optimizations strewn in as well.
36542956Sdillon */
36642956Sdillon
36742956Sdillonstatic daddr_t
36842956Sdillonblst_meta_alloc(
36942956Sdillon	blmeta_t *scan,
37042956Sdillon	daddr_t blk,
37142956Sdillon	daddr_t count,
37242956Sdillon	daddr_t radix,
37342956Sdillon	int skip
37442956Sdillon) {
37542956Sdillon	int i;
37642956Sdillon	int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
37742956Sdillon
37842956Sdillon	if (scan->u.bmu_avail == 0)  {
37942956Sdillon		/*
38042956Sdillon		 * ALL-ALLOCATED special case
38142956Sdillon		 */
38242956Sdillon		scan->bm_bighint = count;
38342956Sdillon		return(SWAPBLK_NONE);
38442956Sdillon	}
38542956Sdillon
38642956Sdillon	if (scan->u.bmu_avail == radix) {
38742956Sdillon		radix >>= BLIST_META_RADIX_SHIFT;
38842956Sdillon
38942956Sdillon		/*
39042956Sdillon		 * ALL-FREE special case, initialize uninitialize
39142956Sdillon		 * sublevel.
39242956Sdillon		 */
39342956Sdillon		for (i = 1; i <= skip; i += next_skip) {
39442956Sdillon			if (scan[i].bm_bighint == (daddr_t)-1)
39542956Sdillon				break;
39642956Sdillon			if (next_skip == 1) {
39742956Sdillon				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
39842956Sdillon				scan[i].bm_bighint = BLIST_BMAP_RADIX;
39942956Sdillon			} else {
40042956Sdillon				scan[i].bm_bighint = radix;
40142956Sdillon				scan[i].u.bmu_avail = radix;
40242956Sdillon			}
40342956Sdillon		}
40442956Sdillon	} else {
40542956Sdillon		radix >>= BLIST_META_RADIX_SHIFT;
40642956Sdillon	}
40742956Sdillon
40842956Sdillon	for (i = 1; i <= skip; i += next_skip) {
40942956Sdillon		if (count <= scan[i].bm_bighint) {
41042956Sdillon			/*
41142956Sdillon			 * count fits in object
41242956Sdillon			 */
41342956Sdillon			daddr_t r;
41442956Sdillon			if (next_skip == 1) {
41542956Sdillon				r = blst_leaf_alloc(&scan[i], blk, count);
41642956Sdillon			} else {
41742956Sdillon				r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
41842956Sdillon			}
41942956Sdillon			if (r != SWAPBLK_NONE) {
42042956Sdillon				scan->u.bmu_avail -= count;
42142956Sdillon				if (scan->bm_bighint > scan->u.bmu_avail)
42242956Sdillon					scan->bm_bighint = scan->u.bmu_avail;
42342956Sdillon				return(r);
42442956Sdillon			}
42542956Sdillon		} else if (scan[i].bm_bighint == (daddr_t)-1) {
42642956Sdillon			/*
42742956Sdillon			 * Terminator
42842956Sdillon			 */
42942956Sdillon			break;
43042956Sdillon		} else if (count > radix) {
43142956Sdillon			/*
43242956Sdillon			 * count does not fit in object even if it were
43342956Sdillon			 * complete free.
43442956Sdillon			 */
43542956Sdillon			panic("blist_meta_alloc: allocation too large");
43642956Sdillon		}
43742956Sdillon		blk += radix;
43842956Sdillon	}
43942956Sdillon
44042956Sdillon	/*
44142956Sdillon	 * We couldn't allocate count in this subtree, update bighint.
44242956Sdillon	 */
44342956Sdillon	if (scan->bm_bighint >= count)
44442956Sdillon		scan->bm_bighint = count - 1;
44542956Sdillon	return(SWAPBLK_NONE);
44642956Sdillon}
44742956Sdillon
44842956Sdillon/*
44942956Sdillon * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
45042956Sdillon *
45142956Sdillon */
45242956Sdillon
45342956Sdillonstatic void
45442956Sdillonblst_leaf_free(
45542956Sdillon	blmeta_t *scan,
45642956Sdillon	daddr_t blk,
45742956Sdillon	int count
45842956Sdillon) {
45942956Sdillon	/*
46042956Sdillon	 * free some data in this bitmap
46142956Sdillon	 *
46242956Sdillon	 * e.g.
46342956Sdillon	 *	0000111111111110000
46442956Sdillon	 *          \_________/\__/
46542956Sdillon	 *		v        n
46642956Sdillon	 */
46742956Sdillon	int n = blk & (BLIST_BMAP_RADIX - 1);
46842956Sdillon	u_daddr_t mask;
46942956Sdillon
47042956Sdillon	mask = ((u_daddr_t)-1 << n) &
47142956Sdillon	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
47242956Sdillon
47342956Sdillon	if (scan->u.bmu_bitmap & mask)
47442956Sdillon		panic("blst_radix_free: freeing free block");
47542956Sdillon	scan->u.bmu_bitmap |= mask;
47642956Sdillon
47742956Sdillon	/*
47842956Sdillon	 * We could probably do a better job here.  We are required to make
47942956Sdillon	 * bighint at least as large as the biggest contiguous block of
48042956Sdillon	 * data.  If we just shoehorn it, a little extra overhead will
48142956Sdillon	 * be incured on the next allocation (but only that one typically).
48242956Sdillon	 */
48342956Sdillon	scan->bm_bighint = BLIST_BMAP_RADIX;
48442956Sdillon}
48542956Sdillon
48642956Sdillon/*
48742956Sdillon * BLST_META_FREE() - free allocated blocks from radix tree meta info
48842956Sdillon *
48942956Sdillon *	This support routine frees a range of blocks from the bitmap.
49042956Sdillon *	The range must be entirely enclosed by this radix node.  If a
49142956Sdillon *	meta node, we break the range down recursively to free blocks
49242956Sdillon *	in subnodes (which means that this code can free an arbitrary
49342956Sdillon *	range whereas the allocation code cannot allocate an arbitrary
49442956Sdillon *	range).
49542956Sdillon */
49642956Sdillon
49742956Sdillonstatic void
49842956Sdillonblst_meta_free(
49942956Sdillon	blmeta_t *scan,
50042956Sdillon	daddr_t freeBlk,
50142956Sdillon	daddr_t count,
50242956Sdillon	daddr_t radix,
50342956Sdillon	int skip,
50442956Sdillon	daddr_t blk
50542956Sdillon) {
50642956Sdillon	int i;
50742956Sdillon	int next_skip = (skip >> BLIST_META_RADIX_SHIFT);
50842956Sdillon
50942956Sdillon#if 0
51042956Sdillon	printf("FREE (%x,%d) FROM (%x,%d)\n",
51142956Sdillon	    freeBlk, count,
51242956Sdillon	    blk, radix
51342956Sdillon	);
51442956Sdillon#endif
51542956Sdillon
51642956Sdillon	if (scan->u.bmu_avail == 0) {
51742956Sdillon		/*
51842956Sdillon		 * ALL-ALLOCATED special case, with possible
51942956Sdillon		 * shortcut to ALL-FREE special case.
52042956Sdillon		 */
52142956Sdillon		scan->u.bmu_avail = count;
52242956Sdillon		scan->bm_bighint = count;
52342956Sdillon
52442956Sdillon		if (count != radix)  {
52542956Sdillon			for (i = 1; i <= skip; i += next_skip) {
52642956Sdillon				if (scan[i].bm_bighint == (daddr_t)-1)
52742956Sdillon					break;
52842956Sdillon				scan[i].bm_bighint = 0;
52942956Sdillon				if (next_skip == 1) {
53042956Sdillon					scan[i].u.bmu_bitmap = 0;
53142956Sdillon				} else {
53242956Sdillon					scan[i].u.bmu_avail = 0;
53342956Sdillon				}
53442956Sdillon			}
53542956Sdillon			/* fall through */
53642956Sdillon		}
53742956Sdillon	} else {
53842956Sdillon		scan->u.bmu_avail += count;
53942956Sdillon		/* scan->bm_bighint = radix; */
54042956Sdillon	}
54142956Sdillon
54242956Sdillon	/*
54342956Sdillon	 * ALL-FREE special case.
54442956Sdillon	 */
54542956Sdillon
54642956Sdillon	if (scan->u.bmu_avail == radix)
54742956Sdillon		return;
54842956Sdillon	if (scan->u.bmu_avail > radix)
54942956Sdillon		panic("blst_meta_free: freeing already free blocks (%d) %d/%d", count, scan->u.bmu_avail, radix);
55042956Sdillon
55142956Sdillon	/*
55242956Sdillon	 * Break the free down into its components
55342956Sdillon	 */
55442956Sdillon
55542956Sdillon	radix >>= BLIST_META_RADIX_SHIFT;
55642956Sdillon
55742956Sdillon	i = (freeBlk - blk) / radix;
55842956Sdillon	blk += i * radix;
55942956Sdillon	i = i * next_skip + 1;
56042956Sdillon
56142956Sdillon	while (i <= skip && blk < freeBlk + count) {
56242956Sdillon		daddr_t v;
56342956Sdillon
56442956Sdillon		v = blk + radix - freeBlk;
56542956Sdillon		if (v > count)
56642956Sdillon			v = count;
56742956Sdillon
56842956Sdillon		if (scan->bm_bighint == (daddr_t)-1)
56942956Sdillon			panic("blst_meta_free: freeing unexpected range");
57042956Sdillon
57142956Sdillon		if (next_skip == 1) {
57242956Sdillon			blst_leaf_free(&scan[i], freeBlk, v);
57342956Sdillon		} else {
57442956Sdillon			blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
57542956Sdillon		}
57642956Sdillon		if (scan->bm_bighint < scan[i].bm_bighint)
57742956Sdillon		    scan->bm_bighint = scan[i].bm_bighint;
57842956Sdillon		count -= v;
57942956Sdillon		freeBlk += v;
58042956Sdillon		blk += radix;
58142956Sdillon		i += next_skip;
58242956Sdillon	}
58342956Sdillon}
58442956Sdillon
58542956Sdillon/*
58642956Sdillon * BLIST_RADIX_COPY() - copy one radix tree to another
58742956Sdillon *
58842956Sdillon *	Locates free space in the source tree and frees it in the destination
58942956Sdillon *	tree.  The space may not already be free in the destination.
59042956Sdillon */
59142956Sdillon
59242956Sdillonstatic void blst_copy(
59342956Sdillon	blmeta_t *scan,
59442956Sdillon	daddr_t blk,
59542956Sdillon	daddr_t radix,
59642956Sdillon	daddr_t skip,
59742956Sdillon	blist_t dest,
59842956Sdillon	daddr_t count
59942956Sdillon) {
60042956Sdillon	int next_skip;
60142956Sdillon	int i;
60242956Sdillon
60342956Sdillon	/*
60442956Sdillon	 * Leaf node
60542956Sdillon	 */
60642956Sdillon
60742956Sdillon	if (radix == BLIST_BMAP_RADIX) {
60842956Sdillon		u_daddr_t v = scan->u.bmu_bitmap;
60942956Sdillon
61042956Sdillon		if (v == (u_daddr_t)-1) {
61142956Sdillon			blist_free(dest, blk, count);
61242956Sdillon		} else if (v != 0) {
61342956Sdillon			int i;
61442956Sdillon
61542956Sdillon			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
61642956Sdillon				if (v & (1 << i))
61742956Sdillon					blist_free(dest, blk + i, 1);
61842956Sdillon			}
61942956Sdillon		}
62042956Sdillon		return;
62142956Sdillon	}
62242956Sdillon
62342956Sdillon	/*
62442956Sdillon	 * Meta node
62542956Sdillon	 */
62642956Sdillon
62742956Sdillon	if (scan->u.bmu_avail == 0) {
62842956Sdillon		/*
62942956Sdillon		 * Source all allocated, leave dest allocated
63042956Sdillon		 */
63142956Sdillon		return;
63242956Sdillon	}
63342956Sdillon	if (scan->u.bmu_avail == radix) {
63442956Sdillon		/*
63542956Sdillon		 * Source all free, free entire dest
63642956Sdillon		 */
63742956Sdillon		if (count < radix)
63842956Sdillon			blist_free(dest, blk, count);
63942956Sdillon		else
64042956Sdillon			blist_free(dest, blk, radix);
64142956Sdillon		return;
64242956Sdillon	}
64342956Sdillon
64442956Sdillon
64542956Sdillon	radix >>= BLIST_META_RADIX_SHIFT;
64642956Sdillon	next_skip = (skip >> BLIST_META_RADIX_SHIFT);
64742956Sdillon
64842956Sdillon	for (i = 1; count && i <= skip; i += next_skip) {
64942956Sdillon		if (scan[i].bm_bighint == (daddr_t)-1)
65042956Sdillon			break;
65142956Sdillon
65242956Sdillon		if (count >= radix) {
65342956Sdillon			blst_copy(
65442956Sdillon			    &scan[i],
65542956Sdillon			    blk,
65642956Sdillon			    radix,
65742956Sdillon			    next_skip - 1,
65842956Sdillon			    dest,
65942956Sdillon			    radix
66042956Sdillon			);
66142956Sdillon			count -= radix;
66242956Sdillon		} else {
66342956Sdillon			if (count) {
66442956Sdillon				blst_copy(
66542956Sdillon				    &scan[i],
66642956Sdillon				    blk,
66742956Sdillon				    radix,
66842956Sdillon				    next_skip - 1,
66942956Sdillon				    dest,
67042956Sdillon				    count
67142956Sdillon				);
67242956Sdillon			}
67342956Sdillon			count = 0;
67442956Sdillon		}
67542956Sdillon		blk += radix;
67642956Sdillon	}
67742956Sdillon}
67842956Sdillon
67942956Sdillon/*
68042956Sdillon * BLST_RADIX_INIT() - initialize radix tree
68142956Sdillon *
68242956Sdillon *	Initialize our meta structures and bitmaps and calculate the exact
68342956Sdillon *	amount of space required to manage 'count' blocks - this space may
68442956Sdillon *	be considerably less then the calculated radix due to the large
68542956Sdillon *	RADIX values we use.
68642956Sdillon */
68742956Sdillon
68842956Sdillonstatic daddr_t
68942956Sdillonblst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
69042956Sdillon{
69142956Sdillon	int i;
69242956Sdillon	int next_skip;
69342956Sdillon	daddr_t memindex = 0;
69442956Sdillon
69542956Sdillon	/*
69642956Sdillon	 * Leaf node
69742956Sdillon	 */
69842956Sdillon
69942956Sdillon	if (radix == BLIST_BMAP_RADIX) {
70042956Sdillon		if (scan) {
70142956Sdillon			scan->bm_bighint = 0;
70242956Sdillon			scan->u.bmu_bitmap = 0;
70342956Sdillon		}
70442956Sdillon		return(memindex);
70542956Sdillon	}
70642956Sdillon
70742956Sdillon	/*
70842956Sdillon	 * Meta node.  If allocating the entire object we can special
70942956Sdillon	 * case it.  However, we need to figure out how much memory
71042956Sdillon	 * is required to manage 'count' blocks, so we continue on anyway.
71142956Sdillon	 */
71242956Sdillon
71342956Sdillon	if (scan) {
71442956Sdillon		scan->bm_bighint = 0;
71542956Sdillon		scan->u.bmu_avail = 0;
71642956Sdillon	}
71742956Sdillon
71842956Sdillon	radix >>= BLIST_META_RADIX_SHIFT;
71942956Sdillon	next_skip = (skip >> BLIST_META_RADIX_SHIFT);
72042956Sdillon
72142956Sdillon	for (i = 1; i <= skip; i += next_skip) {
72242956Sdillon		if (count >= radix) {
72342956Sdillon			/*
72442956Sdillon			 * Allocate the entire object
72542956Sdillon			 */
72642956Sdillon			memindex = i + blst_radix_init(
72742956Sdillon			    ((scan) ? &scan[i] : NULL),
72842956Sdillon			    radix,
72942956Sdillon			    next_skip - 1,
73042956Sdillon			    radix
73142956Sdillon			);
73242956Sdillon			count -= radix;
73342956Sdillon		} else if (count > 0) {
73442956Sdillon			/*
73542956Sdillon			 * Allocate a partial object
73642956Sdillon			 */
73742956Sdillon			memindex = i + blst_radix_init(
73842956Sdillon			    ((scan) ? &scan[i] : NULL),
73942956Sdillon			    radix,
74042956Sdillon			    next_skip - 1,
74142956Sdillon			    count
74242956Sdillon			);
74342956Sdillon			count = 0;
74442956Sdillon		} else {
74542956Sdillon			/*
74642956Sdillon			 * Add terminator and break out
74742956Sdillon			 */
74842956Sdillon			if (scan)
74942956Sdillon				scan[i].bm_bighint = (daddr_t)-1;
75042956Sdillon			break;
75142956Sdillon		}
75242956Sdillon	}
75342956Sdillon	if (memindex < i)
75442956Sdillon		memindex = i;
75542956Sdillon	return(memindex);
75642956Sdillon}
75742956Sdillon
75842956Sdillon#ifdef BLIST_DEBUG
75942956Sdillon
76042956Sdillonstatic void
76142956Sdillonblst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
76242956Sdillon{
76342956Sdillon	int i;
76442956Sdillon	int next_skip;
76542956Sdillon	int lastState = 0;
76642956Sdillon
76742956Sdillon	if (radix == BLIST_BMAP_RADIX) {
76842956Sdillon		printf(
76942956Sdillon		    "%*.*s(%04x,%d): bitmap %08x big=%d\n",
77042956Sdillon		    tab, tab, "",
77142956Sdillon		    blk, radix,
77242956Sdillon		    scan->u.bmu_bitmap,
77342956Sdillon		    scan->bm_bighint
77442956Sdillon		);
77542956Sdillon		return;
77642956Sdillon	}
77742956Sdillon
77842956Sdillon	if (scan->u.bmu_avail == 0) {
77942956Sdillon		printf(
78042956Sdillon		    "%*.*s(%04x,%d) ALL ALLOCATED\n",
78142956Sdillon		    tab, tab, "",
78242956Sdillon		    blk,
78342956Sdillon		    radix
78442956Sdillon		);
78542956Sdillon		return;
78642956Sdillon	}
78742956Sdillon	if (scan->u.bmu_avail == radix) {
78842956Sdillon		printf(
78942956Sdillon		    "%*.*s(%04x,%d) ALL FREE\n",
79042956Sdillon		    tab, tab, "",
79142956Sdillon		    blk,
79242956Sdillon		    radix
79342956Sdillon		);
79442956Sdillon		return;
79542956Sdillon	}
79642956Sdillon
79742956Sdillon	printf(
79842956Sdillon	    "%*.*s(%04x,%d): subtree (%d/%d) big=%d {\n",
79942956Sdillon	    tab, tab, "",
80042956Sdillon	    blk, radix,
80142956Sdillon	    scan->u.bmu_avail,
80242956Sdillon	    radix,
80342956Sdillon	    scan->bm_bighint
80442956Sdillon	);
80542956Sdillon
80642956Sdillon	radix >>= BLIST_META_RADIX_SHIFT;
80742956Sdillon	next_skip = (skip >> BLIST_META_RADIX_SHIFT);
80842956Sdillon	tab += 4;
80942956Sdillon
81042956Sdillon	for (i = 1; i <= skip; i += next_skip) {
81142956Sdillon		if (scan[i].bm_bighint == (daddr_t)-1) {
81242956Sdillon			printf(
81342956Sdillon			    "%*.*s(%04x,%d): Terminator\n",
81442956Sdillon			    tab, tab, "",
81542956Sdillon			    blk, radix
81642956Sdillon			);
81742956Sdillon			lastState = 0;
81842956Sdillon			break;
81942956Sdillon		}
82042956Sdillon		blst_radix_print(
82142956Sdillon		    &scan[i],
82242956Sdillon		    blk,
82342956Sdillon		    radix,
82442956Sdillon		    next_skip - 1,
82542956Sdillon		    tab
82642956Sdillon		);
82742956Sdillon		blk += radix;
82842956Sdillon	}
82942956Sdillon	tab -= 4;
83042956Sdillon
83142956Sdillon	printf(
83242956Sdillon	    "%*.*s}\n",
83342956Sdillon	    tab, tab, ""
83442956Sdillon	);
83542956Sdillon}
83642956Sdillon
83742956Sdillon#endif
83842956Sdillon
83942956Sdillon#ifdef BLIST_DEBUG
84042956Sdillon
84142956Sdillonint
84242956Sdillonmain(int ac, char **av)
84342956Sdillon{
84442956Sdillon	int size = 1024;
84542956Sdillon	int i;
84642956Sdillon	blist_t bl;
84742956Sdillon
84842956Sdillon	for (i = 1; i < ac; ++i) {
84942956Sdillon		const char *ptr = av[i];
85042956Sdillon		if (*ptr != '-') {
85142956Sdillon			size = strtol(ptr, NULL, 0);
85242956Sdillon			continue;
85342956Sdillon		}
85442956Sdillon		ptr += 2;
85542956Sdillon		fprintf(stderr, "Bad option: %s\n", ptr - 2);
85642956Sdillon		exit(1);
85742956Sdillon	}
85842956Sdillon	bl = blist_create(size);
85942956Sdillon	blist_free(bl, 0, size);
86042956Sdillon
86142956Sdillon	for (;;) {
86242956Sdillon		char buf[1024];
86342956Sdillon		daddr_t da = 0;
86442956Sdillon		daddr_t count = 0;
86542956Sdillon
86642956Sdillon
86742956Sdillon		printf("%d/%d/%d> ", bl->bl_free, size, bl->bl_radix);
86842956Sdillon		fflush(stdout);
86942956Sdillon		if (fgets(buf, sizeof(buf), stdin) == NULL)
87042956Sdillon			break;
87142956Sdillon		switch(buf[0]) {
87242956Sdillon		case 'r':
87342956Sdillon			if (sscanf(buf + 1, "%d", &count) == 1) {
87442956Sdillon				blist_resize(&bl, count, 1);
87542956Sdillon			} else {
87642956Sdillon				printf("?\n");
87742956Sdillon			}
87842956Sdillon		case 'p':
87942956Sdillon			blist_print(bl);
88042956Sdillon			break;
88142956Sdillon		case 'a':
88242956Sdillon			if (sscanf(buf + 1, "%d", &count) == 1) {
88342956Sdillon				daddr_t blk = blist_alloc(bl, count);
88442956Sdillon				printf("    R=%04x\n", blk);
88542956Sdillon			} else {
88642956Sdillon				printf("?\n");
88742956Sdillon			}
88842956Sdillon			break;
88942956Sdillon		case 'f':
89042956Sdillon			if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
89142956Sdillon				blist_free(bl, da, count);
89242956Sdillon			} else {
89342956Sdillon				printf("?\n");
89442956Sdillon			}
89542956Sdillon			break;
89642956Sdillon		case '?':
89742956Sdillon		case 'h':
89842956Sdillon			puts(
89942956Sdillon			    "p          -print\n"
90042956Sdillon			    "a %d       -allocate\n"
90142956Sdillon			    "f %x %d    -free\n"
90242956Sdillon			    "r %d       -resize\n"
90342956Sdillon			    "h/?        -help"
90442956Sdillon			);
90542956Sdillon			break;
90642956Sdillon		default:
90742956Sdillon			printf("?\n");
90842956Sdillon			break;
90942956Sdillon		}
91042956Sdillon	}
91142956Sdillon	return(0);
91242956Sdillon}
91342956Sdillon
91442956Sdillonvoid
91542956Sdillonpanic(const char *ctl, ...)
91642956Sdillon{
91742956Sdillon	va_list va;
91842956Sdillon
91942956Sdillon	va_start(va, ctl);
92042956Sdillon	vfprintf(stderr, ctl, va);
92142956Sdillon	fprintf(stderr, "\n");
92242956Sdillon	va_end(va);
92342956Sdillon	exit(1);
92442956Sdillon}
92542956Sdillon
92642956Sdillon#endif
92742956Sdillon
928