subr_blist.c revision 109086
1
2/*
3 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
4 *
5 *	(c)Copyright 1998, Matthew Dillon.  Terms for use and redistribution
6 *	are covered by the BSD Copyright as found in /usr/src/COPYRIGHT.
7 *
8 *	This module implements a general bitmap allocator/deallocator.  The
9 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other than to return
11 *	SWAPBLK_NONE on an allocation failure.
12 *
13 *	A radix tree is used to maintain the bitmap.  Two radix constants are
14 *	involved:  One for the bitmaps contained in the leaf nodes (typically
15 *	32), and one for the meta nodes (typically 16).  Both meta and leaf
16 *	nodes have a hint field.  This field gives us a hint as to the largest
17 *	free contiguous range of blocks under the node.  It may contain a
18 *	value that is too high, but will never contain a value that is too
19 *	low.  When the radix tree is searched, allocation failures in subtrees
20 *	update the hint.
21 *
22 *	The radix tree also implements two collapsed states for meta nodes:
23 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
24 *	in either of these two states, all information contained underneath
25 *	the node is considered stale.  These states are used to optimize
26 *	allocation and freeing operations.
27 *
28 * 	The hinting greatly increases code efficiency for allocations while
29 *	the general radix structure optimizes both allocations and frees.  The
30 *	radix tree should be able to operate well no matter how much
31 *	fragmentation there is and no matter how large a bitmap is used.
32 *
33 *	Unlike the rlist code, the blist code wires all necessary memory at
34 *	creation time.  Neither allocations nor frees require interaction with
35 *	the memory subsystem.  In contrast, the rlist code may allocate memory
36 *	on an rlist_free() call.  The non-blocking features of the blist code
37 *	are used to great advantage in the swap code (vm/nswap_pager.c).  The
 *	rlist code uses a little less overall memory than the blist code (but
39 *	due to swap interleaving not all that much less), but the blist code
40 *	scales much, much better.
41 *
 *	LAYOUT: The radix tree is laid out recursively using a
 *	linear array.  Each meta node is immediately followed (laid out
44 *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
45 *	is a recursive structure but one that can be easily scanned through
46 *	a very simple 'skip' calculation.  In order to support large radixes,
47 *	portions of the tree may reside outside our memory allocation.  We
48 *	handle this with an early-termination optimization (when bighint is
49 *	set to -1) on the scan.  The memory allocation is only large enough
50 *	to cover the number of blocks requested at creation time even if it
51 *	must be encompassed in larger root-node radix.
52 *
 *	NOTE: the allocator cannot currently allocate more than
54 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
55 *	large' if you try.  This is an area that could use improvement.  The
 *	radix is large enough that this restriction does not affect the swap
 *	system, though.  Currently only the allocation code is affected by
58 *	this algorithmic unfeature.  The freeing code can handle arbitrary
59 *	ranges.
60 *
61 *	This code can be compiled stand-alone for debugging.
62 *
63 * $FreeBSD: head/sys/kern/subr_blist.c 109086 2003-01-11 01:09:51Z dillon $
64 */
65
66#ifdef _KERNEL
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/lock.h>
71#include <sys/kernel.h>
72#include <sys/blist.h>
73#include <sys/malloc.h>
74#include <sys/proc.h>
75#include <sys/mutex.h>
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/vm_page.h>
81
82#else
83
84#ifndef BLIST_NO_DEBUG
85#define BLIST_DEBUG
86#endif
87
88#define SWAPBLK_NONE ((daddr_t)-1)
89
90#include <sys/types.h>
91#include <stdio.h>
92#include <string.h>
93#include <stdlib.h>
94#include <stdarg.h>
95
96#define malloc(a,b,c)	calloc(a, 1)
97#define free(a,b)	free(a)
98
99typedef unsigned int u_daddr_t;
100
101#include <sys/blist.h>
102
103void panic(const char *ctl, ...);
104
105#endif
106
107/*
108 * static support functions
109 */
110
111static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
112static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
113				daddr_t count, daddr_t radix, int skip);
114static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
115static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
116					daddr_t radix, int skip, daddr_t blk);
117static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
118				daddr_t skip, blist_t dest, daddr_t count);
119static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
120static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
121				daddr_t radix, int skip, daddr_t blk);
122static daddr_t	blst_radix_init(blmeta_t *scan, daddr_t radix,
123						int skip, daddr_t count);
124#ifndef _KERNEL
125static void	blst_radix_print(blmeta_t *scan, daddr_t blk,
126					daddr_t radix, int skip, int tab);
127#endif
128
129#ifdef _KERNEL
130static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
131#endif
132
133/*
134 * blist_create() - create a blist capable of handling up to the specified
135 *		    number of blocks
136 *
 *	blocks must be greater than 0
138 *
139 *	The smallest blist consists of a single leaf node capable of
140 *	managing BLIST_BMAP_RADIX blocks.
141 */
142
143blist_t
144blist_create(daddr_t blocks)
145{
146	blist_t bl;
147	int radix;
148	int skip = 0;
149
150	/*
151	 * Calculate radix and skip field used for scanning.
152	 */
153	radix = BLIST_BMAP_RADIX;
154
155	while (radix < blocks) {
156		radix *= BLIST_META_RADIX;
157		skip = (skip + 1) * BLIST_META_RADIX;
158	}
159
160	bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);
161
162	bl->bl_blocks = blocks;
163	bl->bl_radix = radix;
164	bl->bl_skip = skip;
165	bl->bl_rootblks = 1 +
166	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
167	bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK);
168
169#if defined(BLIST_DEBUG)
170	printf(
171		"BLIST representing %lld blocks (%lld MB of swap)"
172		", requiring %lldK of ram\n",
173		(long long)bl->bl_blocks,
174		(long long)bl->bl_blocks * 4 / 1024,
175		(long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
176	);
177	printf("BLIST raw radix tree contains %lld records\n",
178	    (long long)bl->bl_rootblks);
179#endif
180	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
181
182	return(bl);
183}
184
185void
186blist_destroy(blist_t bl)
187{
188	free(bl->bl_root, M_SWAP);
189	free(bl, M_SWAP);
190}
191
192/*
193 * blist_alloc() - reserve space in the block bitmap.  Return the base
194 *		     of a contiguous region or SWAPBLK_NONE if space could
195 *		     not be allocated.
196 */
197
198daddr_t
199blist_alloc(blist_t bl, daddr_t count)
200{
201	daddr_t blk = SWAPBLK_NONE;
202
203	if (bl) {
204		if (bl->bl_radix == BLIST_BMAP_RADIX)
205			blk = blst_leaf_alloc(bl->bl_root, 0, count);
206		else
207			blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
208		if (blk != SWAPBLK_NONE)
209			bl->bl_free -= count;
210	}
211	return(blk);
212}
213
214/*
215 * blist_free() -	free up space in the block bitmap.  Return the base
 *		     	of a contiguous region.  Panic if an inconsistency is
217 *			found.
218 */
219
220void
221blist_free(blist_t bl, daddr_t blkno, daddr_t count)
222{
223	if (bl) {
224		if (bl->bl_radix == BLIST_BMAP_RADIX)
225			blst_leaf_free(bl->bl_root, blkno, count);
226		else
227			blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
228		bl->bl_free += count;
229	}
230}
231
232/*
233 * blist_fill() -	mark a region in the block bitmap as off-limits
234 *			to the allocator (i.e. allocate it), ignoring any
235 *			existing allocations.  Return the number of blocks
236 *			actually filled that were free before the call.
237 */
238
239int
240blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
241{
242	int filled;
243
244	if (bl) {
245		if (bl->bl_radix == BLIST_BMAP_RADIX)
246			filled = blst_leaf_fill(bl->bl_root, blkno, count);
247		else
248			filled = blst_meta_fill(bl->bl_root, blkno, count,
249			    bl->bl_radix, bl->bl_skip, 0);
250		bl->bl_free -= filled;
251		return filled;
252	} else
253		return 0;
254}
255
256/*
257 * blist_resize() -	resize an existing radix tree to handle the
258 *			specified number of blocks.  This will reallocate
259 *			the tree and transfer the previous bitmap to the new
260 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
262 */
263
264void
265blist_resize(blist_t *pbl, daddr_t count, int freenew)
266{
267    blist_t newbl = blist_create(count);
268    blist_t save = *pbl;
269
270    *pbl = newbl;
271    if (count > save->bl_blocks)
272	    count = save->bl_blocks;
273    blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
274
275    /*
276     * If resizing upwards, should we free the new space or not?
277     */
278    if (freenew && count < newbl->bl_blocks) {
279	    blist_free(newbl, count, newbl->bl_blocks - count);
280    }
281    blist_destroy(save);
282}
283
284#ifdef BLIST_DEBUG
285
286/*
287 * blist_print()    - dump radix tree
288 */
289
void
blist_print(blist_t bl)
{
	/* Debug aid: dump the entire radix tree to stdout. */
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}
297
298#endif
299
300/************************************************************************
301 *			  ALLOCATION SUPPORT FUNCTIONS			*
302 ************************************************************************
303 *
304 *	These support functions do all the actual work.  They may seem
305 *	rather longish, but that's because I've commented them up.  The
306 *	actual code is straight forward.
307 *
308 */
309
310/*
311 * blist_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
312 *
313 *	This is the core of the allocator and is optimized for the 1 block
314 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
315 *	somewhat slower.  The 1 block allocation case is log2 and extremely
316 *	quick.
317 */
318
319static daddr_t
320blst_leaf_alloc(
321	blmeta_t *scan,
322	daddr_t blk,
323	int count
324) {
325	u_daddr_t orig = scan->u.bmu_bitmap;
326
327	if (orig == 0) {
328		/*
329		 * Optimize bitmap all-allocated case.  Also, count = 1
330		 * case assumes at least 1 bit is free in the bitmap, so
331		 * we have to take care of this case here.
332		 */
333		scan->bm_bighint = 0;
334		return(SWAPBLK_NONE);
335	}
336	if (count == 1) {
337		/*
338		 * Optimized code to allocate one bit out of the bitmap
339		 */
340		u_daddr_t mask;
341		int j = BLIST_BMAP_RADIX/2;
342		int r = 0;
343
344		mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
345
346		while (j) {
347			if ((orig & mask) == 0) {
348			    r += j;
349			    orig >>= j;
350			}
351			j >>= 1;
352			mask >>= j;
353		}
354		scan->u.bmu_bitmap &= ~(1 << r);
355		return(blk + r);
356	}
357	if (count <= BLIST_BMAP_RADIX) {
358		/*
359		 * non-optimized code to allocate N bits out of the bitmap.
360		 * The more bits, the faster the code runs.  It will run
361		 * the slowest allocating 2 bits, but since there aren't any
362		 * memory ops in the core loop (or shouldn't be, anyway),
363		 * you probably won't notice the difference.
364		 */
365		int j;
366		int n = BLIST_BMAP_RADIX - count;
367		u_daddr_t mask;
368
369		mask = (u_daddr_t)-1 >> n;
370
371		for (j = 0; j <= n; ++j) {
372			if ((orig & mask) == mask) {
373				scan->u.bmu_bitmap &= ~mask;
374				return(blk + j);
375			}
376			mask = (mask << 1);
377		}
378	}
379	/*
380	 * We couldn't allocate count in this subtree, update bighint.
381	 */
382	scan->bm_bighint = count - 1;
383	return(SWAPBLK_NONE);
384}
385
386/*
387 * blist_meta_alloc() -	allocate at a meta in the radix tree.
388 *
389 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
391 *	calls that hit this node.  We have to check for our collapse cases
392 *	and we have a few optimizations strewn in as well.
393 */
394
static daddr_t
blst_meta_alloc(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t count,
	daddr_t radix,
	int skip
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

	if (scan->u.bmu_avail == 0)  {
		/*
		 * ALL-ALLOCATED special case: nothing free below, record
		 * the failed request size as the hint and bail.
		 */
		scan->bm_bighint = count;
		return(SWAPBLK_NONE);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case: the children are stale, so
		 * (re)initialize the uninitialized sublevel to all-free
		 * before descending.  Terminator nodes mark the edge of
		 * the allocated portion of the tree.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	/* Scan children; bm_bighint prunes subtrees that cannot fit. */
	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * count fits in object
			 */
			daddr_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				/* Success: propagate accounting upward. */
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		} else if (count > radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blist_meta_alloc: allocation too large");
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
475
476/*
477 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
478 *
479 */
480
481static void
482blst_leaf_free(
483	blmeta_t *scan,
484	daddr_t blk,
485	int count
486) {
487	/*
488	 * free some data in this bitmap
489	 *
490	 * e.g.
491	 *	0000111111111110000
492	 *          \_________/\__/
493	 *		v        n
494	 */
495	int n = blk & (BLIST_BMAP_RADIX - 1);
496	u_daddr_t mask;
497
498	mask = ((u_daddr_t)-1 << n) &
499	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
500
501	if (scan->u.bmu_bitmap & mask)
502		panic("blst_radix_free: freeing free block");
503	scan->u.bmu_bitmap |= mask;
504
505	/*
506	 * We could probably do a better job here.  We are required to make
507	 * bighint at least as large as the biggest contiguous block of
508	 * data.  If we just shoehorn it, a little extra overhead will
509	 * be incured on the next allocation (but only that one typically).
510	 */
511	scan->bm_bighint = BLIST_BMAP_RADIX;
512}
513
514/*
515 * BLST_META_FREE() - free allocated blocks from radix tree meta info
516 *
517 *	This support routine frees a range of blocks from the bitmap.
518 *	The range must be entirely enclosed by this radix node.  If a
519 *	meta node, we break the range down recursively to free blocks
520 *	in subnodes (which means that this code can free an arbitrary
521 *	range whereas the allocation code cannot allocate an arbitrary
522 *	range).
523 */
524
static void
blst_meta_free(
	blmeta_t *scan,
	daddr_t freeBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.  Children are
		 * stale, so mark them explicitly all-allocated before
		 * the recursion below frees into them.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix)  {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.  Once every block under this node is
	 * free the subtree collapses and the children become stale.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	/* Index of the child whose range contains freeBlk. */
	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		/* v = portion of [freeBlk, freeBlk+count) in this child. */
		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
		}
		/* Freeing can only raise a child's hint; propagate it up. */
		if (scan->bm_bighint < scan[i].bm_bighint)
		    scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
614
615/*
616 * BLIST_RADIX_COPY() - copy one radix tree to another
617 *
618 *	Locates free space in the source tree and frees it in the destination
619 *	tree.  The space may not already be free in the destination.
620 */
621
622static void blst_copy(
623	blmeta_t *scan,
624	daddr_t blk,
625	daddr_t radix,
626	daddr_t skip,
627	blist_t dest,
628	daddr_t count
629) {
630	int next_skip;
631	int i;
632
633	/*
634	 * Leaf node
635	 */
636
637	if (radix == BLIST_BMAP_RADIX) {
638		u_daddr_t v = scan->u.bmu_bitmap;
639
640		if (v == (u_daddr_t)-1) {
641			blist_free(dest, blk, count);
642		} else if (v != 0) {
643			int i;
644
645			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
646				if (v & (1 << i))
647					blist_free(dest, blk + i, 1);
648			}
649		}
650		return;
651	}
652
653	/*
654	 * Meta node
655	 */
656
657	if (scan->u.bmu_avail == 0) {
658		/*
659		 * Source all allocated, leave dest allocated
660		 */
661		return;
662	}
663	if (scan->u.bmu_avail == radix) {
664		/*
665		 * Source all free, free entire dest
666		 */
667		if (count < radix)
668			blist_free(dest, blk, count);
669		else
670			blist_free(dest, blk, radix);
671		return;
672	}
673
674
675	radix /= BLIST_META_RADIX;
676	next_skip = ((u_int)skip / BLIST_META_RADIX);
677
678	for (i = 1; count && i <= skip; i += next_skip) {
679		if (scan[i].bm_bighint == (daddr_t)-1)
680			break;
681
682		if (count >= radix) {
683			blst_copy(
684			    &scan[i],
685			    blk,
686			    radix,
687			    next_skip - 1,
688			    dest,
689			    radix
690			);
691			count -= radix;
692		} else {
693			if (count) {
694				blst_copy(
695				    &scan[i],
696				    blk,
697				    radix,
698				    next_skip - 1,
699				    dest,
700				    count
701				);
702			}
703			count = 0;
704		}
705		blk += radix;
706	}
707}
708
709/*
710 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
711 *
712 *	This routine allocates all blocks in the specified range
713 *	regardless of any existing allocations in that range.  Returns
714 *	the number of blocks allocated by the call.
715 */
716
717static int
718blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
719{
720	int n = blk & (BLIST_BMAP_RADIX - 1);
721	int nblks;
722	u_daddr_t mask, bitmap;
723
724	mask = ((u_daddr_t)-1 << n) &
725	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
726
727	/* Count the number of blocks we're about to allocate */
728	bitmap = scan->u.bmu_bitmap & mask;
729	for (nblks = 0; bitmap != 0; nblks++)
730		bitmap &= bitmap - 1;
731
732	scan->u.bmu_bitmap &= ~mask;
733	return nblks;
734}
735
736/*
737 * BLIST_META_FILL() -	allocate specific blocks at a meta node
738 *
739 *	This routine allocates the specified range of blocks,
740 *	regardless of any existing allocations in the range.  The
741 *	range must be within the extent of this node.  Returns the
742 *	number of blocks allocated by the call.
743 */
static int
blst_meta_fill(
	blmeta_t *scan,
	daddr_t allocBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0)  {
		/*
		 * ALL-ALLOCATED special case: the fill consumes whatever
		 * was still free under this node; the children become
		 * stale.
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel: children
		 * are stale, so reset them to all-free before descending.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix)
		panic("blist_meta_fill: allocation too large");

	/* Index of the child whose range contains allocBlk. */
	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		/* v = portion of [allocBlk, allocBlk+count) in this child. */
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	/* Only blocks that were actually free reduce bmu_avail. */
	scan->u.bmu_avail -= nblks;
	return nblks;
}
819
820/*
821 * BLST_RADIX_INIT() - initialize radix tree
822 *
823 *	Initialize our meta structures and bitmaps and calculate the exact
824 *	amount of space required to manage 'count' blocks - this space may
825 *	be considerably less then the calculated radix due to the large
826 *	RADIX values we use.
827 */
828
static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;	/* highest node index touched so far */

	/*
	 * Leaf node.  Note: when scan is NULL this is a sizing-only pass
	 * (see blist_create()); nodes start in the all-allocated state.
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out.  The terminator
			 * (bighint == -1) marks the edge of the allocated
			 * portion of the tree for the scan routines.
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	/* Return the number of nodes required, counting the terminator. */
	if (memindex < i)
		memindex = i;
	return(memindex);
}
898
899#ifdef BLIST_DEBUG
900
901static void
902blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
903{
904	int i;
905	int next_skip;
906	int lastState = 0;
907
908	if (radix == BLIST_BMAP_RADIX) {
909		printf(
910		    "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
911		    tab, tab, "",
912		    (long long)blk, (long long)radix,
913		    (long long)scan->u.bmu_bitmap,
914		    (long long)scan->bm_bighint
915		);
916		return;
917	}
918
919	if (scan->u.bmu_avail == 0) {
920		printf(
921		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
922		    tab, tab, "",
923		    (long long)blk,
924		    (long long)radix
925		);
926		return;
927	}
928	if (scan->u.bmu_avail == radix) {
929		printf(
930		    "%*.*s(%08llx,%lld) ALL FREE\n",
931		    tab, tab, "",
932		    (long long)blk,
933		    (long long)radix
934		);
935		return;
936	}
937
938	printf(
939	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
940	    tab, tab, "",
941	    (long long)blk, (long long)radix,
942	    (long long)scan->u.bmu_avail,
943	    (long long)radix,
944	    (long long)scan->bm_bighint
945	);
946
947	radix /= BLIST_META_RADIX;
948	next_skip = ((u_int)skip / BLIST_META_RADIX);
949	tab += 4;
950
951	for (i = 1; i <= skip; i += next_skip) {
952		if (scan[i].bm_bighint == (daddr_t)-1) {
953			printf(
954			    "%*.*s(%08llx,%lld): Terminator\n",
955			    tab, tab, "",
956			    (long long)blk, (long long)radix
957			);
958			lastState = 0;
959			break;
960		}
961		blst_radix_print(
962		    &scan[i],
963		    blk,
964		    radix,
965		    next_skip - 1,
966		    tab
967		);
968		blk += radix;
969	}
970	tab -= 4;
971
972	printf(
973	    "%*.*s}\n",
974	    tab, tab, ""
975	);
976}
977
978#endif
979
980#ifdef BLIST_DEBUG
981
int
main(int ac, char **av)
{
	int size = 1024;	/* default number of blocks under test */
	int i;
	blist_t bl;

	/* Argument parsing: a bare number sets the blist size. */
	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size);
	/* A fresh blist starts all-allocated; free it so tests can allocate. */
	blist_free(bl, 0, size);

	/* Interactive command loop; prompt shows free/size/radix. */
	for (;;) {
		char buf[1024];
		daddr_t da = 0;
		daddr_t count = 0;


		printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1);
			} else {
				printf("?\n");
			}
			/*
			 * fall through: 'r' prints the tree after resizing.
			 * NOTE(review): there is no 'break' here -- looks
			 * intentional, but confirm.  Also note 'size' is
			 * not updated, so the prompt keeps showing the
			 * pre-resize size.
			 */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				printf("    n=%d\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
			    "h/?        -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}
1066
/*
 * Userland stand-in for the kernel panic(): print the formatted message
 * to stderr and terminate with a failure status.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	exit(1);
}
1078
1079#endif
1080
1081