Deleted Added
full compact
subr_blist.c (109623) subr_blist.c (111119)
1
2/*
3 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
4 *
5 * (c)Copyright 1998, Matthew Dillon. Terms for use and redistribution
6 * are covered by the BSD Copyright as found in /usr/src/COPYRIGHT.
7 *
8 * This module implements a general bitmap allocator/deallocator. The
9 * allocator eats around 2 bits per 'block'. The module does not
10 * try to interpret the meaning of a 'block' other then to return
11 * SWAPBLK_NONE on an allocation failure.
12 *
13 * A radix tree is used to maintain the bitmap. Two radix constants are
14 * involved: One for the bitmaps contained in the leaf nodes (typically
15 * 32), and one for the meta nodes (typically 16). Both meta and leaf
16 * nodes have a hint field. This field gives us a hint as to the largest
17 * free contiguous range of blocks under the node. It may contain a
18 * value that is too high, but will never contain a value that is too
19 * low. When the radix tree is searched, allocation failures in subtrees
20 * update the hint.
21 *
22 * The radix tree also implements two collapsed states for meta nodes:
23 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
24 * in either of these two states, all information contained underneath
25 * the node is considered stale. These states are used to optimize
26 * allocation and freeing operations.
27 *
28 * The hinting greatly increases code efficiency for allocations while
29 * the general radix structure optimizes both allocations and frees. The
30 * radix tree should be able to operate well no matter how much
31 * fragmentation there is and no matter how large a bitmap is used.
32 *
33 * Unlike the rlist code, the blist code wires all necessary memory at
34 * creation time. Neither allocations nor frees require interaction with
35 * the memory subsystem. In contrast, the rlist code may allocate memory
36 * on an rlist_free() call. The non-blocking features of the blist code
37 * are used to great advantage in the swap code (vm/nswap_pager.c). The
38 * rlist code uses a little less overall memory then the blist code (but
39 * due to swap interleaving not all that much less), but the blist code
40 * scales much, much better.
41 *
42 * LAYOUT: The radix tree is layed out recursively using a
43 * linear array. Each meta node is immediately followed (layed out
44 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
45 * is a recursive structure but one that can be easily scanned through
46 * a very simple 'skip' calculation. In order to support large radixes,
47 * portions of the tree may reside outside our memory allocation. We
48 * handle this with an early-termination optimization (when bighint is
49 * set to -1) on the scan. The memory allocation is only large enough
50 * to cover the number of blocks requested at creation time even if it
51 * must be encompassed in larger root-node radix.
52 *
53 * NOTE: the allocator cannot currently allocate more then
54 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
55 * large' if you try. This is an area that could use improvement. The
56 * radix is large enough that this restriction does not effect the swap
57 * system, though. Currently only the allocation code is effected by
58 * this algorithmic unfeature. The freeing code can handle arbitrary
59 * ranges.
60 *
61 * This code can be compiled stand-alone for debugging.
62 *
1
2/*
3 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
4 *
5 * (c)Copyright 1998, Matthew Dillon. Terms for use and redistribution
6 * are covered by the BSD Copyright as found in /usr/src/COPYRIGHT.
7 *
8 * This module implements a general bitmap allocator/deallocator. The
9 * allocator eats around 2 bits per 'block'. The module does not
10 * try to interpret the meaning of a 'block' other then to return
11 * SWAPBLK_NONE on an allocation failure.
12 *
13 * A radix tree is used to maintain the bitmap. Two radix constants are
14 * involved: One for the bitmaps contained in the leaf nodes (typically
15 * 32), and one for the meta nodes (typically 16). Both meta and leaf
16 * nodes have a hint field. This field gives us a hint as to the largest
17 * free contiguous range of blocks under the node. It may contain a
18 * value that is too high, but will never contain a value that is too
19 * low. When the radix tree is searched, allocation failures in subtrees
20 * update the hint.
21 *
22 * The radix tree also implements two collapsed states for meta nodes:
23 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
24 * in either of these two states, all information contained underneath
25 * the node is considered stale. These states are used to optimize
26 * allocation and freeing operations.
27 *
28 * The hinting greatly increases code efficiency for allocations while
29 * the general radix structure optimizes both allocations and frees. The
30 * radix tree should be able to operate well no matter how much
31 * fragmentation there is and no matter how large a bitmap is used.
32 *
33 * Unlike the rlist code, the blist code wires all necessary memory at
34 * creation time. Neither allocations nor frees require interaction with
35 * the memory subsystem. In contrast, the rlist code may allocate memory
36 * on an rlist_free() call. The non-blocking features of the blist code
37 * are used to great advantage in the swap code (vm/nswap_pager.c). The
38 * rlist code uses a little less overall memory then the blist code (but
39 * due to swap interleaving not all that much less), but the blist code
40 * scales much, much better.
41 *
42 * LAYOUT: The radix tree is layed out recursively using a
43 * linear array. Each meta node is immediately followed (layed out
44 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
45 * is a recursive structure but one that can be easily scanned through
46 * a very simple 'skip' calculation. In order to support large radixes,
47 * portions of the tree may reside outside our memory allocation. We
48 * handle this with an early-termination optimization (when bighint is
49 * set to -1) on the scan. The memory allocation is only large enough
50 * to cover the number of blocks requested at creation time even if it
51 * must be encompassed in larger root-node radix.
52 *
53 * NOTE: the allocator cannot currently allocate more then
54 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
55 * large' if you try. This is an area that could use improvement. The
56 * radix is large enough that this restriction does not effect the swap
57 * system, though. Currently only the allocation code is effected by
58 * this algorithmic unfeature. The freeing code can handle arbitrary
59 * ranges.
60 *
61 * This code can be compiled stand-alone for debugging.
62 *
63 * $FreeBSD: head/sys/kern/subr_blist.c 109623 2003-01-21 08:56:16Z alfred $
63 * $FreeBSD: head/sys/kern/subr_blist.c 111119 2003-02-19 05:47:46Z imp $
64 */
65
66#ifdef _KERNEL
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/lock.h>
71#include <sys/kernel.h>
72#include <sys/blist.h>
73#include <sys/malloc.h>
74#include <sys/proc.h>
75#include <sys/mutex.h>
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/vm_page.h>
81
82#else
83
84#ifndef BLIST_NO_DEBUG
85#define BLIST_DEBUG
86#endif
87
88#define SWAPBLK_NONE ((daddr_t)-1)
89
90#include <sys/types.h>
91#include <stdio.h>
92#include <string.h>
93#include <stdlib.h>
94#include <stdarg.h>
95
96#define malloc(a,b,c) calloc(a, 1)
97#define free(a,b) free(a)
98
99typedef unsigned int u_daddr_t;
100
101#include <sys/blist.h>
102
103void panic(const char *ctl, ...);
104
105#endif
106
107/*
108 * static support functions
109 */
110
111static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
112static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
113 daddr_t count, daddr_t radix, int skip);
114static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
115static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
116 daddr_t radix, int skip, daddr_t blk);
117static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
118 daddr_t skip, blist_t dest, daddr_t count);
119static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
120static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
121 daddr_t radix, int skip, daddr_t blk);
122static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
123 int skip, daddr_t count);
124#ifndef _KERNEL
125static void blst_radix_print(blmeta_t *scan, daddr_t blk,
126 daddr_t radix, int skip, int tab);
127#endif
128
129#ifdef _KERNEL
130static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
131#endif
132
133/*
134 * blist_create() - create a blist capable of handling up to the specified
135 * number of blocks
136 *
137 * blocks must be greater then 0
138 *
139 * The smallest blist consists of a single leaf node capable of
140 * managing BLIST_BMAP_RADIX blocks.
141 */
142
143blist_t
144blist_create(daddr_t blocks)
145{
146 blist_t bl;
147 int radix;
148 int skip = 0;
149
150 /*
151 * Calculate radix and skip field used for scanning.
152 */
153 radix = BLIST_BMAP_RADIX;
154
155 while (radix < blocks) {
156 radix *= BLIST_META_RADIX;
157 skip = (skip + 1) * BLIST_META_RADIX;
158 }
159
64 */
65
66#ifdef _KERNEL
67
68#include <sys/param.h>
69#include <sys/systm.h>
70#include <sys/lock.h>
71#include <sys/kernel.h>
72#include <sys/blist.h>
73#include <sys/malloc.h>
74#include <sys/proc.h>
75#include <sys/mutex.h>
76#include <vm/vm.h>
77#include <vm/vm_object.h>
78#include <vm/vm_kern.h>
79#include <vm/vm_extern.h>
80#include <vm/vm_page.h>
81
82#else
83
84#ifndef BLIST_NO_DEBUG
85#define BLIST_DEBUG
86#endif
87
88#define SWAPBLK_NONE ((daddr_t)-1)
89
90#include <sys/types.h>
91#include <stdio.h>
92#include <string.h>
93#include <stdlib.h>
94#include <stdarg.h>
95
96#define malloc(a,b,c) calloc(a, 1)
97#define free(a,b) free(a)
98
99typedef unsigned int u_daddr_t;
100
101#include <sys/blist.h>
102
103void panic(const char *ctl, ...);
104
105#endif
106
107/*
108 * static support functions
109 */
110
111static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
112static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
113 daddr_t count, daddr_t radix, int skip);
114static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
115static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
116 daddr_t radix, int skip, daddr_t blk);
117static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
118 daddr_t skip, blist_t dest, daddr_t count);
119static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
120static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
121 daddr_t radix, int skip, daddr_t blk);
122static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
123 int skip, daddr_t count);
124#ifndef _KERNEL
125static void blst_radix_print(blmeta_t *scan, daddr_t blk,
126 daddr_t radix, int skip, int tab);
127#endif
128
129#ifdef _KERNEL
130static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
131#endif
132
133/*
134 * blist_create() - create a blist capable of handling up to the specified
135 * number of blocks
136 *
137 * blocks must be greater then 0
138 *
139 * The smallest blist consists of a single leaf node capable of
140 * managing BLIST_BMAP_RADIX blocks.
141 */
142
143blist_t
144blist_create(daddr_t blocks)
145{
146 blist_t bl;
147 int radix;
148 int skip = 0;
149
150 /*
151 * Calculate radix and skip field used for scanning.
152 */
153 radix = BLIST_BMAP_RADIX;
154
155 while (radix < blocks) {
156 radix *= BLIST_META_RADIX;
157 skip = (skip + 1) * BLIST_META_RADIX;
158 }
159
160 bl = malloc(sizeof(struct blist), M_SWAP, M_ZERO);
160 bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);
161
162 bl->bl_blocks = blocks;
163 bl->bl_radix = radix;
164 bl->bl_skip = skip;
165 bl->bl_rootblks = 1 +
166 blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
161
162 bl->bl_blocks = blocks;
163 bl->bl_radix = radix;
164 bl->bl_skip = skip;
165 bl->bl_rootblks = 1 +
166 blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
167 bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, 0);
167 bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK);
168
169#if defined(BLIST_DEBUG)
170 printf(
171 "BLIST representing %lld blocks (%lld MB of swap)"
172 ", requiring %lldK of ram\n",
173 (long long)bl->bl_blocks,
174 (long long)bl->bl_blocks * 4 / 1024,
175 (long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
176 );
177 printf("BLIST raw radix tree contains %lld records\n",
178 (long long)bl->bl_rootblks);
179#endif
180 blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
181
182 return(bl);
183}
184
185void
186blist_destroy(blist_t bl)
187{
188 free(bl->bl_root, M_SWAP);
189 free(bl, M_SWAP);
190}
191
192/*
193 * blist_alloc() - reserve space in the block bitmap. Return the base
194 * of a contiguous region or SWAPBLK_NONE if space could
195 * not be allocated.
196 */
197
198daddr_t
199blist_alloc(blist_t bl, daddr_t count)
200{
201 daddr_t blk = SWAPBLK_NONE;
202
203 if (bl) {
204 if (bl->bl_radix == BLIST_BMAP_RADIX)
205 blk = blst_leaf_alloc(bl->bl_root, 0, count);
206 else
207 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
208 if (blk != SWAPBLK_NONE)
209 bl->bl_free -= count;
210 }
211 return(blk);
212}
213
214/*
215 * blist_free() - free up space in the block bitmap. Return the base
216 * of a contiguous region. Panic if an inconsistency is
217 * found.
218 */
219
220void
221blist_free(blist_t bl, daddr_t blkno, daddr_t count)
222{
223 if (bl) {
224 if (bl->bl_radix == BLIST_BMAP_RADIX)
225 blst_leaf_free(bl->bl_root, blkno, count);
226 else
227 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
228 bl->bl_free += count;
229 }
230}
231
232/*
233 * blist_fill() - mark a region in the block bitmap as off-limits
234 * to the allocator (i.e. allocate it), ignoring any
235 * existing allocations. Return the number of blocks
236 * actually filled that were free before the call.
237 */
238
239int
240blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
241{
242 int filled;
243
244 if (bl) {
245 if (bl->bl_radix == BLIST_BMAP_RADIX)
246 filled = blst_leaf_fill(bl->bl_root, blkno, count);
247 else
248 filled = blst_meta_fill(bl->bl_root, blkno, count,
249 bl->bl_radix, bl->bl_skip, 0);
250 bl->bl_free -= filled;
251 return filled;
252 } else
253 return 0;
254}
255
256/*
257 * blist_resize() - resize an existing radix tree to handle the
258 * specified number of blocks. This will reallocate
259 * the tree and transfer the previous bitmap to the new
260 * one. When extending the tree you can specify whether
261 * the new blocks are to be left allocated or freed.
262 */
263
264void
265blist_resize(blist_t *pbl, daddr_t count, int freenew)
266{
267 blist_t newbl = blist_create(count);
268 blist_t save = *pbl;
269
270 *pbl = newbl;
271 if (count > save->bl_blocks)
272 count = save->bl_blocks;
273 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
274
275 /*
276 * If resizing upwards, should we free the new space or not?
277 */
278 if (freenew && count < newbl->bl_blocks) {
279 blist_free(newbl, count, newbl->bl_blocks - count);
280 }
281 blist_destroy(save);
282}
283
284#ifdef BLIST_DEBUG
285
286/*
287 * blist_print() - dump radix tree
288 */
289
290void
291blist_print(blist_t bl)
292{
293 printf("BLIST {\n");
294 blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
295 printf("}\n");
296}
297
298#endif
299
300/************************************************************************
301 * ALLOCATION SUPPORT FUNCTIONS *
302 ************************************************************************
303 *
304 * These support functions do all the actual work. They may seem
305 * rather longish, but that's because I've commented them up. The
306 * actual code is straight forward.
307 *
308 */
309
310/*
311 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
312 *
313 * This is the core of the allocator and is optimized for the 1 block
314 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are
315 * somewhat slower. The 1 block allocation case is log2 and extremely
316 * quick.
317 */
318
319static daddr_t
320blst_leaf_alloc(
321 blmeta_t *scan,
322 daddr_t blk,
323 int count
324) {
325 u_daddr_t orig = scan->u.bmu_bitmap;
326
327 if (orig == 0) {
328 /*
329 * Optimize bitmap all-allocated case. Also, count = 1
330 * case assumes at least 1 bit is free in the bitmap, so
331 * we have to take care of this case here.
332 */
333 scan->bm_bighint = 0;
334 return(SWAPBLK_NONE);
335 }
336 if (count == 1) {
337 /*
338 * Optimized code to allocate one bit out of the bitmap
339 */
340 u_daddr_t mask;
341 int j = BLIST_BMAP_RADIX/2;
342 int r = 0;
343
344 mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
345
346 while (j) {
347 if ((orig & mask) == 0) {
348 r += j;
349 orig >>= j;
350 }
351 j >>= 1;
352 mask >>= j;
353 }
354 scan->u.bmu_bitmap &= ~(1 << r);
355 return(blk + r);
356 }
357 if (count <= BLIST_BMAP_RADIX) {
358 /*
359 * non-optimized code to allocate N bits out of the bitmap.
360 * The more bits, the faster the code runs. It will run
361 * the slowest allocating 2 bits, but since there aren't any
362 * memory ops in the core loop (or shouldn't be, anyway),
363 * you probably won't notice the difference.
364 */
365 int j;
366 int n = BLIST_BMAP_RADIX - count;
367 u_daddr_t mask;
368
369 mask = (u_daddr_t)-1 >> n;
370
371 for (j = 0; j <= n; ++j) {
372 if ((orig & mask) == mask) {
373 scan->u.bmu_bitmap &= ~mask;
374 return(blk + j);
375 }
376 mask = (mask << 1);
377 }
378 }
379 /*
380 * We couldn't allocate count in this subtree, update bighint.
381 */
382 scan->bm_bighint = count - 1;
383 return(SWAPBLK_NONE);
384}
385
386/*
387 * blist_meta_alloc() - allocate at a meta in the radix tree.
388 *
389 * Attempt to allocate at a meta node. If we can't, we update
390 * bighint and return a failure. Updating bighint optimize future
391 * calls that hit this node. We have to check for our collapse cases
392 * and we have a few optimizations strewn in as well.
393 */
394
395static daddr_t
396blst_meta_alloc(
397 blmeta_t *scan,
398 daddr_t blk,
399 daddr_t count,
400 daddr_t radix,
401 int skip
402) {
403 int i;
404 int next_skip = ((u_int)skip / BLIST_META_RADIX);
405
406 if (scan->u.bmu_avail == 0) {
407 /*
408 * ALL-ALLOCATED special case
409 */
410 scan->bm_bighint = count;
411 return(SWAPBLK_NONE);
412 }
413
414 if (scan->u.bmu_avail == radix) {
415 radix /= BLIST_META_RADIX;
416
417 /*
418 * ALL-FREE special case, initialize uninitialize
419 * sublevel.
420 */
421 for (i = 1; i <= skip; i += next_skip) {
422 if (scan[i].bm_bighint == (daddr_t)-1)
423 break;
424 if (next_skip == 1) {
425 scan[i].u.bmu_bitmap = (u_daddr_t)-1;
426 scan[i].bm_bighint = BLIST_BMAP_RADIX;
427 } else {
428 scan[i].bm_bighint = radix;
429 scan[i].u.bmu_avail = radix;
430 }
431 }
432 } else {
433 radix /= BLIST_META_RADIX;
434 }
435
436 for (i = 1; i <= skip; i += next_skip) {
437 if (count <= scan[i].bm_bighint) {
438 /*
439 * count fits in object
440 */
441 daddr_t r;
442 if (next_skip == 1) {
443 r = blst_leaf_alloc(&scan[i], blk, count);
444 } else {
445 r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
446 }
447 if (r != SWAPBLK_NONE) {
448 scan->u.bmu_avail -= count;
449 if (scan->bm_bighint > scan->u.bmu_avail)
450 scan->bm_bighint = scan->u.bmu_avail;
451 return(r);
452 }
453 } else if (scan[i].bm_bighint == (daddr_t)-1) {
454 /*
455 * Terminator
456 */
457 break;
458 } else if (count > radix) {
459 /*
460 * count does not fit in object even if it were
461 * complete free.
462 */
463 panic("blist_meta_alloc: allocation too large");
464 }
465 blk += radix;
466 }
467
468 /*
469 * We couldn't allocate count in this subtree, update bighint.
470 */
471 if (scan->bm_bighint >= count)
472 scan->bm_bighint = count - 1;
473 return(SWAPBLK_NONE);
474}
475
476/*
477 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
478 *
479 */
480
481static void
482blst_leaf_free(
483 blmeta_t *scan,
484 daddr_t blk,
485 int count
486) {
487 /*
488 * free some data in this bitmap
489 *
490 * e.g.
491 * 0000111111111110000
492 * \_________/\__/
493 * v n
494 */
495 int n = blk & (BLIST_BMAP_RADIX - 1);
496 u_daddr_t mask;
497
498 mask = ((u_daddr_t)-1 << n) &
499 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
500
501 if (scan->u.bmu_bitmap & mask)
502 panic("blst_radix_free: freeing free block");
503 scan->u.bmu_bitmap |= mask;
504
505 /*
506 * We could probably do a better job here. We are required to make
507 * bighint at least as large as the biggest contiguous block of
508 * data. If we just shoehorn it, a little extra overhead will
509 * be incured on the next allocation (but only that one typically).
510 */
511 scan->bm_bighint = BLIST_BMAP_RADIX;
512}
513
514/*
515 * BLST_META_FREE() - free allocated blocks from radix tree meta info
516 *
517 * This support routine frees a range of blocks from the bitmap.
518 * The range must be entirely enclosed by this radix node. If a
519 * meta node, we break the range down recursively to free blocks
520 * in subnodes (which means that this code can free an arbitrary
521 * range whereas the allocation code cannot allocate an arbitrary
522 * range).
523 */
524
525static void
526blst_meta_free(
527 blmeta_t *scan,
528 daddr_t freeBlk,
529 daddr_t count,
530 daddr_t radix,
531 int skip,
532 daddr_t blk
533) {
534 int i;
535 int next_skip = ((u_int)skip / BLIST_META_RADIX);
536
537#if 0
538 printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
539 (long long)freeBlk, (long long)count,
540 (long long)blk, (long long)radix
541 );
542#endif
543
544 if (scan->u.bmu_avail == 0) {
545 /*
546 * ALL-ALLOCATED special case, with possible
547 * shortcut to ALL-FREE special case.
548 */
549 scan->u.bmu_avail = count;
550 scan->bm_bighint = count;
551
552 if (count != radix) {
553 for (i = 1; i <= skip; i += next_skip) {
554 if (scan[i].bm_bighint == (daddr_t)-1)
555 break;
556 scan[i].bm_bighint = 0;
557 if (next_skip == 1) {
558 scan[i].u.bmu_bitmap = 0;
559 } else {
560 scan[i].u.bmu_avail = 0;
561 }
562 }
563 /* fall through */
564 }
565 } else {
566 scan->u.bmu_avail += count;
567 /* scan->bm_bighint = radix; */
568 }
569
570 /*
571 * ALL-FREE special case.
572 */
573
574 if (scan->u.bmu_avail == radix)
575 return;
576 if (scan->u.bmu_avail > radix)
577 panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
578 (long long)count, (long long)scan->u.bmu_avail,
579 (long long)radix);
580
581 /*
582 * Break the free down into its components
583 */
584
585 radix /= BLIST_META_RADIX;
586
587 i = (freeBlk - blk) / radix;
588 blk += i * radix;
589 i = i * next_skip + 1;
590
591 while (i <= skip && blk < freeBlk + count) {
592 daddr_t v;
593
594 v = blk + radix - freeBlk;
595 if (v > count)
596 v = count;
597
598 if (scan->bm_bighint == (daddr_t)-1)
599 panic("blst_meta_free: freeing unexpected range");
600
601 if (next_skip == 1) {
602 blst_leaf_free(&scan[i], freeBlk, v);
603 } else {
604 blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
605 }
606 if (scan->bm_bighint < scan[i].bm_bighint)
607 scan->bm_bighint = scan[i].bm_bighint;
608 count -= v;
609 freeBlk += v;
610 blk += radix;
611 i += next_skip;
612 }
613}
614
615/*
616 * BLIST_RADIX_COPY() - copy one radix tree to another
617 *
618 * Locates free space in the source tree and frees it in the destination
619 * tree. The space may not already be free in the destination.
620 */
621
622static void blst_copy(
623 blmeta_t *scan,
624 daddr_t blk,
625 daddr_t radix,
626 daddr_t skip,
627 blist_t dest,
628 daddr_t count
629) {
630 int next_skip;
631 int i;
632
633 /*
634 * Leaf node
635 */
636
637 if (radix == BLIST_BMAP_RADIX) {
638 u_daddr_t v = scan->u.bmu_bitmap;
639
640 if (v == (u_daddr_t)-1) {
641 blist_free(dest, blk, count);
642 } else if (v != 0) {
643 int i;
644
645 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
646 if (v & (1 << i))
647 blist_free(dest, blk + i, 1);
648 }
649 }
650 return;
651 }
652
653 /*
654 * Meta node
655 */
656
657 if (scan->u.bmu_avail == 0) {
658 /*
659 * Source all allocated, leave dest allocated
660 */
661 return;
662 }
663 if (scan->u.bmu_avail == radix) {
664 /*
665 * Source all free, free entire dest
666 */
667 if (count < radix)
668 blist_free(dest, blk, count);
669 else
670 blist_free(dest, blk, radix);
671 return;
672 }
673
674
675 radix /= BLIST_META_RADIX;
676 next_skip = ((u_int)skip / BLIST_META_RADIX);
677
678 for (i = 1; count && i <= skip; i += next_skip) {
679 if (scan[i].bm_bighint == (daddr_t)-1)
680 break;
681
682 if (count >= radix) {
683 blst_copy(
684 &scan[i],
685 blk,
686 radix,
687 next_skip - 1,
688 dest,
689 radix
690 );
691 count -= radix;
692 } else {
693 if (count) {
694 blst_copy(
695 &scan[i],
696 blk,
697 radix,
698 next_skip - 1,
699 dest,
700 count
701 );
702 }
703 count = 0;
704 }
705 blk += radix;
706 }
707}
708
709/*
710 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
711 *
712 * This routine allocates all blocks in the specified range
713 * regardless of any existing allocations in that range. Returns
714 * the number of blocks allocated by the call.
715 */
716
717static int
718blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
719{
720 int n = blk & (BLIST_BMAP_RADIX - 1);
721 int nblks;
722 u_daddr_t mask, bitmap;
723
724 mask = ((u_daddr_t)-1 << n) &
725 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
726
727 /* Count the number of blocks we're about to allocate */
728 bitmap = scan->u.bmu_bitmap & mask;
729 for (nblks = 0; bitmap != 0; nblks++)
730 bitmap &= bitmap - 1;
731
732 scan->u.bmu_bitmap &= ~mask;
733 return nblks;
734}
735
736/*
737 * BLIST_META_FILL() - allocate specific blocks at a meta node
738 *
739 * This routine allocates the specified range of blocks,
740 * regardless of any existing allocations in the range. The
741 * range must be within the extent of this node. Returns the
742 * number of blocks allocated by the call.
743 */
744static int
745blst_meta_fill(
746 blmeta_t *scan,
747 daddr_t allocBlk,
748 daddr_t count,
749 daddr_t radix,
750 int skip,
751 daddr_t blk
752) {
753 int i;
754 int next_skip = ((u_int)skip / BLIST_META_RADIX);
755 int nblks = 0;
756
757 if (count == radix || scan->u.bmu_avail == 0) {
758 /*
759 * ALL-ALLOCATED special case
760 */
761 nblks = scan->u.bmu_avail;
762 scan->u.bmu_avail = 0;
763 scan->bm_bighint = count;
764 return nblks;
765 }
766
767 if (scan->u.bmu_avail == radix) {
768 radix /= BLIST_META_RADIX;
769
770 /*
771 * ALL-FREE special case, initialize sublevel
772 */
773 for (i = 1; i <= skip; i += next_skip) {
774 if (scan[i].bm_bighint == (daddr_t)-1)
775 break;
776 if (next_skip == 1) {
777 scan[i].u.bmu_bitmap = (u_daddr_t)-1;
778 scan[i].bm_bighint = BLIST_BMAP_RADIX;
779 } else {
780 scan[i].bm_bighint = radix;
781 scan[i].u.bmu_avail = radix;
782 }
783 }
784 } else {
785 radix /= BLIST_META_RADIX;
786 }
787
788 if (count > radix)
789 panic("blist_meta_fill: allocation too large");
790
791 i = (allocBlk - blk) / radix;
792 blk += i * radix;
793 i = i * next_skip + 1;
794
795 while (i <= skip && blk < allocBlk + count) {
796 daddr_t v;
797
798 v = blk + radix - allocBlk;
799 if (v > count)
800 v = count;
801
802 if (scan->bm_bighint == (daddr_t)-1)
803 panic("blst_meta_fill: filling unexpected range");
804
805 if (next_skip == 1) {
806 nblks += blst_leaf_fill(&scan[i], allocBlk, v);
807 } else {
808 nblks += blst_meta_fill(&scan[i], allocBlk, v,
809 radix, next_skip - 1, blk);
810 }
811 count -= v;
812 allocBlk += v;
813 blk += radix;
814 i += next_skip;
815 }
816 scan->u.bmu_avail -= nblks;
817 return nblks;
818}
819
820/*
821 * BLST_RADIX_INIT() - initialize radix tree
822 *
823 * Initialize our meta structures and bitmaps and calculate the exact
824 * amount of space required to manage 'count' blocks - this space may
825 * be considerably less then the calculated radix due to the large
826 * RADIX values we use.
827 */
828
829static daddr_t
830blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
831{
832 int i;
833 int next_skip;
834 daddr_t memindex = 0;
835
836 /*
837 * Leaf node
838 */
839
840 if (radix == BLIST_BMAP_RADIX) {
841 if (scan) {
842 scan->bm_bighint = 0;
843 scan->u.bmu_bitmap = 0;
844 }
845 return(memindex);
846 }
847
848 /*
849 * Meta node. If allocating the entire object we can special
850 * case it. However, we need to figure out how much memory
851 * is required to manage 'count' blocks, so we continue on anyway.
852 */
853
854 if (scan) {
855 scan->bm_bighint = 0;
856 scan->u.bmu_avail = 0;
857 }
858
859 radix /= BLIST_META_RADIX;
860 next_skip = ((u_int)skip / BLIST_META_RADIX);
861
862 for (i = 1; i <= skip; i += next_skip) {
863 if (count >= radix) {
864 /*
865 * Allocate the entire object
866 */
867 memindex = i + blst_radix_init(
868 ((scan) ? &scan[i] : NULL),
869 radix,
870 next_skip - 1,
871 radix
872 );
873 count -= radix;
874 } else if (count > 0) {
875 /*
876 * Allocate a partial object
877 */
878 memindex = i + blst_radix_init(
879 ((scan) ? &scan[i] : NULL),
880 radix,
881 next_skip - 1,
882 count
883 );
884 count = 0;
885 } else {
886 /*
887 * Add terminator and break out
888 */
889 if (scan)
890 scan[i].bm_bighint = (daddr_t)-1;
891 break;
892 }
893 }
894 if (memindex < i)
895 memindex = i;
896 return(memindex);
897}
898
899#ifdef BLIST_DEBUG
900
901static void
902blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
903{
904 int i;
905 int next_skip;
906 int lastState = 0;
907
908 if (radix == BLIST_BMAP_RADIX) {
909 printf(
910 "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
911 tab, tab, "",
912 (long long)blk, (long long)radix,
913 (long long)scan->u.bmu_bitmap,
914 (long long)scan->bm_bighint
915 );
916 return;
917 }
918
919 if (scan->u.bmu_avail == 0) {
920 printf(
921 "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
922 tab, tab, "",
923 (long long)blk,
924 (long long)radix
925 );
926 return;
927 }
928 if (scan->u.bmu_avail == radix) {
929 printf(
930 "%*.*s(%08llx,%lld) ALL FREE\n",
931 tab, tab, "",
932 (long long)blk,
933 (long long)radix
934 );
935 return;
936 }
937
938 printf(
939 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
940 tab, tab, "",
941 (long long)blk, (long long)radix,
942 (long long)scan->u.bmu_avail,
943 (long long)radix,
944 (long long)scan->bm_bighint
945 );
946
947 radix /= BLIST_META_RADIX;
948 next_skip = ((u_int)skip / BLIST_META_RADIX);
949 tab += 4;
950
951 for (i = 1; i <= skip; i += next_skip) {
952 if (scan[i].bm_bighint == (daddr_t)-1) {
953 printf(
954 "%*.*s(%08llx,%lld): Terminator\n",
955 tab, tab, "",
956 (long long)blk, (long long)radix
957 );
958 lastState = 0;
959 break;
960 }
961 blst_radix_print(
962 &scan[i],
963 blk,
964 radix,
965 next_skip - 1,
966 tab
967 );
968 blk += radix;
969 }
970 tab -= 4;
971
972 printf(
973 "%*.*s}\n",
974 tab, tab, ""
975 );
976}
977
978#endif
979
980#ifdef BLIST_DEBUG
981
982int
983main(int ac, char **av)
984{
985 int size = 1024;
986 int i;
987 blist_t bl;
988
989 for (i = 1; i < ac; ++i) {
990 const char *ptr = av[i];
991 if (*ptr != '-') {
992 size = strtol(ptr, NULL, 0);
993 continue;
994 }
995 ptr += 2;
996 fprintf(stderr, "Bad option: %s\n", ptr - 2);
997 exit(1);
998 }
999 bl = blist_create(size);
1000 blist_free(bl, 0, size);
1001
1002 for (;;) {
1003 char buf[1024];
1004 daddr_t da = 0;
1005 daddr_t count = 0;
1006
1007
1008 printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
1009 (long long)size, (long long)bl->bl_radix);
1010 fflush(stdout);
1011 if (fgets(buf, sizeof(buf), stdin) == NULL)
1012 break;
1013 switch(buf[0]) {
1014 case 'r':
1015 if (sscanf(buf + 1, "%lld", &count) == 1) {
1016 blist_resize(&bl, count, 1);
1017 } else {
1018 printf("?\n");
1019 }
1020 case 'p':
1021 blist_print(bl);
1022 break;
1023 case 'a':
1024 if (sscanf(buf + 1, "%lld", &count) == 1) {
1025 daddr_t blk = blist_alloc(bl, count);
1026 printf(" R=%08llx\n", (long long)blk);
1027 } else {
1028 printf("?\n");
1029 }
1030 break;
1031 case 'f':
1032 if (sscanf(buf + 1, "%llx %lld",
1033 (long long *)&da, (long long *)&count) == 2) {
1034 blist_free(bl, da, count);
1035 } else {
1036 printf("?\n");
1037 }
1038 break;
1039 case 'l':
1040 if (sscanf(buf + 1, "%llx %lld",
1041 (long long *)&da, (long long *)&count) == 2) {
1042 printf(" n=%d\n",
1043 blist_fill(bl, da, count));
1044 } else {
1045 printf("?\n");
1046 }
1047 break;
1048 case '?':
1049 case 'h':
1050 puts(
1051 "p -print\n"
1052 "a %d -allocate\n"
1053 "f %x %d -free\n"
1054 "l %x %d -fill\n"
1055 "r %d -resize\n"
1056 "h/? -help"
1057 );
1058 break;
1059 default:
1060 printf("?\n");
1061 break;
1062 }
1063 }
1064 return(0);
1065}
1066
/*
 * panic() -	user-mode stand-in for the kernel panic(): report the
 *		formatted message on stderr and terminate the process.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	exit(1);
}
1078
1079#endif
1080
168
169#if defined(BLIST_DEBUG)
170 printf(
171 "BLIST representing %lld blocks (%lld MB of swap)"
172 ", requiring %lldK of ram\n",
173 (long long)bl->bl_blocks,
174 (long long)bl->bl_blocks * 4 / 1024,
175 (long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
176 );
177 printf("BLIST raw radix tree contains %lld records\n",
178 (long long)bl->bl_rootblks);
179#endif
180 blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
181
182 return(bl);
183}
184
/*
 * blist_destroy() -	release the radix tree and the blist header
 *			allocated by blist_create().  'bl' must not be
 *			used after this call.
 */
void
blist_destroy(blist_t bl)
{
	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}
191
192/*
193 * blist_alloc() - reserve space in the block bitmap. Return the base
194 * of a contiguous region or SWAPBLK_NONE if space could
195 * not be allocated.
196 */
197
198daddr_t
199blist_alloc(blist_t bl, daddr_t count)
200{
201 daddr_t blk = SWAPBLK_NONE;
202
203 if (bl) {
204 if (bl->bl_radix == BLIST_BMAP_RADIX)
205 blk = blst_leaf_alloc(bl->bl_root, 0, count);
206 else
207 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
208 if (blk != SWAPBLK_NONE)
209 bl->bl_free -= count;
210 }
211 return(blk);
212}
213
214/*
215 * blist_free() - free up space in the block bitmap. Return the base
216 * of a contiguous region. Panic if an inconsistancy is
217 * found.
218 */
219
220void
221blist_free(blist_t bl, daddr_t blkno, daddr_t count)
222{
223 if (bl) {
224 if (bl->bl_radix == BLIST_BMAP_RADIX)
225 blst_leaf_free(bl->bl_root, blkno, count);
226 else
227 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
228 bl->bl_free += count;
229 }
230}
231
232/*
233 * blist_fill() - mark a region in the block bitmap as off-limits
234 * to the allocator (i.e. allocate it), ignoring any
235 * existing allocations. Return the number of blocks
236 * actually filled that were free before the call.
237 */
238
239int
240blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
241{
242 int filled;
243
244 if (bl) {
245 if (bl->bl_radix == BLIST_BMAP_RADIX)
246 filled = blst_leaf_fill(bl->bl_root, blkno, count);
247 else
248 filled = blst_meta_fill(bl->bl_root, blkno, count,
249 bl->bl_radix, bl->bl_skip, 0);
250 bl->bl_free -= filled;
251 return filled;
252 } else
253 return 0;
254}
255
256/*
257 * blist_resize() - resize an existing radix tree to handle the
258 * specified number of blocks. This will reallocate
259 * the tree and transfer the previous bitmap to the new
260 * one. When extending the tree you can specify whether
261 * the new blocks are to left allocated or freed.
262 */
263
264void
265blist_resize(blist_t *pbl, daddr_t count, int freenew)
266{
267 blist_t newbl = blist_create(count);
268 blist_t save = *pbl;
269
270 *pbl = newbl;
271 if (count > save->bl_blocks)
272 count = save->bl_blocks;
273 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
274
275 /*
276 * If resizing upwards, should we free the new space or not?
277 */
278 if (freenew && count < newbl->bl_blocks) {
279 blist_free(newbl, count, newbl->bl_blocks - count);
280 }
281 blist_destroy(save);
282}
283
284#ifdef BLIST_DEBUG
285
/*
 * blist_print() -	dump the radix tree to stdout in human-readable
 *			form (debug builds only).
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}
297
298#endif
299
300/************************************************************************
301 * ALLOCATION SUPPORT FUNCTIONS *
302 ************************************************************************
303 *
304 * These support functions do all the actual work. They may seem
305 * rather longish, but that's because I've commented them up. The
306 * actual code is straight forward.
307 *
308 */
309
310/*
311 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
312 *
313 * This is the core of the allocator and is optimized for the 1 block
314 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are
315 * somewhat slower. The 1 block allocation case is log2 and extremely
316 * quick.
317 */
318
319static daddr_t
320blst_leaf_alloc(
321 blmeta_t *scan,
322 daddr_t blk,
323 int count
324) {
325 u_daddr_t orig = scan->u.bmu_bitmap;
326
327 if (orig == 0) {
328 /*
329 * Optimize bitmap all-allocated case. Also, count = 1
330 * case assumes at least 1 bit is free in the bitmap, so
331 * we have to take care of this case here.
332 */
333 scan->bm_bighint = 0;
334 return(SWAPBLK_NONE);
335 }
336 if (count == 1) {
337 /*
338 * Optimized code to allocate one bit out of the bitmap
339 */
340 u_daddr_t mask;
341 int j = BLIST_BMAP_RADIX/2;
342 int r = 0;
343
344 mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
345
346 while (j) {
347 if ((orig & mask) == 0) {
348 r += j;
349 orig >>= j;
350 }
351 j >>= 1;
352 mask >>= j;
353 }
354 scan->u.bmu_bitmap &= ~(1 << r);
355 return(blk + r);
356 }
357 if (count <= BLIST_BMAP_RADIX) {
358 /*
359 * non-optimized code to allocate N bits out of the bitmap.
360 * The more bits, the faster the code runs. It will run
361 * the slowest allocating 2 bits, but since there aren't any
362 * memory ops in the core loop (or shouldn't be, anyway),
363 * you probably won't notice the difference.
364 */
365 int j;
366 int n = BLIST_BMAP_RADIX - count;
367 u_daddr_t mask;
368
369 mask = (u_daddr_t)-1 >> n;
370
371 for (j = 0; j <= n; ++j) {
372 if ((orig & mask) == mask) {
373 scan->u.bmu_bitmap &= ~mask;
374 return(blk + j);
375 }
376 mask = (mask << 1);
377 }
378 }
379 /*
380 * We couldn't allocate count in this subtree, update bighint.
381 */
382 scan->bm_bighint = count - 1;
383 return(SWAPBLK_NONE);
384}
385
/*
 * blist_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */

static daddr_t
blst_meta_alloc(
	blmeta_t *scan,
	daddr_t blk,
	daddr_t count,
	daddr_t radix,
	int skip
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case: nothing free under this
		 * node; record the request size so future callers of
		 * this size skip the node entirely.
		 */
		scan->bm_bighint = count;
		return(SWAPBLK_NONE);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case: the sublevel is stale, so
		 * initialize the uninitialized children to all-free
		 * before descending into them.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;	/* terminator: no more children */
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * count may fit in this child: recurse (or hit
			 * the leaf bitmap at the bottom level).
			 */
			daddr_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		} else if (count > radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blist_meta_alloc: allocation too large");
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
475
/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 *
 *	Sets the bits for [blk, blk + count) within this leaf's bitmap.
 *	Panics if any bit in the range is already set (double free).
 */

static void
blst_leaf_free(
	blmeta_t *scan,
	daddr_t blk,
	int count
) {
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	          \_________/\__/
	 *		v        n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	/* Build a mask of 'count' one bits starting at bit offset 'n'. */
	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
513
/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(
	blmeta_t *scan,
	daddr_t freeBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			/*
			 * The sublevel is stale: mark every child
			 * all-allocated before the descent below.
			 */
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;	/* terminator */
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	/* Locate the first child overlapping freeBlk. */
	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		/* Portion of the range that falls within this child. */
		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
		}
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
614
615/*
616 * BLIST_RADIX_COPY() - copy one radix tree to another
617 *
618 * Locates free space in the source tree and frees it in the destination
619 * tree. The space may not already be free in the destination.
620 */
621
622static void blst_copy(
623 blmeta_t *scan,
624 daddr_t blk,
625 daddr_t radix,
626 daddr_t skip,
627 blist_t dest,
628 daddr_t count
629) {
630 int next_skip;
631 int i;
632
633 /*
634 * Leaf node
635 */
636
637 if (radix == BLIST_BMAP_RADIX) {
638 u_daddr_t v = scan->u.bmu_bitmap;
639
640 if (v == (u_daddr_t)-1) {
641 blist_free(dest, blk, count);
642 } else if (v != 0) {
643 int i;
644
645 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
646 if (v & (1 << i))
647 blist_free(dest, blk + i, 1);
648 }
649 }
650 return;
651 }
652
653 /*
654 * Meta node
655 */
656
657 if (scan->u.bmu_avail == 0) {
658 /*
659 * Source all allocated, leave dest allocated
660 */
661 return;
662 }
663 if (scan->u.bmu_avail == radix) {
664 /*
665 * Source all free, free entire dest
666 */
667 if (count < radix)
668 blist_free(dest, blk, count);
669 else
670 blist_free(dest, blk, radix);
671 return;
672 }
673
674
675 radix /= BLIST_META_RADIX;
676 next_skip = ((u_int)skip / BLIST_META_RADIX);
677
678 for (i = 1; count && i <= skip; i += next_skip) {
679 if (scan[i].bm_bighint == (daddr_t)-1)
680 break;
681
682 if (count >= radix) {
683 blst_copy(
684 &scan[i],
685 blk,
686 radix,
687 next_skip - 1,
688 dest,
689 radix
690 );
691 count -= radix;
692 } else {
693 if (count) {
694 blst_copy(
695 &scan[i],
696 blk,
697 radix,
698 next_skip - 1,
699 dest,
700 count
701 );
702 }
703 count = 0;
704 }
705 blk += radix;
706 }
707}
708
709/*
710 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
711 *
712 * This routine allocates all blocks in the specified range
713 * regardless of any existing allocations in that range. Returns
714 * the number of blocks allocated by the call.
715 */
716
717static int
718blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
719{
720 int n = blk & (BLIST_BMAP_RADIX - 1);
721 int nblks;
722 u_daddr_t mask, bitmap;
723
724 mask = ((u_daddr_t)-1 << n) &
725 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
726
727 /* Count the number of blocks we're about to allocate */
728 bitmap = scan->u.bmu_bitmap & mask;
729 for (nblks = 0; bitmap != 0; nblks++)
730 bitmap &= bitmap - 1;
731
732 scan->u.bmu_bitmap &= ~mask;
733 return nblks;
734}
735
/*
 * BLIST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static int
blst_meta_fill(
	blmeta_t *scan,
	daddr_t allocBlk,
	daddr_t count,
	daddr_t radix,
	int skip,
	daddr_t blk
) {
	int i;
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case: either the fill covers
		 * this entire node, or nothing here was free anyway.
		 * Whatever was still available is what we consume.
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case: the sublevel is stale, so
		 * initialize the children to all-free before descending.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;	/* terminator */
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix)
		panic("blist_meta_fill: allocation too large");

	/* Locate the first child overlapping allocBlk. */
	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		/* Portion of the range that falls within this child. */
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return nblks;
}
819
/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.  When called with scan == NULL this only
 *	sizes the tree (returns the node count); with a real pointer it
 *	also initializes the nodes.
 */

static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	daddr_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	/* Account for at least the child slots scanned at this level. */
	if (memindex < i)
		memindex = i;
	return(memindex);
}
898
899#ifdef BLIST_DEBUG
900
901static void
902blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
903{
904 int i;
905 int next_skip;
906 int lastState = 0;
907
908 if (radix == BLIST_BMAP_RADIX) {
909 printf(
910 "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
911 tab, tab, "",
912 (long long)blk, (long long)radix,
913 (long long)scan->u.bmu_bitmap,
914 (long long)scan->bm_bighint
915 );
916 return;
917 }
918
919 if (scan->u.bmu_avail == 0) {
920 printf(
921 "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
922 tab, tab, "",
923 (long long)blk,
924 (long long)radix
925 );
926 return;
927 }
928 if (scan->u.bmu_avail == radix) {
929 printf(
930 "%*.*s(%08llx,%lld) ALL FREE\n",
931 tab, tab, "",
932 (long long)blk,
933 (long long)radix
934 );
935 return;
936 }
937
938 printf(
939 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
940 tab, tab, "",
941 (long long)blk, (long long)radix,
942 (long long)scan->u.bmu_avail,
943 (long long)radix,
944 (long long)scan->bm_bighint
945 );
946
947 radix /= BLIST_META_RADIX;
948 next_skip = ((u_int)skip / BLIST_META_RADIX);
949 tab += 4;
950
951 for (i = 1; i <= skip; i += next_skip) {
952 if (scan[i].bm_bighint == (daddr_t)-1) {
953 printf(
954 "%*.*s(%08llx,%lld): Terminator\n",
955 tab, tab, "",
956 (long long)blk, (long long)radix
957 );
958 lastState = 0;
959 break;
960 }
961 blst_radix_print(
962 &scan[i],
963 blk,
964 radix,
965 next_skip - 1,
966 tab
967 );
968 blk += radix;
969 }
970 tab -= 4;
971
972 printf(
973 "%*.*s}\n",
974 tab, tab, ""
975 );
976}
977
978#endif
979
980#ifdef BLIST_DEBUG
981
982int
983main(int ac, char **av)
984{
985 int size = 1024;
986 int i;
987 blist_t bl;
988
989 for (i = 1; i < ac; ++i) {
990 const char *ptr = av[i];
991 if (*ptr != '-') {
992 size = strtol(ptr, NULL, 0);
993 continue;
994 }
995 ptr += 2;
996 fprintf(stderr, "Bad option: %s\n", ptr - 2);
997 exit(1);
998 }
999 bl = blist_create(size);
1000 blist_free(bl, 0, size);
1001
1002 for (;;) {
1003 char buf[1024];
1004 daddr_t da = 0;
1005 daddr_t count = 0;
1006
1007
1008 printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
1009 (long long)size, (long long)bl->bl_radix);
1010 fflush(stdout);
1011 if (fgets(buf, sizeof(buf), stdin) == NULL)
1012 break;
1013 switch(buf[0]) {
1014 case 'r':
1015 if (sscanf(buf + 1, "%lld", &count) == 1) {
1016 blist_resize(&bl, count, 1);
1017 } else {
1018 printf("?\n");
1019 }
1020 case 'p':
1021 blist_print(bl);
1022 break;
1023 case 'a':
1024 if (sscanf(buf + 1, "%lld", &count) == 1) {
1025 daddr_t blk = blist_alloc(bl, count);
1026 printf(" R=%08llx\n", (long long)blk);
1027 } else {
1028 printf("?\n");
1029 }
1030 break;
1031 case 'f':
1032 if (sscanf(buf + 1, "%llx %lld",
1033 (long long *)&da, (long long *)&count) == 2) {
1034 blist_free(bl, da, count);
1035 } else {
1036 printf("?\n");
1037 }
1038 break;
1039 case 'l':
1040 if (sscanf(buf + 1, "%llx %lld",
1041 (long long *)&da, (long long *)&count) == 2) {
1042 printf(" n=%d\n",
1043 blist_fill(bl, da, count));
1044 } else {
1045 printf("?\n");
1046 }
1047 break;
1048 case '?':
1049 case 'h':
1050 puts(
1051 "p -print\n"
1052 "a %d -allocate\n"
1053 "f %x %d -free\n"
1054 "l %x %d -fill\n"
1055 "r %d -resize\n"
1056 "h/? -help"
1057 );
1058 break;
1059 default:
1060 printf("?\n");
1061 break;
1062 }
1063 }
1064 return(0);
1065}
1066
/*
 * panic() -	user-mode stand-in for the kernel panic(): report the
 *		formatted message on stderr and terminate the process.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);
	fprintf(stderr, "\n");
	exit(1);
}
1078
1079#endif
1080