1/*-
2 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
5 * are met:
6 * 1. Redistributions of source code must retain the above copyright
7 *    notice, this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright
9 *    notice, this list of conditions and the following disclaimer in the
10 *    documentation and/or other materials provided with the distribution.
11 * 4. Neither the name of the University nor the names of its contributors
12 *    may be used to endorse or promote products derived from this software
13 *    without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
30 *
31 *	This module implements a general bitmap allocator/deallocator.  The
32 *	allocator eats around 2 bits per 'block'.  The module does not
33 *	try to interpret the meaning of a 'block' other then to return
34 *	SWAPBLK_NONE on an allocation failure.
35 *
36 *	A radix tree is used to maintain the bitmap.  Two radix constants are
37 *	involved:  One for the bitmaps contained in the leaf nodes (typically
38 *	32), and one for the meta nodes (typically 16).  Both meta and leaf
39 *	nodes have a hint field.  This field gives us a hint as to the largest
40 *	free contiguous range of blocks under the node.  It may contain a
41 *	value that is too high, but will never contain a value that is too
42 *	low.  When the radix tree is searched, allocation failures in subtrees
43 *	update the hint.
44 *
45 *	The radix tree also implements two collapsed states for meta nodes:
46 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
47 *	in either of these two states, all information contained underneath
48 *	the node is considered stale.  These states are used to optimize
49 *	allocation and freeing operations.
50 *
51 * 	The hinting greatly increases code efficiency for allocations while
52 *	the general radix structure optimizes both allocations and frees.  The
53 *	radix tree should be able to operate well no matter how much
54 *	fragmentation there is and no matter how large a bitmap is used.
55 *
56 *	Unlike the rlist code, the blist code wires all necessary memory at
57 *	creation time.  Neither allocations nor frees require interaction with
58 *	the memory subsystem.  In contrast, the rlist code may allocate memory
59 *	on an rlist_free() call.  The non-blocking features of the blist code
60 *	are used to great advantage in the swap code (vm/nswap_pager.c).  The
61 *	rlist code uses a little less overall memory then the blist code (but
62 *	due to swap interleaving not all that much less), but the blist code
63 *	scales much, much better.
64 *
65 *	LAYOUT: The radix tree is layed out recursively using a
66 *	linear array.  Each meta node is immediately followed (layed out
67 *	sequentially in memory) by BLIST_META_RADIX lower level nodes.  This
68 *	is a recursive structure but one that can be easily scanned through
69 *	a very simple 'skip' calculation.  In order to support large radixes,
70 *	portions of the tree may reside outside our memory allocation.  We
71 *	handle this with an early-termination optimization (when bighint is
72 *	set to -1) on the scan.  The memory allocation is only large enough
73 *	to cover the number of blocks requested at creation time even if it
74 *	must be encompassed in larger root-node radix.
75 *
76 *	NOTE: the allocator cannot currently allocate more then
77 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
78 *	large' if you try.  This is an area that could use improvement.  The
79 *	radix is large enough that this restriction does not effect the swap
80 *	system, though.  Currently only the allocation code is effected by
81 *	this algorithmic unfeature.  The freeing code can handle arbitrary
82 *	ranges.
83 *
84 *	This code can be compiled stand-alone for debugging.
85 */
86
87/*
88 *  NOTE: a few modifications is made in the orginal FreeBSD code:
89 *  1. change the name of some variables/constants
90 *  2. discard the code handling ALL_FREE and ALL_ALLOCATED state
91 *  3. allocate more than 32 slots won't lead to kernel panic
92 *  4. remove all the code for debugging
93 *
94 *                                                      Zhao Shuai
95 *                                                   upczhsh@163.com
96 */
97
98
99#include <stdlib.h>
100#include <util/RadixBitmap.h>
101
102#define TERMINATOR -1
103
104
105static uint32
106radix_bitmap_init(radix_node *node, uint32 radix, uint32 skip, uint32 slots)
107{
108	uint32 index = 0;
109	int32 big_hint = radix < slots ? radix : slots;
110
111	// leaf node
112	if (radix == BITMAP_RADIX) {
113		if (node) {
114			node->big_hint = big_hint;
115			if (big_hint == BITMAP_RADIX)
116				node->u.bitmap = 0;
117			else
118				node->u.bitmap = (bitmap_t)-1 << big_hint;
119		}
120		return index;
121	}
122
123	// not leaf node
124	if (node) {
125		node->big_hint = big_hint;
126		node->u.available = big_hint;
127	}
128
129	radix /= NODE_RADIX;
130	uint32 next_skip = skip / NODE_RADIX;
131
132	uint32 i;
133	for (i = 1; i <= skip; i += next_skip) {
134		if (slots >= radix) {
135			index = i + radix_bitmap_init(node ? &node[i] : NULL,
136					radix, next_skip - 1, radix);
137			slots -= radix;
138		} else if (slots > 0) {
139			index = i + radix_bitmap_init(node ? &node[i] : NULL,
140					radix, next_skip - 1, slots);
141			slots = 0;
142		} else { // add a terminator
143			if (node)
144				node[i].big_hint = TERMINATOR;
145			break;
146		}
147	}
148
149	if (index < i)
150		index = i;
151
152	return index;
153}
154
155
156radix_bitmap *
157radix_bitmap_create(uint32 slots)
158{
159	uint32 radix = BITMAP_RADIX;
160	uint32 skip = 0;
161
162	while (radix < slots) {
163		radix *= NODE_RADIX;
164		skip = (skip + 1) * NODE_RADIX;
165	}
166
167	radix_bitmap *bmp = (radix_bitmap *)malloc(sizeof(radix_bitmap));
168	if (bmp == NULL)
169		return NULL;
170
171	bmp->radix = radix;
172	bmp->skip = skip;
173	bmp->free_slots = slots;
174	bmp->root_size = 1 + radix_bitmap_init(NULL, radix, skip, slots);
175
176	bmp->root = (radix_node *)malloc(bmp->root_size * sizeof(radix_node));
177	if (bmp->root == NULL) {
178		free(bmp);
179		return NULL;
180	}
181
182	radix_bitmap_init(bmp->root, radix, skip, slots);
183
184	return bmp;
185}
186
187
188void
189radix_bitmap_destroy(radix_bitmap *bmp)
190{
191	free(bmp->root);
192	free(bmp);
193}
194
195
196static radix_slot_t
197radix_leaf_alloc(radix_node *leaf, radix_slot_t slotIndex, int32 count)
198{
199	if (count <= (int32)BITMAP_RADIX) {
200		bitmap_t bitmap = ~leaf->u.bitmap;
201		uint32 n = BITMAP_RADIX - count;
202		bitmap_t mask = (bitmap_t)-1 >> n;
203		for (uint32 j = 0; j <= n; j++) {
204			if ((bitmap & mask) == mask) {
205				leaf->u.bitmap |= mask;
206				return (slotIndex + j);
207			}
208			mask <<= 1;
209		}
210	}
211
212	// we could not allocate count here, update big_hint
213	if (leaf->big_hint >= count)
214		leaf->big_hint = count - 1;
215	return RADIX_SLOT_NONE;
216}
217
218
219static radix_slot_t
220radix_node_alloc(radix_node *node, radix_slot_t slotIndex, int32 count,
221		uint32 radix, uint32 skip)
222{
223	uint32 next_skip = skip / NODE_RADIX;
224	radix /= NODE_RADIX;
225
226	for (uint32 i = 1; i <= skip; i += next_skip) {
227		if (node[i].big_hint == TERMINATOR)  // TERMINATOR
228			break;
229
230		if (count <= node[i].big_hint) {
231			radix_slot_t addr = RADIX_SLOT_NONE;
232			if (next_skip == 1)
233				addr = radix_leaf_alloc(&node[i], slotIndex, count);
234			else
235				addr = radix_node_alloc(&node[i], slotIndex, count, radix,
236						next_skip - 1);
237			if (addr != RADIX_SLOT_NONE) {
238				node->u.available -= count;
239				if (node->big_hint > node->u.available)
240					node->big_hint = node->u.available;
241
242				return addr;
243			}
244		}
245		slotIndex += radix;
246	}
247
248	// we could not allocate count in the subtree, update big_hint
249	if (node->big_hint >= count)
250		node->big_hint = count - 1;
251	return RADIX_SLOT_NONE;
252}
253
254
255radix_slot_t
256radix_bitmap_alloc(radix_bitmap *bmp, uint32 count)
257{
258	radix_slot_t addr = RADIX_SLOT_NONE;
259
260	if (bmp->radix == BITMAP_RADIX)
261		addr = radix_leaf_alloc(bmp->root, 0, count);
262	else
263		addr = radix_node_alloc(bmp->root, 0, count, bmp->radix, bmp->skip);
264
265	if (addr != RADIX_SLOT_NONE)
266		bmp->free_slots -= count;
267
268	return addr;
269}
270
271
272static void
273radix_leaf_dealloc(radix_node *leaf, radix_slot_t slotIndex, uint32 count)
274{
275	uint32 n = slotIndex & (BITMAP_RADIX - 1);
276	bitmap_t mask = ((bitmap_t)-1 >> (BITMAP_RADIX - count - n))
277		& ((bitmap_t)-1 << n);
278	leaf->u.bitmap &= ~mask;
279
280	leaf->big_hint = BITMAP_RADIX;
281}
282
283
284static void
285radix_node_dealloc(radix_node *node, radix_slot_t slotIndex, uint32 count,
286		uint32 radix, uint32 skip, radix_slot_t index)
287{
288	node->u.available += count;
289
290	uint32 next_skip = skip / NODE_RADIX;
291	radix /= NODE_RADIX;
292
293	uint32 i = (slotIndex - index) / radix;
294	index += i * radix;
295	i = i * next_skip + 1;
296
297	while (i <= skip && index < slotIndex + count) {
298		uint32 v = index + radix - slotIndex;
299		if (v > count)
300			v = count;
301
302		if (next_skip == 1)
303			radix_leaf_dealloc(&node[i], slotIndex, v);
304		else
305			radix_node_dealloc(&node[i], slotIndex, v, radix,
306					next_skip - 1, index);
307
308		if (node->big_hint < node[i].big_hint)
309			node->big_hint = node[i].big_hint;
310
311		count -= v;
312		slotIndex += v;
313		index += radix;
314		i += next_skip;
315	}
316}
317
318
319void
320radix_bitmap_dealloc(radix_bitmap *bmp, radix_slot_t slotIndex, uint32 count)
321{
322	if (bmp->radix == BITMAP_RADIX)
323		radix_leaf_dealloc(bmp->root, slotIndex, count);
324	else
325		radix_node_dealloc(bmp->root, slotIndex, count, bmp->radix,
326				bmp->skip, 0);
327
328	bmp->free_slots += count;
329}
330
331