/*
 * Copyright 2008, Axel Dörfler, axeld@pinc-software.de.
 * Copyright 2002/03, Thomas Kurschel. All rights reserved.
 *
 * Distributed under the terms of the MIT License.
 */

/*!	Deadlock-safe allocation of locked memory.

	Allocation/freeing is optimized for speed. The count of <sem>
	is the number of free blocks and thus would have to be modified
	on each alloc() and free(). As this count only matters while
	an allocation is waiting for a free block, <sem> is only
	updated on demand - the correct number of free blocks is
	stored in <free_blocks>. There are only three cases where
	<sem> is updated:

	- if an allocation fails because there is no free block left;
	  in this case, <num_waiting> is increased by one, then the
	  thread makes <sem> up-to-date and waits for a free block
	  via <sem> in one step; finally, <num_waiting> is decreased
	  by one
	- if a block is freed and <num_waiting> is non-zero;
	  here, the count of <sem> is updated to release threads waiting
	  for an allocation
	- if a new chunk of blocks is allocated;
	  same as the previous case
*/
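
/*	A rough usage sketch from a client driver's point of view - a minimal
	illustration only. It assumes the cookie typedef and the interface
	members (alloc/free/create/destroy) declared in <drivers/locked_pool.h>,
	which the interface exported at the end of this file fills in; the
	sizes, limits and the "my_driver_pool" name are made up:

	locked_pool_interface *poolInterface;
	locked_pool_cookie pool;
	void *block;

	get_module(LOCKED_POOL_MODULE_NAME, (module_info **)&poolInterface);

	// up to 64 blocks of 512 bytes, 8 byte aligned, kept locked in memory
	pool = poolInterface->create(512, 7, 0, B_PAGE_SIZE, 64, 1,
		"my_driver_pool", B_FULL_LOCK, NULL, NULL, NULL);

	block = poolInterface->alloc(pool);	// may wait, but cannot deadlock
	// ... use the locked memory ...
	poolInterface->free(pool, block);

	poolInterface->destroy(pool);
	put_module(LOCKED_POOL_MODULE_NAME);
*/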


#include <KernelExport.h>
#include <drivers/locked_pool.h>
#include <lock.h>
#include "dl_list.h"

#include <string.h>
#include <module.h>
#include <malloc.h>


//#define TRACE_LOCKED_POOL
#ifdef TRACE_LOCKED_POOL
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


// info about a pool
typedef struct locked_pool {
	struct mutex mutex;			// to be held whenever a variable in the first
								// part of this structure is read or modified
	int free_blocks;			// # of free blocks
	int num_waiting;			// # of waiting allocations
	void *free_list;			// list of free blocks
	int next_ofs;				// offset of next-pointer in block

	sem_id sem;					// count = number of free blocks
	char *name;
	size_t header_size;			// effective size of chunk header
	struct chunk_header *chunks;	// list of chunks
	size_t block_size;			// size of a memory block
	uint32 lock_flags;			// memory locking flags passed to create_area()
	int min_free_blocks;		// min. number of free blocks
	int num_blocks;				// cur. number of blocks
	int max_blocks;				// maximum number of blocks
	int enlarge_by;				// number of blocks to enlarge pool by
	size_t alignment;			// block alignment restrictions
	locked_pool_add_hook add_hook;		// user hooks
	locked_pool_remove_hook remove_hook;
	void *hook_arg;						// arg for user hooks
	struct locked_pool *prev, *next;	// global cyclic list
} locked_pool;


// header of a memory chunk
typedef struct chunk_header {
	struct chunk_header *next;	// next chunk in the pool's chunk list
	area_id area;				// underlying area
	int num_blocks;				// size in blocks
} chunk_header;
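
/*	Each chunk is a single kernel area laid out as follows; header_size is
	sizeof(chunk_header) padded according to the pool's alignment (see
	create_pool()):

	| chunk_header, padded to header_size | block 0 | block 1 | ... | block n-1 |
*/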


// global list of pools
static locked_pool *sLockedPools;
// mutex to protect sLockedPools
static mutex sLockedPoolsLock;
// true if the enlarger thread should shut down
static bool sShuttingDown;
// background thread to enlarge pools
static thread_id sEnlargerThread;
// semaphore to wake up the enlarger thread
static sem_id sEnlargerSemaphore;

// macro to access the next-pointer in a free block
#define NEXT_PTR(pool, a) ((void **)(((char *)a) + pool->next_ofs))
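
/*	Free blocks are chained through the blocks themselves: NEXT_PTR() yields
	the location of the next-pointer that lives <next_ofs> bytes into a
	block, so no extra bookkeeping memory is needed. A hypothetical client
	could reserve that slot itself, e.g.:

	typedef struct my_request {
		struct my_request *next;	// used by the pool while the block is free
		uint8 data[500];
	} my_request;

	// ... and pass next_ofs = offsetof(struct my_request, next) to create()
*/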


/*!	Enlarge memory pool by <numBlocks> blocks */
static status_t
enlarge_pool(locked_pool *pool, int numBlocks)
{
	void **next;
	int i;
	int numWaiting;
	status_t status;
	area_id area;
	chunk_header *chunk;
	size_t chunkSize;
	void *block, *lastBlock;

	TRACE(("enlarge_pool()\n"));

	// get memory directly from VM; we don't let user code access this memory
	chunkSize = numBlocks * pool->block_size + pool->header_size;
	chunkSize = (chunkSize + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1);
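	// e.g. 100 blocks of 64 bytes plus a 32 byte header would give a raw
	// chunkSize of 6432, rounded up to 8192 here (two pages, assuming the
	// usual 4096 byte B_PAGE_SIZE); the numbers are illustrative only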

	status = area = create_area(pool->name,
		(void **)&chunk, B_ANY_KERNEL_ADDRESS, chunkSize,
		pool->lock_flags, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (status < B_OK) {
		dprintf("cannot enlarge pool (%s)\n", strerror(status));
		// TODO: we should wait a bit and try again!
		return status;
	}

	chunk->area = area;
	chunk->num_blocks = numBlocks;

	// create free_list and call add-hook
	// very important: we build the free list within the chunk so that it
	// runs from lower to higher addresses; at the end of the loop,
	// "next" points to the head of the list and "lastBlock" to the
	// last list node!
	next = NULL;

	lastBlock = (char *)chunk + pool->header_size +
		(numBlocks - 1) * pool->block_size;

	for (i = 0, block = lastBlock; i < numBlocks;
		 ++i, block = (char *)block - pool->block_size)
	{
		if (pool->add_hook) {
			if ((status = pool->add_hook(block, pool->hook_arg)) < B_OK)
				break;
		}

		*NEXT_PTR(pool, block) = next;
		next = block;
	}

	if (i < numBlocks) {
		// oops - pre-init failed somehow;
		// call the remove-hook for all blocks we already called the add-hook for
		int j;

		for (block = lastBlock, j = 0; j < i; ++j,
				block = (char *)block - pool->block_size) {
			if (pool->remove_hook)
				pool->remove_hook(block, pool->hook_arg);
		}

		// destroy area and give up
		delete_area(chunk->area);

		return status;
	}

	// add new blocks to pool
	mutex_lock(&pool->mutex);

	// see remarks about initialising the list within the chunk
	*NEXT_PTR(pool, lastBlock) = pool->free_list;
	pool->free_list = next;

	chunk->next = pool->chunks;
	pool->chunks = chunk;

	pool->num_blocks += numBlocks;
	pool->free_blocks += numBlocks;

	TRACE(("done - num_blocks=%d, free_blocks=%d, num_waiting=%d\n",
		pool->num_blocks, pool->free_blocks, pool->num_waiting));

	numWaiting = min_c(pool->num_waiting, numBlocks);
	pool->num_waiting -= numWaiting;

	mutex_unlock(&pool->mutex);

	// release threads that wait for free blocks
	release_sem_etc(pool->sem, numWaiting, 0);

	return B_OK;
}


/*!	Background thread that adjusts pool sizes */
static int32
enlarger_thread(void *arg)
{
	while (1) {
		locked_pool *pool;

		acquire_sem(sEnlargerSemaphore);

		if (sShuttingDown)
			break;

		// protect traversal of the global list and
		// keep destroy_pool() from cleaning up a pool we are enlarging
		mutex_lock(&sLockedPoolsLock);

		for (pool = sLockedPools; pool; pool = pool->next) {
			int numFree;

			// this mutex is probably not necessary (at least on 80x86),
			// but I'm not sure about the atomicity on other architectures
			// (anyway - this routine is not performance critical)
			mutex_lock(&pool->mutex);
			numFree = pool->free_blocks;
			mutex_unlock(&pool->mutex);

			// perhaps blocks got freed meanwhile, i.e. the pool is large enough
			if (numFree > pool->min_free_blocks)
				continue;

			// enlarge the pool as much as possible,
			// but never create more blocks than defined - the caller may have
			// a good reason for choosing the limit
			if (pool->num_blocks < pool->max_blocks) {
				enlarge_pool(pool,
					min_c(pool->enlarge_by, pool->max_blocks - pool->num_blocks));
			}
		}

		mutex_unlock(&sLockedPoolsLock);
	}

	return 0;
}


/*!	Free all chunks belonging to the pool */
static void
free_chunks(locked_pool *pool)
{
	chunk_header *chunk, *next;

	for (chunk = pool->chunks; chunk; chunk = next) {
		int i;
		void *block, *lastBlock;

		next = chunk->next;

		lastBlock = (char *)chunk + pool->header_size +
			(chunk->num_blocks - 1) * pool->block_size;

		// don't forget to call the remove-hook for each block of this chunk
		for (i = 0, block = lastBlock; i < chunk->num_blocks;
				++i, block = (char *)block - pool->block_size) {
			if (pool->remove_hook)
				pool->remove_hook(block, pool->hook_arg);
		}

		delete_area(chunk->area);
	}

	pool->chunks = NULL;
}


/*!	Global init, executed when the module is loaded */
static status_t
init_locked_pool(void)
{
	status_t status;

	mutex_init(&sLockedPoolsLock, "locked_pool_global_list");

	status = sEnlargerSemaphore = create_sem(0, "locked_pool_enlarger");
	if (status < B_OK)
		goto err2;

	sLockedPools = NULL;
	sShuttingDown = false;

	status = sEnlargerThread = spawn_kernel_thread(enlarger_thread,
		"locked_pool_enlarger", B_NORMAL_PRIORITY, NULL);
	if (status < B_OK)
		goto err3;

	resume_thread(sEnlargerThread);
	return B_OK;

err3:
	delete_sem(sEnlargerSemaphore);
err2:
	mutex_destroy(&sLockedPoolsLock);
	return status;
}


/*!	Global uninit, executed before the module is unloaded */
static status_t
uninit_locked_pool(void)
{
	sShuttingDown = true;

	release_sem(sEnlargerSemaphore);

	wait_for_thread(sEnlargerThread, NULL);

	delete_sem(sEnlargerSemaphore);
	mutex_destroy(&sLockedPoolsLock);

	return B_OK;
}


//	#pragma mark - Module API


/*!	Allocate a block from the pool */
static void *
pool_alloc(locked_pool *pool)
{
	void *block;

	TRACE(("pool_alloc()\n"));

	mutex_lock(&pool->mutex);

	--pool->free_blocks;

	if (pool->free_blocks >= 0) {
		// there are free blocks - grab one

		TRACE(("freeblocks=%d, free_list=%p\n",
			pool->free_blocks, pool->free_list));

		block = pool->free_list;
		pool->free_list = *NEXT_PTR(pool, block);

		TRACE(("new free_list=%p\n", pool->free_list));

		mutex_unlock(&pool->mutex);
		return block;
	}

	// the entire pool is in use;
	// we should do a ++free_blocks here, but this could lead to a race
	// condition: while we wait for <sem>, a block could get released and
	// another thread could call alloc() before we grab the freshly freed
	// block - that thread would overtake us and grab the free block
	// instead! by leaving free_blocks at a negative value, the other
	// thread cannot see the free block and thus will leave it for us

	// tell the others that we are waiting on the semaphore
	++pool->num_waiting;

	TRACE(("%d waiting allocs\n", pool->num_waiting));

	mutex_unlock(&pool->mutex);

	// wake up the background thread
	release_sem_etc(sEnlargerSemaphore, 1, B_DO_NOT_RESCHEDULE);
	// make the semaphore up-to-date and wait until a block is available
	acquire_sem(pool->sem);

	mutex_lock(&pool->mutex);

	TRACE(("continuing alloc (%d free blocks)\n", pool->free_blocks));

	block = pool->free_list;
	pool->free_list = *NEXT_PTR(pool, block);

	mutex_unlock(&pool->mutex);
	return block;
}


static void
pool_free(locked_pool *pool, void *block)
{
	TRACE(("pool_free()\n"));

	mutex_lock(&pool->mutex);

	// add block to free list
	*NEXT_PTR(pool, block) = pool->free_list;
	pool->free_list = block;

	++pool->free_blocks;

	TRACE(("freeblocks=%d, free_list=%p\n", pool->free_blocks,
		pool->free_list));

	if (pool->num_waiting == 0) {
		// if no one is waiting, we are done
		mutex_unlock(&pool->mutex);
		return;
	}

	// someone is waiting on the semaphore

	TRACE(("%d waiting allocs\n", pool->num_waiting));
	pool->num_waiting--;

	mutex_unlock(&pool->mutex);

	// now <sem> is up-to-date and one waiting allocation can continue
	release_sem(pool->sem);
}


static locked_pool *
create_pool(int block_size, int alignment, int next_ofs,
	int chunkSize, int max_blocks, int min_free_blocks,
	const char *name, uint32 lock_flags,
	locked_pool_add_hook add_hook,
	locked_pool_remove_hook remove_hook, void *hook_arg)
{
	locked_pool *pool;
	status_t status;

	TRACE(("create_pool()\n"));

	pool = (locked_pool *)malloc(sizeof(*pool));
	if (pool == NULL)
		return NULL;

	memset(pool, 0, sizeof(*pool));

	mutex_init(&pool->mutex, "locked_pool");

	if ((status = pool->sem = create_sem(0, "locked_pool")) < 0)
		goto err1;

	if ((pool->name = strdup(name)) == NULL) {
		status = B_NO_MEMORY;
		goto err3;
	}

	pool->alignment = alignment;

	// take care that there is always enough space to fulfill alignment
	pool->block_size = (block_size + pool->alignment) & ~pool->alignment;
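	// note: <alignment> is taken as a mask (2^n - 1), as suggested by the
	// rounding above and in the header_size computation below; e.g. with
	// block_size = 60 and alignment = 7, block_size is rounded up to 64
	// (illustrative values only)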

	pool->next_ofs = next_ofs;
	pool->lock_flags = lock_flags;

	pool->header_size = max_c(
		(sizeof(chunk_header) + pool->alignment) & ~pool->alignment,
		pool->alignment + 1);

	pool->enlarge_by = (((chunkSize + B_PAGE_SIZE - 1) & ~(B_PAGE_SIZE - 1))
		- pool->header_size) / pool->block_size;
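	// e.g. a chunkSize of 16384 with header_size = 32 and block_size = 64
	// yields enlarge_by = (16384 - 32) / 64 = 255 blocks per chunk
	// (illustrative values only)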

	pool->max_blocks = max_blocks;
	pool->min_free_blocks = min_free_blocks;
	pool->free_blocks = 0;
	pool->num_blocks = 0;
	pool->num_waiting = 0;
	pool->free_list = NULL;
	pool->add_hook = add_hook;
	pool->remove_hook = remove_hook;
	pool->hook_arg = hook_arg;
	pool->chunks = NULL;

	TRACE(("block_size=%d, alignment=%d, next_ofs=%d, lock_flags=%d, header_size=%d, enlarge_by=%d\n",
		(int)pool->block_size, (int)pool->alignment, (int)pool->next_ofs,
		(int)pool->lock_flags, (int)pool->header_size, pool->enlarge_by));

	// if there is a minimum size, enlarge the pool right now
	if (min_free_blocks > 0) {
		if ((status = enlarge_pool(pool,
				min_c(pool->enlarge_by, pool->max_blocks))) < 0)
			goto err4;
	}

	// add to the global list, so the enlarger thread takes care of this pool
	mutex_lock(&sLockedPoolsLock);
	ADD_DL_LIST_HEAD(pool, sLockedPools, );
	mutex_unlock(&sLockedPoolsLock);

	return pool;

err4:
	free(pool->name);
err3:
	delete_sem(pool->sem);
err1:
	mutex_destroy(&pool->mutex);
	free(pool);
	return NULL;
}


static void
destroy_pool(locked_pool *pool)
{
	TRACE(("destroy_pool()\n"));

	// first, remove the pool from the global list, so the enlarger thread
	// won't touch it anymore
	mutex_lock(&sLockedPoolsLock);
	REMOVE_DL_LIST(pool, sLockedPools, );
	mutex_unlock(&sLockedPoolsLock);

	// then clean up the pool itself
	free_chunks(pool);

	free(pool->name);
	delete_sem(pool->sem);
	mutex_destroy(&pool->mutex);

	free(pool);
}


static status_t
std_ops(int32 op, ...)
{
	switch (op) {
		case B_MODULE_INIT:
			return init_locked_pool();
		case B_MODULE_UNINIT:
			return uninit_locked_pool();

		default:
			return B_ERROR;
	}
}


locked_pool_interface interface = {
	{
		LOCKED_POOL_MODULE_NAME,
		0,
		std_ops
	},

	pool_alloc,
	pool_free,

	create_pool,
	destroy_pool
};


module_info *modules[] = {
	&interface.minfo,
	NULL
};