1/*
2 * Copyright 2008-2010, Michael Lotz, mmlr@mlotz.ch.
3 * Copyright 2002-2010, Axel D��rfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
5 *
6 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7 * Distributed under the terms of the NewOS License.
8 */
9
10
11#include <arch/debug.h>
12#include <debug.h>
13#include <elf.h>
14#include <heap.h>
15#include <int.h>
16#include <kernel.h>
17#include <lock.h>
18#include <string.h>
19#include <team.h>
20#include <thread.h>
21#include <tracing.h>
22#include <util/AutoLock.h>
23#include <vm/vm.h>
24#include <vm/vm_page.h>
25
26
27//#define TRACE_HEAP
28#ifdef TRACE_HEAP
29#	define TRACE(x) dprintf x
30#else
31#	define TRACE(x) ;
32#endif
33
34
35#if !USE_DEBUG_HEAP_FOR_MALLOC
36#	undef KERNEL_HEAP_LEAK_CHECK
37#endif
38
39
40#if KERNEL_HEAP_LEAK_CHECK
41typedef struct heap_leak_check_info_s {
42	addr_t		caller;
43	size_t		size;
44	thread_id	thread;
45	team_id		team;
46} heap_leak_check_info;
47
48struct caller_info {
49	addr_t		caller;
50	uint32		count;
51	uint32		size;
52};
53
54static const int32 kCallerInfoTableSize = 1024;
55static caller_info sCallerInfoTable[kCallerInfoTableSize];
56static int32 sCallerInfoCount = 0;
57#endif	// KERNEL_HEAP_LEAK_CHECK
58
59
60typedef struct heap_page_s heap_page;
61
62
63typedef struct heap_area_s {
64	area_id			area;
65
66	addr_t			base;
67	size_t			size;
68
69	uint32			page_count;
70	uint32			free_page_count;
71
72	heap_page *		free_pages;
73	heap_page *		page_table;
74
75	heap_area_s *	prev;
76	heap_area_s *	next;
77	heap_area_s *	all_next;
78} heap_area;
79
80
81#define MAX_BIN_COUNT	31	// depends on the size of the bin_index field
82
83typedef struct heap_page_s {
84	heap_area *		area;
85	uint16			index;
86	uint16			bin_index : 5;
87	uint16			free_count : 10;
88	uint16			in_use : 1;
89	heap_page_s *	next;
90	heap_page_s *	prev;
91	union {
92		uint16			empty_index;
93		uint16			allocation_id; // used for bin == bin_count allocations
94	};
95	addr_t *		free_list;
96} heap_page;
97
98
99typedef struct heap_bin_s {
100	mutex		lock;
101	uint32		element_size;
102	uint16		max_free_count;
103	heap_page *	page_list; // sorted so that the desired page is always first
104} heap_bin;
105
106
107struct heap_allocator_s {
108	rw_lock		area_lock;
109	mutex		page_lock;
110
111	const char *name;
112	uint32		bin_count;
113	uint32		page_size;
114
115	uint32		total_pages;
116	uint32		total_free_pages;
117	uint32		empty_areas;
118
119#if KERNEL_HEAP_LEAK_CHECK
120	addr_t		(*get_caller)();
121#endif
122
123	heap_bin *	bins;
124	heap_area *	areas; // sorted so that the desired area is always first
125	heap_area *	all_areas; // all areas including full ones
126};
127
128
129static const uint32 kAreaAllocationMagic = 'AAMG';
130typedef struct area_allocation_info_s {
131	area_id		area;
132	void *		base;
133	uint32		magic;
134	size_t		size;
135	size_t		allocation_size;
136	size_t		allocation_alignment;
137	void *		allocation_base;
138} area_allocation_info;
139
140
141struct DeferredFreeListEntry : SinglyLinkedListLinkImpl<DeferredFreeListEntry> {
142};
143
144
145typedef SinglyLinkedList<DeferredFreeListEntry> DeferredFreeList;
146typedef SinglyLinkedList<DeferredDeletable> DeferredDeletableList;
147
148
149#if USE_DEBUG_HEAP_FOR_MALLOC
150
151#define VIP_HEAP_SIZE	1024 * 1024
152
153// Heap class configuration
154#define HEAP_CLASS_COUNT 3
155static const heap_class sHeapClasses[HEAP_CLASS_COUNT] = {
156	{
157		"small",					/* name */
158		50,							/* initial percentage */
159		B_PAGE_SIZE / 8,			/* max allocation size */
160		B_PAGE_SIZE,				/* page size */
161		8,							/* min bin size */
162		4,							/* bin alignment */
163		8,							/* min count per page */
164		16							/* max waste per page */
165	},
166	{
167		"medium",					/* name */
168		30,							/* initial percentage */
169		B_PAGE_SIZE * 2,			/* max allocation size */
170		B_PAGE_SIZE * 8,			/* page size */
171		B_PAGE_SIZE / 8,			/* min bin size */
172		32,							/* bin alignment */
173		4,							/* min count per page */
174		64							/* max waste per page */
175	},
176	{
177		"large",					/* name */
178		20,							/* initial percentage */
179		HEAP_AREA_USE_THRESHOLD,	/* max allocation size */
180		B_PAGE_SIZE * 16,			/* page size */
181		B_PAGE_SIZE * 2,			/* min bin size */
182		128,						/* bin alignment */
183		1,							/* min count per page */
184		256							/* max waste per page */
185	}
186};
187
188
189static uint32 sHeapCount;
190static heap_allocator *sHeaps[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
191static uint32 *sLastGrowRequest[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
192static uint32 *sLastHandledGrowRequest[HEAP_CLASS_COUNT * SMP_MAX_CPUS];
193
194static heap_allocator *sVIPHeap;
195static heap_allocator *sGrowHeap = NULL;
196static thread_id sHeapGrowThread = -1;
197static sem_id sHeapGrowSem = -1;
198static sem_id sHeapGrownNotify = -1;
199static bool sAddGrowHeap = false;
200
201#endif	// USE_DEBUG_HEAP_FOR_MALLOC
202
203static DeferredFreeList sDeferredFreeList;
204static DeferredDeletableList sDeferredDeletableList;
205static spinlock sDeferredFreeListLock;
206
207
208
209// #pragma mark - Tracing
210
211#if KERNEL_HEAP_TRACING
212namespace KernelHeapTracing {
213
214class Allocate : public AbstractTraceEntry {
215	public:
216		Allocate(addr_t address, size_t size)
217			:	fAddress(address),
218				fSize(size)
219		{
220			Initialized();
221		}
222
223		virtual void AddDump(TraceOutput &out)
224		{
225			out.Print("heap allocate: 0x%08lx (%lu bytes)", fAddress, fSize);
226		}
227
228	private:
229		addr_t	fAddress;
230		size_t	fSize;
231};
232
233
234class Reallocate : public AbstractTraceEntry {
235	public:
236		Reallocate(addr_t oldAddress, addr_t newAddress, size_t newSize)
237			:	fOldAddress(oldAddress),
238				fNewAddress(newAddress),
239				fNewSize(newSize)
240		{
241			Initialized();
242		};
243
244		virtual void AddDump(TraceOutput &out)
245		{
246			out.Print("heap reallocate: 0x%08lx -> 0x%08lx (%lu bytes)",
247				fOldAddress, fNewAddress, fNewSize);
248		}
249
250	private:
251		addr_t	fOldAddress;
252		addr_t	fNewAddress;
253		size_t	fNewSize;
254};
255
256
257class Free : public AbstractTraceEntry {
258	public:
259		Free(addr_t address)
260			:	fAddress(address)
261		{
262			Initialized();
263		};
264
265		virtual void AddDump(TraceOutput &out)
266		{
267			out.Print("heap free: 0x%08lx", fAddress);
268		}
269
270	private:
271		addr_t	fAddress;
272};
273
274
275} // namespace KernelHeapTracing
276
277#	define T(x)	if (!gKernelStartup) new(std::nothrow) KernelHeapTracing::x;
278#else
279#	define T(x)	;
280#endif
281
282
283// #pragma mark - Debug functions
284
285
286#if KERNEL_HEAP_LEAK_CHECK
287static addr_t
288get_caller()
289{
290	// Find the first return address outside of the allocator code. Note, that
291	// this makes certain assumptions about how the code for the functions
292	// ends up in the kernel object.
293	addr_t returnAddresses[5];
294	int32 depth = arch_debug_get_stack_trace(returnAddresses, 5, 0, 1,
295		STACK_TRACE_KERNEL);
296	for (int32 i = 0; i < depth; i++) {
297		if (returnAddresses[i] < (addr_t)&get_caller
298			|| returnAddresses[i] > (addr_t)&deferred_delete) {
299			return returnAddresses[i];
300		}
301	}
302
303	return 0;
304}
305#endif
306
307
308static void
309dump_page(heap_page *page)
310{
311	uint32 count = 0;
312	for (addr_t *temp = page->free_list; temp != NULL; temp = (addr_t *)*temp)
313		count++;
314
315	kprintf("\t\tpage %p: bin_index: %u; free_count: %u; empty_index: %u; "
316		"free_list %p (%" B_PRIu32 " entr%s)\n", page, page->bin_index,
317		page->free_count, page->empty_index, page->free_list, count,
318		count == 1 ? "y" : "ies");
319}
320
321
322static void
323dump_bin(heap_bin *bin)
324{
325	uint32 count = 0;
326	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
327		count++;
328
329	kprintf("\telement_size: %" B_PRIu32 "; max_free_count: %u; page_list %p "
330		"(%" B_PRIu32 " pages);\n", bin->element_size, bin->max_free_count,
331		bin->page_list, count);
332
333	for (heap_page *page = bin->page_list; page != NULL; page = page->next)
334		dump_page(page);
335}
336
337
338static void
339dump_bin_list(heap_allocator *heap)
340{
341	for (uint32 i = 0; i < heap->bin_count; i++)
342		dump_bin(&heap->bins[i]);
343	kprintf("\n");
344}
345
346
347static void
348dump_allocator_areas(heap_allocator *heap)
349{
350	heap_area *area = heap->all_areas;
351	while (area) {
352		kprintf("\tarea %p: area: %" B_PRId32 "; base: %p; size: %zu; page_count: "
353			"%" B_PRIu32 "; free_pages: %p (%" B_PRIu32 " entr%s)\n", area,
354			area->area, (void *)area->base, area->size, area->page_count,
355			area->free_pages, area->free_page_count,
356			area->free_page_count == 1 ? "y" : "ies");
357		area = area->all_next;
358	}
359
360	kprintf("\n");
361}
362
363
364static void
365dump_allocator(heap_allocator *heap, bool areas, bool bins)
366{
367	kprintf("allocator %p: name: %s; page_size: %" B_PRIu32 "; bin_count: "
368		"%" B_PRIu32 "; pages: %" B_PRIu32 "; free_pages: %" B_PRIu32 "; "
369		"empty_areas: %" B_PRIu32 "\n", heap, heap->name, heap->page_size,
370		heap->bin_count, heap->total_pages, heap->total_free_pages,
371		heap->empty_areas);
372
373	if (areas)
374		dump_allocator_areas(heap);
375	if (bins)
376		dump_bin_list(heap);
377}
378
379
380static int
381dump_heap_list(int argc, char **argv)
382{
383#if USE_DEBUG_HEAP_FOR_MALLOC
384	if (argc == 2 && strcmp(argv[1], "grow") == 0) {
385		// only dump dedicated grow heap info
386		kprintf("dedicated grow heap:\n");
387		dump_allocator(sGrowHeap, true, true);
388		return 0;
389	}
390#endif
391
392	bool stats = false;
393	int i = 1;
394
395	if (strcmp(argv[1], "stats") == 0) {
396		stats = true;
397		i++;
398	}
399
400	uint64 heapAddress = 0;
401	if (i < argc && !evaluate_debug_expression(argv[i], &heapAddress, true)) {
402		print_debugger_command_usage(argv[0]);
403		return 0;
404	}
405
406	if (heapAddress == 0) {
407#if USE_DEBUG_HEAP_FOR_MALLOC
408		// dump default kernel heaps
409		for (uint32 i = 0; i < sHeapCount; i++)
410			dump_allocator(sHeaps[i], !stats, !stats);
411#else
412		print_debugger_command_usage(argv[0]);
413#endif
414	} else {
415		// dump specified heap
416		dump_allocator((heap_allocator*)(addr_t)heapAddress, !stats, !stats);
417	}
418
419	return 0;
420}
421
422
423#if !KERNEL_HEAP_LEAK_CHECK
424
425static int
426dump_allocations(int argc, char **argv)
427{
428	uint64 heapAddress = 0;
429	bool statsOnly = false;
430	for (int32 i = 1; i < argc; i++) {
431		if (strcmp(argv[i], "stats") == 0)
432			statsOnly = true;
433		else if (!evaluate_debug_expression(argv[i], &heapAddress, true)) {
434			print_debugger_command_usage(argv[0]);
435			return 0;
436		}
437	}
438
439	size_t totalSize = 0;
440	uint32 totalCount = 0;
441#if USE_DEBUG_HEAP_FOR_MALLOC
442	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
443		heap_allocator *heap = sHeaps[heapIndex];
444		if (heapAddress != 0)
445			heap = (heap_allocator *)(addr_t)heapAddress;
446#else
447	while (true) {
448		heap_allocator *heap = (heap_allocator *)(addr_t)heapAddress;
449		if (heap == NULL) {
450			print_debugger_command_usage(argv[0]);
451			return 0;
452		}
453#endif
454#if 0
455	}
456#endif
457
458		// go through all the pages in all the areas
459		heap_area *area = heap->all_areas;
460		while (area) {
461			for (uint32 i = 0; i < area->page_count; i++) {
462				heap_page *page = &area->page_table[i];
463				if (!page->in_use)
464					continue;
465
466				addr_t base = area->base + i * heap->page_size;
467				if (page->bin_index < heap->bin_count) {
468					// page is used by a small allocation bin
469					uint32 elementCount = page->empty_index;
470					size_t elementSize
471						= heap->bins[page->bin_index].element_size;
472					for (uint32 j = 0; j < elementCount;
473							j++, base += elementSize) {
474						// walk the free list to see if this element is in use
475						bool elementInUse = true;
476						for (addr_t *temp = page->free_list; temp != NULL;
477								temp = (addr_t *)*temp) {
478							if ((addr_t)temp == base) {
479								elementInUse = false;
480								break;
481							}
482						}
483
484						if (!elementInUse)
485							continue;
486
487						if (!statsOnly) {
488							kprintf("address: 0x%p; size: %lu bytes\n",
489								(void *)base, elementSize);
490						}
491
492						totalSize += elementSize;
493						totalCount++;
494					}
495				} else {
496					// page is used by a big allocation, find the page count
497					uint32 pageCount = 1;
498					while (i + pageCount < area->page_count
499						&& area->page_table[i + pageCount].in_use
500						&& area->page_table[i + pageCount].bin_index
501							== heap->bin_count
502						&& area->page_table[i + pageCount].allocation_id
503							== page->allocation_id)
504						pageCount++;
505
506					size_t size = pageCount * heap->page_size;
507
508					if (!statsOnly) {
509						kprintf("address: %p; size: %lu bytes\n", (void *)base,
510							size);
511					}
512
513					totalSize += size;
514					totalCount++;
515
516					// skip the allocated pages
517					i += pageCount - 1;
518				}
519			}
520
521			area = area->all_next;
522		}
523
524		if (heapAddress != 0)
525			break;
526	}
527
528	kprintf("total allocations: %" B_PRIu32 "; total bytes: %zu\n", totalCount, totalSize);
529	return 0;
530}
531
532#else // !KERNEL_HEAP_LEAK_CHECK
533
534static int
535dump_allocations(int argc, char **argv)
536{
537	team_id team = -1;
538	thread_id thread = -1;
539	addr_t caller = 0;
540	addr_t address = 0;
541	bool statsOnly = false;
542
543	for (int32 i = 1; i < argc; i++) {
544		if (strcmp(argv[i], "team") == 0)
545			team = parse_expression(argv[++i]);
546		else if (strcmp(argv[i], "thread") == 0)
547			thread = parse_expression(argv[++i]);
548		else if (strcmp(argv[i], "caller") == 0)
549			caller = parse_expression(argv[++i]);
550		else if (strcmp(argv[i], "address") == 0)
551			address = parse_expression(argv[++i]);
552		else if (strcmp(argv[i], "stats") == 0)
553			statsOnly = true;
554		else {
555			print_debugger_command_usage(argv[0]);
556			return 0;
557		}
558	}
559
560	size_t totalSize = 0;
561	uint32 totalCount = 0;
562	for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
563		heap_allocator *heap = sHeaps[heapIndex];
564
565		// go through all the pages in all the areas
566		heap_area *area = heap->all_areas;
567		while (area) {
568			heap_leak_check_info *info = NULL;
569			for (uint32 i = 0; i < area->page_count; i++) {
570				heap_page *page = &area->page_table[i];
571				if (!page->in_use)
572					continue;
573
574				addr_t base = area->base + i * heap->page_size;
575				if (page->bin_index < heap->bin_count) {
576					// page is used by a small allocation bin
577					uint32 elementCount = page->empty_index;
578					size_t elementSize
579						= heap->bins[page->bin_index].element_size;
580					for (uint32 j = 0; j < elementCount;
581							j++, base += elementSize) {
582						// walk the free list to see if this element is in use
583						bool elementInUse = true;
584						for (addr_t *temp = page->free_list; temp != NULL;
585								temp = (addr_t *)*temp) {
586							if ((addr_t)temp == base) {
587								elementInUse = false;
588								break;
589							}
590						}
591
592						if (!elementInUse)
593							continue;
594
595						info = (heap_leak_check_info *)(base + elementSize
596							- sizeof(heap_leak_check_info));
597
598						if ((team == -1 || info->team == team)
599							&& (thread == -1 || info->thread == thread)
600							&& (caller == 0 || info->caller == caller)
601							&& (address == 0 || base == address)) {
602							// interesting...
603							if (!statsOnly) {
604								kprintf("team: % 6" B_PRId32 "; thread: % 6" B_PRId32 "; "
605									"address: 0x%08lx; size: %lu bytes; "
606									"caller: %#lx\n", info->team, info->thread,
607									base, info->size, info->caller);
608							}
609
610							totalSize += info->size;
611							totalCount++;
612						}
613					}
614				} else {
615					// page is used by a big allocation, find the page count
616					uint32 pageCount = 1;
617					while (i + pageCount < area->page_count
618						&& area->page_table[i + pageCount].in_use
619						&& area->page_table[i + pageCount].bin_index
620							== heap->bin_count
621						&& area->page_table[i + pageCount].allocation_id
622							== page->allocation_id)
623						pageCount++;
624
625					info = (heap_leak_check_info *)(base + pageCount
626						* heap->page_size - sizeof(heap_leak_check_info));
627
628					if ((team == -1 || info->team == team)
629						&& (thread == -1 || info->thread == thread)
630						&& (caller == 0 || info->caller == caller)
631						&& (address == 0 || base == address)) {
632						// interesting...
633						if (!statsOnly) {
634							kprintf("team: % 6" B_PRId32 "; thread: % 6" B_PRId32 ";"
635								" address: 0x%08lx; size: %lu bytes;"
636								" caller: %#lx\n", info->team, info->thread,
637								base, info->size, info->caller);
638						}
639
640						totalSize += info->size;
641						totalCount++;
642					}
643
644					// skip the allocated pages
645					i += pageCount - 1;
646				}
647			}
648
649			area = area->all_next;
650		}
651	}
652
653	kprintf("total allocations: %" B_PRIu32 "; total bytes: %" B_PRIuSIZE "\n",
654		totalCount, totalSize);
655	return 0;
656}
657
658
659static caller_info*
660get_caller_info(addr_t caller)
661{
662	// find the caller info
663	for (int32 i = 0; i < sCallerInfoCount; i++) {
664		if (caller == sCallerInfoTable[i].caller)
665			return &sCallerInfoTable[i];
666	}
667
668	// not found, add a new entry, if there are free slots
669	if (sCallerInfoCount >= kCallerInfoTableSize)
670		return NULL;
671
672	caller_info* info = &sCallerInfoTable[sCallerInfoCount++];
673	info->caller = caller;
674	info->count = 0;
675	info->size = 0;
676
677	return info;
678}
679
680
681static int
682caller_info_compare_size(const void* _a, const void* _b)
683{
684	const caller_info* a = (const caller_info*)_a;
685	const caller_info* b = (const caller_info*)_b;
686	return (int)(b->size - a->size);
687}
688
689
690static int
691caller_info_compare_count(const void* _a, const void* _b)
692{
693	const caller_info* a = (const caller_info*)_a;
694	const caller_info* b = (const caller_info*)_b;
695	return (int)(b->count - a->count);
696}
697
698
699static bool
700analyze_allocation_callers(heap_allocator *heap)
701{
702	// go through all the pages in all the areas
703	heap_area *area = heap->all_areas;
704	while (area) {
705		heap_leak_check_info *info = NULL;
706		for (uint32 i = 0; i < area->page_count; i++) {
707			heap_page *page = &area->page_table[i];
708			if (!page->in_use)
709				continue;
710
711			addr_t base = area->base + i * heap->page_size;
712			if (page->bin_index < heap->bin_count) {
713				// page is used by a small allocation bin
714				uint32 elementCount = page->empty_index;
715				size_t elementSize = heap->bins[page->bin_index].element_size;
716				for (uint32 j = 0; j < elementCount; j++, base += elementSize) {
717					// walk the free list to see if this element is in use
718					bool elementInUse = true;
719					for (addr_t *temp = page->free_list; temp != NULL;
720						temp = (addr_t *)*temp) {
721						if ((addr_t)temp == base) {
722							elementInUse = false;
723							break;
724						}
725					}
726
727					if (!elementInUse)
728						continue;
729
730					info = (heap_leak_check_info *)(base + elementSize
731						- sizeof(heap_leak_check_info));
732
733					caller_info *callerInfo = get_caller_info(info->caller);
734					if (callerInfo == NULL) {
735						kprintf("out of space for caller infos\n");
736						return false;
737					}
738
739					callerInfo->count++;
740					callerInfo->size += info->size;
741				}
742			} else {
743				// page is used by a big allocation, find the page count
744				uint32 pageCount = 1;
745				while (i + pageCount < area->page_count
746					&& area->page_table[i + pageCount].in_use
747					&& area->page_table[i + pageCount].bin_index
748						== heap->bin_count
749					&& area->page_table[i + pageCount].allocation_id
750						== page->allocation_id) {
751					pageCount++;
752				}
753
754				info = (heap_leak_check_info *)(base + pageCount
755					* heap->page_size - sizeof(heap_leak_check_info));
756
757				caller_info *callerInfo = get_caller_info(info->caller);
758				if (callerInfo == NULL) {
759					kprintf("out of space for caller infos\n");
760					return false;
761				}
762
763				callerInfo->count++;
764				callerInfo->size += info->size;
765
766				// skip the allocated pages
767				i += pageCount - 1;
768			}
769		}
770
771		area = area->all_next;
772	}
773
774	return true;
775}
776
777
778static int
779dump_allocations_per_caller(int argc, char **argv)
780{
781	bool sortBySize = true;
782	heap_allocator *heap = NULL;
783
784	for (int32 i = 1; i < argc; i++) {
785		if (strcmp(argv[i], "-c") == 0) {
786			sortBySize = false;
787		} else if (strcmp(argv[i], "-h") == 0) {
788			uint64 heapAddress;
789			if (++i >= argc
790				|| !evaluate_debug_expression(argv[i], &heapAddress, true)) {
791				print_debugger_command_usage(argv[0]);
792				return 0;
793			}
794
795			heap = (heap_allocator*)(addr_t)heapAddress;
796		} else {
797			print_debugger_command_usage(argv[0]);
798			return 0;
799		}
800	}
801
802	sCallerInfoCount = 0;
803
804	if (heap != NULL) {
805		if (!analyze_allocation_callers(heap))
806			return 0;
807	} else {
808		for (uint32 heapIndex = 0; heapIndex < sHeapCount; heapIndex++) {
809			if (!analyze_allocation_callers(sHeaps[heapIndex]))
810				return 0;
811		}
812	}
813
814	// sort the array
815	qsort(sCallerInfoTable, sCallerInfoCount, sizeof(caller_info),
816		sortBySize ? &caller_info_compare_size : &caller_info_compare_count);
817
818	kprintf("%" B_PRId32 " different callers, sorted by %s...\n\n",
819		sCallerInfoCount, sortBySize ? "size" : "count");
820
821	kprintf("     count        size      caller\n");
822	kprintf("----------------------------------\n");
823	for (int32 i = 0; i < sCallerInfoCount; i++) {
824		caller_info& info = sCallerInfoTable[i];
825		kprintf("%10" B_PRId32 "  %10" B_PRId32 "  %#08lx", info.count, info.size, info.caller);
826
827		const char *symbol;
828		const char *imageName;
829		bool exactMatch;
830		addr_t baseAddress;
831
832		if (elf_debug_lookup_symbol_address(info.caller, &baseAddress, &symbol,
833				&imageName, &exactMatch) == B_OK) {
834			kprintf("  %s + 0x%lx (%s)%s\n", symbol,
835				info.caller - baseAddress, imageName,
836				exactMatch ? "" : " (nearest)");
837		} else
838			kprintf("\n");
839	}
840
841	return 0;
842}
843
844#endif // KERNEL_HEAP_LEAK_CHECK
845
846
847#if PARANOID_HEAP_VALIDATION
848static void
849heap_validate_heap(heap_allocator *heap)
850{
851	ReadLocker areaReadLocker(heap->area_lock);
852	for (uint32 i = 0; i < heap->bin_count; i++)
853		mutex_lock(&heap->bins[i].lock);
854	MutexLocker pageLocker(heap->page_lock);
855
856	uint32 totalPageCount = 0;
857	uint32 totalFreePageCount = 0;
858	heap_area *area = heap->all_areas;
859	while (area != NULL) {
860		// validate the free pages list
861		uint32 freePageCount = 0;
862		heap_page *lastPage = NULL;
863		heap_page *page = area->free_pages;
864		while (page) {
865			if ((addr_t)page < (addr_t)&area->page_table[0]
866				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
867				panic("free page is not part of the page table\n");
868
869			if (page->index >= area->page_count)
870				panic("free page has invalid index\n");
871
872			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
873				panic("free page index does not lead to target page\n");
874
875			if (page->prev != lastPage)
876				panic("free page entry has invalid prev link\n");
877
878			if (page->in_use)
879				panic("free page marked as in use\n");
880
881			lastPage = page;
882			page = page->next;
883			freePageCount++;
884		}
885
886		totalPageCount += freePageCount;
887		totalFreePageCount += freePageCount;
888		if (area->free_page_count != freePageCount)
889			panic("free page count doesn't match free page list\n");
890
891		// validate the page table
892		uint32 usedPageCount = 0;
893		for (uint32 i = 0; i < area->page_count; i++) {
894			if (area->page_table[i].in_use)
895				usedPageCount++;
896		}
897
898		totalPageCount += usedPageCount;
899		if (freePageCount + usedPageCount != area->page_count) {
900			panic("free pages and used pages do not add up (%lu + %lu != %lu)\n",
901				freePageCount, usedPageCount, area->page_count);
902		}
903
904		area = area->all_next;
905	}
906
907	// validate the areas
908	area = heap->areas;
909	heap_area *lastArea = NULL;
910	uint32 lastFreeCount = 0;
911	while (area != NULL) {
912		if (area->free_page_count < lastFreeCount)
913			panic("size ordering of area list broken\n");
914
915		if (area->prev != lastArea)
916			panic("area list entry has invalid prev link\n");
917
918		lastArea = area;
919		lastFreeCount = area->free_page_count;
920		area = area->next;
921	}
922
923	lastArea = NULL;
924	area = heap->all_areas;
925	while (area != NULL) {
926		if (lastArea != NULL && lastArea->base < area->base)
927			panic("base ordering of all_areas list broken\n");
928
929		lastArea = area;
930		area = area->all_next;
931	}
932
933	// validate the bins
934	for (uint32 i = 0; i < heap->bin_count; i++) {
935		heap_bin *bin = &heap->bins[i];
936		heap_page *lastPage = NULL;
937		heap_page *page = bin->page_list;
938		lastFreeCount = 0;
939		while (page) {
940			area = heap->all_areas;
941			while (area) {
942				if (area == page->area)
943					break;
944				area = area->all_next;
945			}
946
947			if (area == NULL) {
948				panic("page area not present in area list\n");
949				page = page->next;
950				continue;
951			}
952
953			if ((addr_t)page < (addr_t)&area->page_table[0]
954				|| (addr_t)page >= (addr_t)&area->page_table[area->page_count])
955				panic("used page is not part of the page table\n");
956
957			if (page->index >= area->page_count)
958				panic("used page has invalid index\n");
959
960			if ((addr_t)&area->page_table[page->index] != (addr_t)page)
961				panic("used page index does not lead to target page\n");
962
963			if (page->prev != lastPage) {
964				panic("used page entry has invalid prev link (%p vs %p bin "
965					"%lu)\n", page->prev, lastPage, i);
966			}
967
968			if (!page->in_use)
969				panic("used page marked as not in use\n");
970
971			if (page->bin_index != i) {
972				panic("used page with bin index %u in page list of bin %lu\n",
973					page->bin_index, i);
974			}
975
976			if (page->free_count < lastFreeCount)
977				panic("ordering of bin page list broken\n");
978
979			// validate the free list
980			uint32 freeSlotsCount = 0;
981			addr_t *element = page->free_list;
982			addr_t pageBase = area->base + page->index * heap->page_size;
983			while (element) {
984				if ((addr_t)element < pageBase
985					|| (addr_t)element >= pageBase + heap->page_size)
986					panic("free list entry out of page range\n");
987
988				if (((addr_t)element - pageBase) % bin->element_size != 0)
989					panic("free list entry not on a element boundary\n");
990
991				element = (addr_t *)*element;
992				freeSlotsCount++;
993			}
994
995			uint32 slotCount = bin->max_free_count;
996			if (page->empty_index > slotCount) {
997				panic("empty index beyond slot count (%u with %lu slots)\n",
998					page->empty_index, slotCount);
999			}
1000
1001			freeSlotsCount += (slotCount - page->empty_index);
1002			if (freeSlotsCount > slotCount)
1003				panic("more free slots than fit into the page\n");
1004
1005			lastPage = page;
1006			lastFreeCount = page->free_count;
1007			page = page->next;
1008		}
1009	}
1010
1011	pageLocker.Unlock();
1012	for (uint32 i = 0; i < heap->bin_count; i++)
1013		mutex_unlock(&heap->bins[i].lock);
1014	areaReadLocker.Unlock();
1015}
1016#endif // PARANOID_HEAP_VALIDATION
1017
1018
1019// #pragma mark - Heap functions
1020
1021
1022void
1023heap_add_area(heap_allocator *heap, area_id areaID, addr_t base, size_t size)
1024{
1025	heap_area *area = (heap_area *)base;
1026	area->area = areaID;
1027
1028	base += sizeof(heap_area);
1029	size -= sizeof(heap_area);
1030
1031	uint32 pageCount = size / heap->page_size;
1032	size_t pageTableSize = pageCount * sizeof(heap_page);
1033	area->page_table = (heap_page *)base;
1034	base += pageTableSize;
1035	size -= pageTableSize;
1036
1037	// the rest is now actually usable memory (rounded to the next page)
1038	area->base = ROUNDUP(base, B_PAGE_SIZE);
1039	area->size = size & ~(B_PAGE_SIZE - 1);
1040
1041	// now we know the real page count
1042	pageCount = area->size / heap->page_size;
1043	area->page_count = pageCount;
1044
1045	// zero out the page table and fill in page indexes
1046	memset((void *)area->page_table, 0, pageTableSize);
1047	for (uint32 i = 0; i < pageCount; i++) {
1048		area->page_table[i].area = area;
1049		area->page_table[i].index = i;
1050	}
1051
1052	// add all pages up into the free pages list
1053	for (uint32 i = 1; i < pageCount; i++) {
1054		area->page_table[i - 1].next = &area->page_table[i];
1055		area->page_table[i].prev = &area->page_table[i - 1];
1056	}
1057	area->free_pages = &area->page_table[0];
1058	area->free_page_count = pageCount;
1059	area->page_table[0].prev = NULL;
1060	area->next = NULL;
1061
1062	WriteLocker areaWriteLocker(heap->area_lock);
1063	MutexLocker pageLocker(heap->page_lock);
1064	if (heap->areas == NULL) {
1065		// it's the only (empty) area in that heap
1066		area->prev = NULL;
1067		heap->areas = area;
1068	} else {
1069		// link in this area as the last one as it is completely empty
1070		heap_area *lastArea = heap->areas;
1071		while (lastArea->next != NULL)
1072			lastArea = lastArea->next;
1073
1074		lastArea->next = area;
1075		area->prev = lastArea;
1076	}
1077
1078	// insert this area in the all_areas list so it stays ordered by base
1079	if (heap->all_areas == NULL || heap->all_areas->base < area->base) {
1080		area->all_next = heap->all_areas;
1081		heap->all_areas = area;
1082	} else {
1083		heap_area *insert = heap->all_areas;
1084		while (insert->all_next && insert->all_next->base > area->base)
1085			insert = insert->all_next;
1086
1087		area->all_next = insert->all_next;
1088		insert->all_next = area;
1089	}
1090
1091	heap->total_pages += area->page_count;
1092	heap->total_free_pages += area->free_page_count;
1093
1094	if (areaID >= 0) {
1095		// this later on deletable area is yet empty - the empty count will be
1096		// decremented as soon as this area is used for the first time
1097		heap->empty_areas++;
1098	}
1099
1100	pageLocker.Unlock();
1101	areaWriteLocker.Unlock();
1102
1103	dprintf("heap_add_area: area %" B_PRId32 " added to %s heap %p - usable "
1104		"range %p - %p\n", area->area, heap->name, heap, (void *)area->base,
1105		(void *)(area->base + area->size));
1106}
1107
1108
1109static status_t
1110heap_remove_area(heap_allocator *heap, heap_area *area)
1111{
1112	if (area->free_page_count != area->page_count) {
1113		panic("tried removing heap area that has still pages in use");
1114		return B_ERROR;
1115	}
1116
1117	if (area->prev == NULL && area->next == NULL) {
1118		panic("tried removing the last non-full heap area");
1119		return B_ERROR;
1120	}
1121
1122	if (heap->areas == area)
1123		heap->areas = area->next;
1124	if (area->prev != NULL)
1125		area->prev->next = area->next;
1126	if (area->next != NULL)
1127		area->next->prev = area->prev;
1128
1129	if (heap->all_areas == area)
1130		heap->all_areas = area->all_next;
1131	else {
1132		heap_area *previous = heap->all_areas;
1133		while (previous) {
1134			if (previous->all_next == area) {
1135				previous->all_next = area->all_next;
1136				break;
1137			}
1138
1139			previous = previous->all_next;
1140		}
1141
1142		if (previous == NULL)
1143			panic("removing heap area that is not in all list");
1144	}
1145
1146	heap->total_pages -= area->page_count;
1147	heap->total_free_pages -= area->free_page_count;
1148
1149	dprintf("heap_remove_area: area %" B_PRId32 " with range %p - %p removed "
1150		"from %s heap %p\n", area->area, (void *)area->base,
1151		(void *)(area->base + area->size), heap->name, heap);
1152
1153	return B_OK;
1154}
1155
1156
1157heap_allocator *
1158heap_create_allocator(const char *name, addr_t base, size_t size,
1159	const heap_class *heapClass, bool allocateOnHeap)
1160{
1161	heap_allocator *heap;
1162	if (allocateOnHeap) {
1163		// allocate seperately on the heap
1164		heap = (heap_allocator *)malloc(sizeof(heap_allocator)
1165			+ sizeof(heap_bin) * MAX_BIN_COUNT);
1166	} else {
1167		// use up the first part of the area
1168		heap = (heap_allocator *)base;
1169		base += sizeof(heap_allocator);
1170		size -= sizeof(heap_allocator);
1171	}
1172
1173	heap->name = name;
1174	heap->page_size = heapClass->page_size;
1175	heap->total_pages = heap->total_free_pages = heap->empty_areas = 0;
1176	heap->areas = heap->all_areas = NULL;
1177	heap->bins = (heap_bin *)((addr_t)heap + sizeof(heap_allocator));
1178
1179#if KERNEL_HEAP_LEAK_CHECK
1180	heap->get_caller = &get_caller;
1181#endif
1182
1183	heap->bin_count = 0;
1184	size_t binSize = 0, lastSize = 0;
1185	uint32 count = heap->page_size / heapClass->min_bin_size;
1186	for (; count >= heapClass->min_count_per_page; count--, lastSize = binSize) {
1187		if (heap->bin_count >= MAX_BIN_COUNT)
1188			panic("heap configuration invalid - max bin count reached\n");
1189
1190		binSize = (heap->page_size / count) & ~(heapClass->bin_alignment - 1);
1191		if (binSize == lastSize)
1192			continue;
1193		if (heap->page_size - count * binSize > heapClass->max_waste_per_page)
1194			continue;
1195
1196		heap_bin *bin = &heap->bins[heap->bin_count];
1197		mutex_init(&bin->lock, "heap bin lock");
1198		bin->element_size = binSize;
1199		bin->max_free_count = heap->page_size / binSize;
1200		bin->page_list = NULL;
1201		heap->bin_count++;
1202	};
1203
1204	if (!allocateOnHeap) {
1205		base += heap->bin_count * sizeof(heap_bin);
1206		size -= heap->bin_count * sizeof(heap_bin);
1207	}
1208
1209	rw_lock_init(&heap->area_lock, "heap area rw lock");
1210	mutex_init(&heap->page_lock, "heap page lock");
1211
1212	heap_add_area(heap, -1, base, size);
1213	return heap;
1214}
1215
1216
1217static inline void
1218heap_free_pages_added(heap_allocator *heap, heap_area *area, uint32 pageCount)
1219{
1220	area->free_page_count += pageCount;
1221	heap->total_free_pages += pageCount;
1222
1223	if (area->free_page_count == pageCount) {
1224		// we need to add ourselfs to the area list of the heap
1225		area->prev = NULL;
1226		area->next = heap->areas;
1227		if (area->next)
1228			area->next->prev = area;
1229		heap->areas = area;
1230	} else {
1231		// we might need to move back in the area list
1232		if (area->next && area->next->free_page_count < area->free_page_count) {
1233			// move ourselfs so the list stays ordered
1234			heap_area *insert = area->next;
1235			while (insert->next
1236				&& insert->next->free_page_count < area->free_page_count)
1237				insert = insert->next;
1238
1239			if (area->prev)
1240				area->prev->next = area->next;
1241			if (area->next)
1242				area->next->prev = area->prev;
1243			if (heap->areas == area)
1244				heap->areas = area->next;
1245
1246			area->prev = insert;
1247			area->next = insert->next;
1248			if (area->next)
1249				area->next->prev = area;
1250			insert->next = area;
1251		}
1252	}
1253
1254	if (area->free_page_count == area->page_count && area->area >= 0)
1255		heap->empty_areas++;
1256}
1257
1258
1259static inline void
1260heap_free_pages_removed(heap_allocator *heap, heap_area *area, uint32 pageCount)
1261{
1262	if (area->free_page_count == area->page_count && area->area >= 0) {
1263		// this area was completely empty
1264		heap->empty_areas--;
1265	}
1266
1267	area->free_page_count -= pageCount;
1268	heap->total_free_pages -= pageCount;
1269
1270	if (area->free_page_count == 0) {
1271		// the area is now full so we remove it from the area list
1272		if (area->prev)
1273			area->prev->next = area->next;
1274		if (area->next)
1275			area->next->prev = area->prev;
1276		if (heap->areas == area)
1277			heap->areas = area->next;
1278		area->next = area->prev = NULL;
1279	} else {
1280		// we might need to move forward in the area list
1281		if (area->prev && area->prev->free_page_count > area->free_page_count) {
1282			// move ourselfs so the list stays ordered
1283			heap_area *insert = area->prev;
1284			while (insert->prev
1285				&& insert->prev->free_page_count > area->free_page_count)
1286				insert = insert->prev;
1287
1288			if (area->prev)
1289				area->prev->next = area->next;
1290			if (area->next)
1291				area->next->prev = area->prev;
1292
1293			area->prev = insert->prev;
1294			area->next = insert;
1295			if (area->prev)
1296				area->prev->next = area;
1297			if (heap->areas == insert)
1298				heap->areas = area;
1299			insert->prev = area;
1300		}
1301	}
1302}
1303
1304
1305static inline void
1306heap_link_page(heap_page *page, heap_page **list)
1307{
1308	page->prev = NULL;
1309	page->next = *list;
1310	if (page->next)
1311		page->next->prev = page;
1312	*list = page;
1313}
1314
1315
1316static inline void
1317heap_unlink_page(heap_page *page, heap_page **list)
1318{
1319	if (page->prev)
1320		page->prev->next = page->next;
1321	if (page->next)
1322		page->next->prev = page->prev;
1323	if (list && *list == page) {
1324		*list = page->next;
1325		if (page->next)
1326			page->next->prev = NULL;
1327	}
1328}
1329
1330
1331static heap_page *
1332heap_allocate_contiguous_pages(heap_allocator *heap, uint32 pageCount,
1333	size_t alignment)
1334{
1335	MutexLocker pageLocker(heap->page_lock);
1336	heap_area *area = heap->areas;
1337	while (area) {
1338		if (area->free_page_count < pageCount) {
1339			area = area->next;
1340			continue;
1341		}
1342
1343		uint32 step = 1;
1344		uint32 firstValid = 0;
1345		const uint32 lastValid = area->page_count - pageCount + 1;
1346
1347		if (alignment > heap->page_size) {
1348			firstValid = (ROUNDUP(area->base, alignment) - area->base)
1349				/ heap->page_size;
1350			step = alignment / heap->page_size;
1351		}
1352
1353		int32 first = -1;
1354		for (uint32 i = firstValid; i < lastValid; i += step) {
1355			if (area->page_table[i].in_use)
1356				continue;
1357
1358			first = i;
1359
1360			for (uint32 j = 1; j < pageCount; j++) {
1361				if (area->page_table[i + j].in_use) {
1362					first = -1;
1363					i += j / step * step;
1364					break;
1365				}
1366			}
1367
1368			if (first >= 0)
1369				break;
1370		}
1371
1372		if (first < 0) {
1373			area = area->next;
1374			continue;
1375		}
1376
1377		for (uint32 i = first; i < first + pageCount; i++) {
1378			heap_page *page = &area->page_table[i];
1379			page->in_use = 1;
1380			page->bin_index = heap->bin_count;
1381
1382			heap_unlink_page(page, &area->free_pages);
1383
1384			page->next = page->prev = NULL;
1385			page->free_list = NULL;
1386			page->allocation_id = (uint16)first;
1387		}
1388
1389		heap_free_pages_removed(heap, area, pageCount);
1390		return &area->page_table[first];
1391	}
1392
1393	return NULL;
1394}
1395
1396
1397#if KERNEL_HEAP_LEAK_CHECK
1398static void
1399heap_add_leak_check_info(heap_allocator *heap, addr_t address, size_t allocated,
1400	size_t size)
1401{
1402	heap_leak_check_info *info = (heap_leak_check_info *)(address + allocated
1403		- sizeof(heap_leak_check_info));
1404	info->size = size - sizeof(heap_leak_check_info);
1405	info->thread = (gKernelStartup ? 0 : thread_get_current_thread_id());
1406	info->team = (gKernelStartup ? 0 : team_get_current_team_id());
1407	info->caller = heap->get_caller();
1408}
1409#endif
1410
1411
1412static void *
1413heap_raw_alloc(heap_allocator *heap, size_t size, size_t alignment)
1414{
1415	TRACE(("heap %p: allocate %lu bytes from raw pages with alignment %lu\n",
1416		heap, size, alignment));
1417
1418	uint32 pageCount = (size + heap->page_size - 1) / heap->page_size;
1419	heap_page *firstPage = heap_allocate_contiguous_pages(heap, pageCount,
1420		alignment);
1421	if (firstPage == NULL) {
1422		TRACE(("heap %p: found no contiguous pages to allocate %ld bytes\n",
1423			heap, size));
1424		return NULL;
1425	}
1426
1427	addr_t address = firstPage->area->base + firstPage->index * heap->page_size;
1428#if KERNEL_HEAP_LEAK_CHECK
1429	heap_add_leak_check_info(heap, address, pageCount * heap->page_size, size);
1430#endif
1431	return (void *)address;
1432}
1433
1434
1435static void *
1436heap_allocate_from_bin(heap_allocator *heap, uint32 binIndex, size_t size)
1437{
1438	heap_bin *bin = &heap->bins[binIndex];
1439	TRACE(("heap %p: allocate %lu bytes from bin %lu with element_size %lu\n",
1440		heap, size, binIndex, bin->element_size));
1441
1442	MutexLocker binLocker(bin->lock);
1443	heap_page *page = bin->page_list;
1444	if (page == NULL) {
1445		MutexLocker pageLocker(heap->page_lock);
1446		heap_area *area = heap->areas;
1447		if (area == NULL) {
1448			TRACE(("heap %p: no free pages to allocate %lu bytes\n", heap,
1449				size));
1450			return NULL;
1451		}
1452
1453		// by design there are only areas in the list that still have
1454		// free pages available
1455		page = area->free_pages;
1456		area->free_pages = page->next;
1457		if (page->next)
1458			page->next->prev = NULL;
1459
1460		heap_free_pages_removed(heap, area, 1);
1461
1462		if (page->in_use)
1463			panic("got an in use page %p from the free pages list\n", page);
1464		page->in_use = 1;
1465
1466		pageLocker.Unlock();
1467
1468		page->bin_index = binIndex;
1469		page->free_count = bin->max_free_count;
1470		page->empty_index = 0;
1471		page->free_list = NULL;
1472		page->next = page->prev = NULL;
1473		bin->page_list = page;
1474	}
1475
1476	// we have a page where we have a free slot
1477	void *address = NULL;
1478	if (page->free_list) {
1479		// there's a previously freed entry we can use
1480		address = page->free_list;
1481		page->free_list = (addr_t *)*page->free_list;
1482	} else {
1483		// the page hasn't been fully allocated so use the next empty_index
1484		address = (void *)(page->area->base + page->index * heap->page_size
1485			+ page->empty_index * bin->element_size);
1486		page->empty_index++;
1487	}
1488
1489	page->free_count--;
1490	if (page->free_count == 0) {
1491		// the page is now full so we remove it from the page_list
1492		bin->page_list = page->next;
1493		if (page->next)
1494			page->next->prev = NULL;
1495		page->next = page->prev = NULL;
1496	}
1497
1498#if KERNEL_HEAP_LEAK_CHECK
1499	binLocker.Unlock();
1500	heap_add_leak_check_info(heap, (addr_t)address, bin->element_size, size);
1501#endif
1502	return address;
1503}
1504
1505
1506static bool
1507is_valid_alignment(size_t number)
1508{
1509	// this cryptic line accepts zero and all powers of two
1510	return ((~number + 1) | ((number << 1) - 1)) == ~0UL;
1511}
1512
1513
1514inline bool
1515heap_should_grow(heap_allocator *heap)
1516{
1517	// suggest growing if there is less than 20% of a grow size available
1518	return heap->total_free_pages * heap->page_size < HEAP_GROW_SIZE / 5;
1519}
1520
1521
1522void *
1523heap_memalign(heap_allocator *heap, size_t alignment, size_t size)
1524{
1525	TRACE(("memalign(alignment = %lu, size = %lu)\n", alignment, size));
1526
1527#if DEBUG
1528	if (!is_valid_alignment(alignment))
1529		panic("memalign() with an alignment which is not a power of 2\n");
1530#endif
1531
1532#if KERNEL_HEAP_LEAK_CHECK
1533	size += sizeof(heap_leak_check_info);
1534#endif
1535
1536	void *address = NULL;
1537	if (alignment < B_PAGE_SIZE) {
1538		if (alignment != 0) {
1539			// TODO: The alignment is done by ensuring that the element size
1540			// of the target bin is aligned with the requested alignment. This
1541			// has the problem that it wastes space because a better (smaller)
1542			// bin could possibly be selected. We should pick the best bin and
1543			// check if there is an aligned block in the free list or if a new
1544			// (page aligned) page has to be allocated anyway.
1545			size = ROUNDUP(size, alignment);
1546			for (uint32 i = 0; i < heap->bin_count; i++) {
1547				if (size <= heap->bins[i].element_size
1548					&& is_valid_alignment(heap->bins[i].element_size)) {
1549					address = heap_allocate_from_bin(heap, i, size);
1550					break;
1551				}
1552			}
1553		} else {
1554			for (uint32 i = 0; i < heap->bin_count; i++) {
1555				if (size <= heap->bins[i].element_size) {
1556					address = heap_allocate_from_bin(heap, i, size);
1557					break;
1558				}
1559			}
1560		}
1561	}
1562
1563	if (address == NULL)
1564		address = heap_raw_alloc(heap, size, alignment);
1565
1566#if KERNEL_HEAP_LEAK_CHECK
1567	size -= sizeof(heap_leak_check_info);
1568#endif
1569
1570	TRACE(("memalign(): asked to allocate %lu bytes, returning pointer %p\n",
1571		size, address));
1572
1573	T(Allocate((addr_t)address, size));
1574	if (address == NULL)
1575		return address;
1576
1577#if PARANOID_KERNEL_MALLOC
1578	memset(address, 0xcc, size);
1579#endif
1580
1581#if PARANOID_KERNEL_FREE
1582	// make sure 0xdeadbeef is cleared if we do not overwrite the memory
1583	// and the user does not clear it
1584	if (((uint32 *)address)[1] == 0xdeadbeef)
1585		((uint32 *)address)[1] = 0xcccccccc;
1586#endif
1587
1588	return address;
1589}
1590
1591
1592status_t
1593heap_free(heap_allocator *heap, void *address)
1594{
1595	if (address == NULL)
1596		return B_OK;
1597
1598	ReadLocker areaReadLocker(heap->area_lock);
1599	heap_area *area = heap->all_areas;
1600	while (area) {
1601		// since the all_areas list is ordered by base with the biggest
1602		// base at the top, we need only find the first area with a base
1603		// smaller than our address to become our only candidate for freeing
1604		if (area->base <= (addr_t)address) {
1605			if ((addr_t)address >= area->base + area->size) {
1606				// none of the other areas can contain the address as the list
1607				// is ordered
1608				return B_ENTRY_NOT_FOUND;
1609			}
1610
1611			// this area contains the allocation, we're done searching
1612			break;
1613		}
1614
1615		area = area->all_next;
1616	}
1617
1618	if (area == NULL) {
1619		// this address does not belong to us
1620		return B_ENTRY_NOT_FOUND;
1621	}
1622
1623	TRACE(("free(): asked to free pointer %p\n", address));
1624
1625	heap_page *page = &area->page_table[((addr_t)address - area->base)
1626		/ heap->page_size];
1627
1628	TRACE(("free(): page %p: bin_index %d, free_count %d\n", page,
1629		page->bin_index, page->free_count));
1630
1631	if (page->bin_index > heap->bin_count) {
1632		panic("free(): page %p: invalid bin_index %d\n", page, page->bin_index);
1633		return B_ERROR;
1634	}
1635
1636	if (page->bin_index < heap->bin_count) {
1637		// small allocation
1638		heap_bin *bin = &heap->bins[page->bin_index];
1639
1640#if PARANOID_KERNEL_FREE
1641		if (((uint32 *)address)[1] == 0xdeadbeef) {
1642			// This block looks like it was freed already, walk the free list
1643			// on this page to make sure this address doesn't exist.
1644			MutexLocker binLocker(bin->lock);
1645			for (addr_t *temp = page->free_list; temp != NULL;
1646					temp = (addr_t *)*temp) {
1647				if (temp == address) {
1648					panic("free(): address %p already exists in page free "
1649						"list\n", address);
1650					return B_ERROR;
1651				}
1652			}
1653		}
1654
1655		// the first 4 bytes are overwritten with the next free list pointer
1656		// later
1657		uint32 *dead = (uint32 *)address;
1658		for (uint32 i = 1; i < bin->element_size / sizeof(uint32); i++)
1659			dead[i] = 0xdeadbeef;
1660#endif
1661
1662		MutexLocker binLocker(bin->lock);
1663		if (((addr_t)address - area->base - page->index
1664			* heap->page_size) % bin->element_size != 0) {
1665			panic("free(): passed invalid pointer %p supposed to be in bin for "
1666				"element size %" B_PRIu32 "\n", address, bin->element_size);
1667			return B_ERROR;
1668		}
1669
1670		// add the address to the page free list
1671		*(addr_t *)address = (addr_t)page->free_list;
1672		page->free_list = (addr_t *)address;
1673		page->free_count++;
1674
1675		if (page->free_count == bin->max_free_count) {
1676			// we are now empty, remove the page from the bin list
1677			MutexLocker pageLocker(heap->page_lock);
1678			heap_unlink_page(page, &bin->page_list);
1679			page->in_use = 0;
1680			heap_link_page(page, &area->free_pages);
1681			heap_free_pages_added(heap, area, 1);
1682		} else if (page->free_count == 1) {
1683			// we need to add ourselfs to the page list of the bin
1684			heap_link_page(page, &bin->page_list);
1685		} else {
1686			// we might need to move back in the free pages list
1687			if (page->next && page->next->free_count < page->free_count) {
1688				// move ourselfs so the list stays ordered
1689				heap_page *insert = page->next;
1690				while (insert->next
1691					&& insert->next->free_count < page->free_count)
1692					insert = insert->next;
1693
1694				heap_unlink_page(page, &bin->page_list);
1695
1696				page->prev = insert;
1697				page->next = insert->next;
1698				if (page->next)
1699					page->next->prev = page;
1700				insert->next = page;
1701			}
1702		}
1703	} else {
1704		// large allocation, just return the pages to the page free list
1705		uint32 allocationID = page->allocation_id;
1706		uint32 maxPages = area->page_count - page->index;
1707		uint32 pageCount = 0;
1708
1709		MutexLocker pageLocker(heap->page_lock);
1710		for (uint32 i = 0; i < maxPages; i++) {
1711			// loop until we find the end of this allocation
1712			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1713				|| page[i].allocation_id != allocationID)
1714				break;
1715
1716			// this page still belongs to the same allocation
1717			page[i].in_use = 0;
1718			page[i].allocation_id = 0;
1719
1720			// return it to the free list
1721			heap_link_page(&page[i], &area->free_pages);
1722			pageCount++;
1723		}
1724
1725		heap_free_pages_added(heap, area, pageCount);
1726	}
1727
1728	T(Free((addr_t)address));
1729	areaReadLocker.Unlock();
1730
1731	if (heap->empty_areas > 1) {
1732		WriteLocker areaWriteLocker(heap->area_lock);
1733		MutexLocker pageLocker(heap->page_lock);
1734
1735		area_id areasToDelete[heap->empty_areas - 1];
1736		int32 areasToDeleteIndex = 0;
1737
1738		area = heap->areas;
1739		while (area != NULL && heap->empty_areas > 1) {
1740			heap_area *next = area->next;
1741			if (area->area >= 0
1742				&& area->free_page_count == area->page_count
1743				&& heap_remove_area(heap, area) == B_OK) {
1744				areasToDelete[areasToDeleteIndex++] = area->area;
1745				heap->empty_areas--;
1746			}
1747
1748			area = next;
1749		}
1750
1751		pageLocker.Unlock();
1752		areaWriteLocker.Unlock();
1753
1754		for (int32 i = 0; i < areasToDeleteIndex; i++)
1755			delete_area(areasToDelete[i]);
1756	}
1757
1758	return B_OK;
1759}
1760
1761
1762#if KERNEL_HEAP_LEAK_CHECK
1763extern "C" void
1764heap_set_get_caller(heap_allocator* heap, addr_t (*getCaller)())
1765{
1766	heap->get_caller = getCaller;
1767}
1768#endif
1769
1770
1771#if USE_DEBUG_HEAP_FOR_MALLOC
1772
1773
1774static status_t
1775heap_realloc(heap_allocator *heap, void *address, void **newAddress,
1776	size_t newSize, uint32 flags)
1777{
1778	ReadLocker areaReadLocker(heap->area_lock);
1779	heap_area *area = heap->all_areas;
1780	while (area) {
1781		// since the all_areas list is ordered by base with the biggest
1782		// base at the top, we need only find the first area with a base
1783		// smaller than our address to become our only candidate for
1784		// reallocating
1785		if (area->base <= (addr_t)address) {
1786			if ((addr_t)address >= area->base + area->size) {
1787				// none of the other areas can contain the address as the list
1788				// is ordered
1789				return B_ENTRY_NOT_FOUND;
1790			}
1791
1792			// this area contains the allocation, we're done searching
1793			break;
1794		}
1795
1796		area = area->all_next;
1797	}
1798
1799	if (area == NULL) {
1800		// this address does not belong to us
1801		return B_ENTRY_NOT_FOUND;
1802	}
1803
1804	TRACE(("realloc(address = %p, newSize = %lu)\n", address, newSize));
1805
1806	heap_page *page = &area->page_table[((addr_t)address - area->base)
1807		/ heap->page_size];
1808	if (page->bin_index > heap->bin_count) {
1809		panic("realloc(): page %p: invalid bin_index %d\n", page,
1810			page->bin_index);
1811		return B_ERROR;
1812	}
1813
1814	// find out the size of the old allocation first
1815	size_t minSize = 0;
1816	size_t maxSize = 0;
1817	if (page->bin_index < heap->bin_count) {
1818		// this was a small allocation
1819		heap_bin *bin = &heap->bins[page->bin_index];
1820		maxSize = bin->element_size;
1821		if (page->bin_index > 0)
1822			minSize = heap->bins[page->bin_index - 1].element_size + 1;
1823	} else {
1824		// this was a large allocation
1825		uint32 allocationID = page->allocation_id;
1826		uint32 maxPages = area->page_count - page->index;
1827		maxSize = heap->page_size;
1828
1829		MutexLocker pageLocker(heap->page_lock);
1830		for (uint32 i = 1; i < maxPages; i++) {
1831			if (!page[i].in_use || page[i].bin_index != heap->bin_count
1832				|| page[i].allocation_id != allocationID)
1833				break;
1834
1835			minSize += heap->page_size;
1836			maxSize += heap->page_size;
1837		}
1838	}
1839
1840	areaReadLocker.Unlock();
1841
1842#if KERNEL_HEAP_LEAK_CHECK
1843	newSize += sizeof(heap_leak_check_info);
1844#endif
1845
1846	// does the new allocation simply fit in the old allocation?
1847	if (newSize > minSize && newSize <= maxSize) {
1848#if KERNEL_HEAP_LEAK_CHECK
1849		// update the size info (the info is at the end so stays where it is)
1850		heap_leak_check_info *info = (heap_leak_check_info *)((addr_t)address
1851			+ maxSize - sizeof(heap_leak_check_info));
1852		info->size = newSize - sizeof(heap_leak_check_info);
1853		newSize -= sizeof(heap_leak_check_info);
1854#endif
1855
1856		T(Reallocate((addr_t)address, (addr_t)address, newSize));
1857		*newAddress = address;
1858		return B_OK;
1859	}
1860
1861#if KERNEL_HEAP_LEAK_CHECK
1862	// new leak check info will be created with the malloc below
1863	newSize -= sizeof(heap_leak_check_info);
1864#endif
1865
1866	// if not, allocate a new chunk of memory
1867	*newAddress = malloc_etc(newSize, flags);
1868	T(Reallocate((addr_t)address, (addr_t)*newAddress, newSize));
1869	if (*newAddress == NULL) {
1870		// we tried but it didn't work out, but still the operation is done
1871		return B_OK;
1872	}
1873
1874	// copy the old data and free the old allocation
1875	memcpy(*newAddress, address, min_c(maxSize, newSize));
1876	heap_free(heap, address);
1877	return B_OK;
1878}
1879
1880
1881inline uint32
1882heap_index_for(size_t size, int32 cpu)
1883{
1884#if KERNEL_HEAP_LEAK_CHECK
1885	// take the extra info size into account
1886	size += sizeof(heap_leak_check_info_s);
1887#endif
1888
1889	uint32 index = 0;
1890	for (; index < HEAP_CLASS_COUNT - 1; index++) {
1891		if (size <= sHeapClasses[index].max_allocation_size)
1892			break;
1893	}
1894
1895	return (index + cpu * HEAP_CLASS_COUNT) % sHeapCount;
1896}
1897
1898
1899static void *
1900memalign_nogrow(size_t alignment, size_t size)
1901{
1902	// use dedicated memory in the grow thread by default
1903	if (thread_get_current_thread_id() == sHeapGrowThread) {
1904		void *result = heap_memalign(sGrowHeap, alignment, size);
1905		if (!sAddGrowHeap && heap_should_grow(sGrowHeap)) {
1906			// hopefully the heap grower will manage to create a new heap
1907			// before running out of private memory...
1908			dprintf("heap: requesting new grow heap\n");
1909			sAddGrowHeap = true;
1910			release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
1911		}
1912
1913		if (result != NULL)
1914			return result;
1915	}
1916
1917	// try public memory, there might be something available
1918	void *result = NULL;
1919	int32 cpuCount = MIN(smp_get_num_cpus(),
1920		(int32)sHeapCount / HEAP_CLASS_COUNT);
1921	int32 cpuNumber = smp_get_current_cpu();
1922	for (int32 i = 0; i < cpuCount; i++) {
1923		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
1924		heap_allocator *heap = sHeaps[heapIndex];
1925		result = heap_memalign(heap, alignment, size);
1926		if (result != NULL)
1927			return result;
1928	}
1929
1930	// no memory available
1931	if (thread_get_current_thread_id() == sHeapGrowThread)
1932		panic("heap: all heaps have run out of memory while growing\n");
1933	else
1934		dprintf("heap: all heaps have run out of memory\n");
1935
1936	return NULL;
1937}
1938
1939
1940static status_t
1941heap_create_new_heap_area(heap_allocator *heap, const char *name, size_t size)
1942{
1943	void *address = NULL;
1944	area_id heapArea = create_area(name, &address,
1945		B_ANY_KERNEL_BLOCK_ADDRESS, size, B_FULL_LOCK,
1946		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1947	if (heapArea < B_OK) {
1948		TRACE(("heap: couldn't allocate heap area \"%s\"\n", name));
1949		return heapArea;
1950	}
1951
1952	heap_add_area(heap, heapArea, (addr_t)address, size);
1953#if PARANOID_HEAP_VALIDATION
1954	heap_validate_heap(heap);
1955#endif
1956	return B_OK;
1957}
1958
1959
1960static int32
1961heap_grow_thread(void *)
1962{
1963	while (true) {
1964		// wait for a request to grow the heap list
1965		if (acquire_sem(sHeapGrowSem) < B_OK)
1966			continue;
1967
1968		if (sAddGrowHeap) {
1969			// the grow heap is going to run full soon, try to allocate a new
1970			// one to make some room.
1971			TRACE(("heap_grower: grow heaps will run out of memory soon\n"));
1972			if (heap_create_new_heap_area(sGrowHeap, "additional grow heap",
1973					HEAP_DEDICATED_GROW_SIZE) != B_OK)
1974				dprintf("heap_grower: failed to create new grow heap area\n");
1975		}
1976
1977		for (uint32 i = 0; i < sHeapCount; i++) {
1978			heap_allocator *heap = sHeaps[i];
1979			if (sLastGrowRequest[i] > sLastHandledGrowRequest[i]
1980				|| heap_should_grow(heap)) {
1981				// grow this heap if it is nearly full or if a grow was
1982				// explicitly requested for this heap (happens when a large
1983				// allocation cannot be fulfilled due to lack of contiguous
1984				// pages)
1985				if (heap_create_new_heap_area(heap, "additional heap",
1986						HEAP_GROW_SIZE) != B_OK)
1987					dprintf("heap_grower: failed to create new heap area\n");
1988				sLastHandledGrowRequest[i] = sLastGrowRequest[i];
1989			}
1990		}
1991
1992		// notify anyone waiting for this request
1993		release_sem_etc(sHeapGrownNotify, -1, B_RELEASE_ALL);
1994	}
1995
1996	return 0;
1997}
1998
1999
2000#endif	// USE_DEBUG_HEAP_FOR_MALLOC
2001
2002
2003static void
2004deferred_deleter(void *arg, int iteration)
2005{
2006	// move entries and deletables to on-stack lists
2007	InterruptsSpinLocker locker(sDeferredFreeListLock);
2008	if (sDeferredFreeList.IsEmpty() && sDeferredDeletableList.IsEmpty())
2009		return;
2010
2011	DeferredFreeList entries;
2012	entries.MoveFrom(&sDeferredFreeList);
2013
2014	DeferredDeletableList deletables;
2015	deletables.MoveFrom(&sDeferredDeletableList);
2016
2017	locker.Unlock();
2018
2019	// free the entries
2020	while (DeferredFreeListEntry* entry = entries.RemoveHead())
2021		free(entry);
2022
2023	// delete the deletables
2024	while (DeferredDeletable* deletable = deletables.RemoveHead())
2025		delete deletable;
2026}
2027
2028
2029//	#pragma mark -
2030
2031
2032#if USE_DEBUG_HEAP_FOR_MALLOC
2033
2034
2035status_t
2036heap_init(addr_t base, size_t size)
2037{
2038	for (uint32 i = 0; i < HEAP_CLASS_COUNT; i++) {
2039		size_t partSize = size * sHeapClasses[i].initial_percentage / 100;
2040		sHeaps[i] = heap_create_allocator(sHeapClasses[i].name, base, partSize,
2041			&sHeapClasses[i], false);
2042		sLastGrowRequest[i] = sLastHandledGrowRequest[i] = 0;
2043		base += partSize;
2044		sHeapCount++;
2045	}
2046
2047	// set up some debug commands
2048	add_debugger_command_etc("heap", &dump_heap_list,
2049		"Dump infos about the kernel heap(s)",
2050		"[(\"grow\" | \"stats\" | <heap>)]\n"
2051		"Dump infos about the kernel heap(s). If \"grow\" is specified, only\n"
2052		"infos about the dedicated grow heap are printed. If \"stats\" is\n"
2053		"given as the argument, currently only the heap count is printed.\n"
2054		"If <heap> is given, it is interpreted as the address of the heap to\n"
2055		"print infos about.\n", 0);
2056#if !KERNEL_HEAP_LEAK_CHECK
2057	add_debugger_command_etc("allocations", &dump_allocations,
2058		"Dump current heap allocations",
2059		"[\"stats\"] [<heap>]\n"
2060		"If no parameters are given, all current alloactions are dumped.\n"
2061		"If the optional argument \"stats\" is specified, only the allocation\n"
2062		"counts and no individual allocations are printed\n"
2063		"If a specific heap address is given, only allocations of this\n"
2064		"allocator are dumped\n", 0);
2065#else // !KERNEL_HEAP_LEAK_CHECK
2066	add_debugger_command_etc("allocations", &dump_allocations,
2067		"Dump current heap allocations",
2068		"[(\"team\" | \"thread\") <id>] [\"caller\" <address>] [\"address\" <address>] [\"stats\"]\n"
2069		"If no parameters are given, all current alloactions are dumped.\n"
2070		"If \"team\", \"thread\", \"caller\", and/or \"address\" is specified as the first\n"
2071		"argument, only allocations matching the team ID, thread ID, caller\n"
2072		"address or allocated address given in the second argument are printed.\n"
2073		"If the optional argument \"stats\" is specified, only the allocation\n"
2074		"counts and no individual allocations are printed.\n", 0);
2075	add_debugger_command_etc("allocations_per_caller",
2076		&dump_allocations_per_caller,
2077		"Dump current heap allocations summed up per caller",
2078		"[ \"-c\" ] [ -h <heap> ]\n"
2079		"The current allocations will by summed up by caller (their count and\n"
2080		"size) printed in decreasing order by size or, if \"-c\" is\n"
2081		"specified, by allocation count. If given <heap> specifies the\n"
2082		"address of the heap for which to print the allocations.\n", 0);
2083#endif // KERNEL_HEAP_LEAK_CHECK
2084	return B_OK;
2085}
2086
2087
2088status_t
2089heap_init_post_area()
2090{
2091	void *address = NULL;
2092	area_id growHeapArea = create_area("dedicated grow heap", &address,
2093		B_ANY_KERNEL_BLOCK_ADDRESS, HEAP_DEDICATED_GROW_SIZE, B_FULL_LOCK,
2094		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2095	if (growHeapArea < 0) {
2096		panic("heap_init_post_area(): couldn't allocate dedicate grow heap "
2097			"area");
2098		return growHeapArea;
2099	}
2100
2101	sGrowHeap = heap_create_allocator("grow", (addr_t)address,
2102		HEAP_DEDICATED_GROW_SIZE, &sHeapClasses[0], false);
2103	if (sGrowHeap == NULL) {
2104		panic("heap_init_post_area(): failed to create dedicated grow heap\n");
2105		return B_ERROR;
2106	}
2107
2108	// create the VIP heap
2109	static const heap_class heapClass = {
2110		"VIP I/O",					/* name */
2111		100,						/* initial percentage */
2112		B_PAGE_SIZE / 8,			/* max allocation size */
2113		B_PAGE_SIZE,				/* page size */
2114		8,							/* min bin size */
2115		4,							/* bin alignment */
2116		8,							/* min count per page */
2117		16							/* max waste per page */
2118	};
2119
2120	area_id vipHeapArea = create_area("VIP heap", &address,
2121		B_ANY_KERNEL_ADDRESS, VIP_HEAP_SIZE, B_FULL_LOCK,
2122		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2123	if (vipHeapArea < 0) {
2124		panic("heap_init_post_area(): couldn't allocate VIP heap area");
2125		return B_ERROR;
2126	}
2127
2128	sVIPHeap = heap_create_allocator("VIP heap", (addr_t)address,
2129		VIP_HEAP_SIZE, &heapClass, false);
2130	if (sVIPHeap == NULL) {
2131		panic("heap_init_post_area(): failed to create VIP heap\n");
2132		return B_ERROR;
2133	}
2134
2135	dprintf("heap_init_post_area(): created VIP heap: %p\n", sVIPHeap);
2136
2137	return B_OK;
2138}
2139
2140
2141status_t
2142heap_init_post_sem()
2143{
2144	sHeapGrowSem = create_sem(0, "heap_grow_sem");
2145	if (sHeapGrowSem < 0) {
2146		panic("heap_init_post_sem(): failed to create heap grow sem\n");
2147		return B_ERROR;
2148	}
2149
2150	sHeapGrownNotify = create_sem(0, "heap_grown_notify");
2151	if (sHeapGrownNotify < 0) {
2152		panic("heap_init_post_sem(): failed to create heap grown notify sem\n");
2153		return B_ERROR;
2154	}
2155
2156	return B_OK;
2157}
2158
2159
2160#endif	// USE_DEBUG_HEAP_FOR_MALLOC
2161
2162
2163status_t
2164heap_init_post_thread()
2165{
2166#if	USE_DEBUG_HEAP_FOR_MALLOC
2167	sHeapGrowThread = spawn_kernel_thread(heap_grow_thread, "heap grower",
2168		B_URGENT_PRIORITY, NULL);
2169	if (sHeapGrowThread < 0) {
2170		panic("heap_init_post_thread(): cannot create heap grow thread\n");
2171		return sHeapGrowThread;
2172	}
2173
2174	// create per-cpu heaps if there's enough memory
2175	int32 heapCount = MIN(smp_get_num_cpus(),
2176		(int32)vm_page_num_pages() / 60 / 1024);
2177	for (int32 i = 1; i < heapCount; i++) {
2178		addr_t base = 0;
2179		size_t size = HEAP_GROW_SIZE * HEAP_CLASS_COUNT;
2180		area_id perCPUHeapArea = create_area("per cpu initial heap",
2181			(void **)&base, B_ANY_KERNEL_ADDRESS, size, B_FULL_LOCK,
2182			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2183		if (perCPUHeapArea < 0)
2184			break;
2185
2186		for (uint32 j = 0; j < HEAP_CLASS_COUNT; j++) {
2187			int32 heapIndex = i * HEAP_CLASS_COUNT + j;
2188			size_t partSize = size * sHeapClasses[j].initial_percentage / 100;
2189			sHeaps[heapIndex] = heap_create_allocator(sHeapClasses[j].name,
2190				base, partSize, &sHeapClasses[j], false);
2191			sLastGrowRequest[heapIndex] = 0;
2192			sLastHandledGrowRequest[heapIndex] = 0;
2193			base += partSize;
2194			sHeapCount++;
2195		}
2196	}
2197
2198	resume_thread(sHeapGrowThread);
2199
2200#else	// USE_DEBUG_HEAP_FOR_MALLOC
2201
2202	// set up some debug commands
2203	add_debugger_command_etc("heap", &dump_heap_list,
2204		"Dump infos about a specific heap",
2205		"[\"stats\"] <heap>\n"
2206		"Dump infos about the specified kernel heap. If \"stats\" is given\n"
2207		"as the argument, currently only the heap count is printed.\n", 0);
2208#if !KERNEL_HEAP_LEAK_CHECK
2209	add_debugger_command_etc("heap_allocations", &dump_allocations,
2210		"Dump current heap allocations",
2211		"[\"stats\"] <heap>\n"
2212		"If the optional argument \"stats\" is specified, only the allocation\n"
2213		"counts and no individual allocations are printed.\n", 0);
2214#endif	// KERNEL_HEAP_LEAK_CHECK
2215#endif	// !USE_DEBUG_HEAP_FOR_MALLOC
2216
2217	// run the deferred deleter roughly once a second
2218	if (register_kernel_daemon(deferred_deleter, NULL, 10) != B_OK)
2219		panic("heap_init_post_thread(): failed to init deferred deleter");
2220
2221	return B_OK;
2222}
2223
2224
2225//	#pragma mark - Public API
2226
2227
2228#if USE_DEBUG_HEAP_FOR_MALLOC
2229
2230
2231void *
2232memalign(size_t alignment, size_t size)
2233{
2234	if (!gKernelStartup && !are_interrupts_enabled()) {
2235		panic("memalign(): called with interrupts disabled\n");
2236		return NULL;
2237	}
2238
2239	if (!gKernelStartup && size > HEAP_AREA_USE_THRESHOLD) {
2240		// don't even attempt such a huge allocation - use areas instead
2241		size_t areaSize = ROUNDUP(size + sizeof(area_allocation_info)
2242			+ alignment, B_PAGE_SIZE);
2243		if (areaSize < size) {
2244			// the size overflowed
2245			return NULL;
2246		}
2247
2248		void *address = NULL;
2249		area_id allocationArea = create_area("memalign area", &address,
2250			B_ANY_KERNEL_BLOCK_ADDRESS, areaSize, B_FULL_LOCK,
2251			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
2252		if (allocationArea < B_OK) {
2253			dprintf("heap: failed to create area for huge allocation\n");
2254			return NULL;
2255		}
2256
2257		area_allocation_info *info = (area_allocation_info *)address;
2258		info->magic = kAreaAllocationMagic;
2259		info->area = allocationArea;
2260		info->base = address;
2261		info->size = areaSize;
2262		info->allocation_size = size;
2263		info->allocation_alignment = alignment;
2264
2265		address = (void *)((addr_t)address + sizeof(area_allocation_info));
2266		if (alignment != 0) {
2267			address = (void *)ROUNDUP((addr_t)address, alignment);
2268			ASSERT((addr_t)address % alignment == 0);
2269			ASSERT((addr_t)address + size - 1 < (addr_t)info + areaSize - 1);
2270		}
2271
2272		TRACE(("heap: allocated area %ld for huge allocation of %lu bytes\n",
2273			allocationArea, size));
2274
2275		info->allocation_base = address;
2276
2277#if PARANOID_KERNEL_MALLOC
2278		memset(address, 0xcc, size);
2279#endif
2280		return address;
2281	}
2282
2283	void *result = NULL;
2284	bool shouldGrow = false;
2285	int32 cpuCount = MIN(smp_get_num_cpus(),
2286		(int32)sHeapCount / HEAP_CLASS_COUNT);
2287	int32 cpuNumber = smp_get_current_cpu();
2288	for (int32 i = 0; i < cpuCount; i++) {
2289		uint32 heapIndex = heap_index_for(size, cpuNumber++ % cpuCount);
2290		heap_allocator *heap = sHeaps[heapIndex];
2291		result = heap_memalign(heap, alignment, size);
2292		if (result != NULL) {
2293			shouldGrow = heap_should_grow(heap);
2294			break;
2295		}
2296
2297#if PARANOID_HEAP_VALIDATION
2298		heap_validate_heap(heap);
2299#endif
2300	}
2301
2302	if (result == NULL) {
2303		// request an urgent grow and wait - we don't do it ourselfs here to
2304		// serialize growing through the grow thread, as otherwise multiple
2305		// threads hitting this situation (likely when memory ran out) would
2306		// all add areas
2307		uint32 heapIndex = heap_index_for(size, smp_get_current_cpu());
2308		sLastGrowRequest[heapIndex]++;
2309		switch_sem(sHeapGrowSem, sHeapGrownNotify);
2310
2311		// and then try again
2312		result = heap_memalign(sHeaps[heapIndex], alignment, size);
2313	} else if (shouldGrow) {
2314		// should grow sometime soon, notify the grower
2315		release_sem_etc(sHeapGrowSem, 1, B_DO_NOT_RESCHEDULE);
2316	}
2317
2318	if (result == NULL)
2319		panic("heap: kernel heap has run out of memory\n");
2320	return result;
2321}
2322
2323
2324void *
2325memalign_etc(size_t alignment, size_t size, uint32 flags)
2326{
2327	if ((flags & HEAP_PRIORITY_VIP) != 0)
2328		return heap_memalign(sVIPHeap, alignment, size);
2329
2330	if ((flags & (HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE))
2331			!= 0) {
2332		return memalign_nogrow(alignment, size);
2333	}
2334
2335	return memalign(alignment, size);
2336}
2337
2338
2339void
2340free_etc(void *address, uint32 flags)
2341{
2342	if ((flags & HEAP_PRIORITY_VIP) != 0)
2343		heap_free(sVIPHeap, address);
2344	else
2345		free(address);
2346}
2347
2348
2349void *
2350malloc(size_t size)
2351{
2352	return memalign_etc(0, size, 0);
2353}
2354
2355
2356void
2357free(void *address)
2358{
2359	if (!gKernelStartup && !are_interrupts_enabled()) {
2360		panic("free(): called with interrupts disabled\n");
2361		return;
2362	}
2363
2364	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2365	for (uint32 i = 0; i < sHeapCount; i++) {
2366		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2367		if (heap_free(heap, address) == B_OK) {
2368#if PARANOID_HEAP_VALIDATION
2369			heap_validate_heap(heap);
2370#endif
2371			return;
2372		}
2373	}
2374
2375	// maybe it was allocated from the dedicated grow heap
2376	if (heap_free(sGrowHeap, address) == B_OK)
2377		return;
2378
2379	// or maybe it was allocated from the VIP heap
2380	if (heap_free(sVIPHeap, address) == B_OK)
2381		return;
2382
2383	// or maybe it was a huge allocation using an area
2384	area_info areaInfo;
2385	area_id area = area_for(address);
2386	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2387		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2388
2389		// just make extra sure it was allocated by us
2390		if (info->magic == kAreaAllocationMagic && info->area == area
2391			&& info->size == areaInfo.size && info->base == areaInfo.address
2392			&& info->allocation_size < areaInfo.size) {
2393			delete_area(area);
2394			TRACE(("free(): freed huge allocation by deleting area %ld\n",
2395				area));
2396			return;
2397		}
2398	}
2399
2400	panic("free(): free failed for address %p\n", address);
2401}
2402
2403
2404void *
2405realloc_etc(void *address, size_t newSize, uint32 flags)
2406{
2407	if (!gKernelStartup && !are_interrupts_enabled()) {
2408		panic("realloc(): called with interrupts disabled\n");
2409		return NULL;
2410	}
2411
2412	if (address == NULL)
2413		return malloc_etc(newSize, flags);
2414
2415	if (newSize == 0) {
2416		free_etc(address, flags);
2417		return NULL;
2418	}
2419
2420	void *newAddress = NULL;
2421	int32 offset = smp_get_current_cpu() * HEAP_CLASS_COUNT;
2422	for (uint32 i = 0; i < sHeapCount; i++) {
2423		heap_allocator *heap = sHeaps[(i + offset) % sHeapCount];
2424		if (heap_realloc(heap, address, &newAddress, newSize, flags) == B_OK) {
2425#if PARANOID_HEAP_VALIDATION
2426			heap_validate_heap(heap);
2427#endif
2428			return newAddress;
2429		}
2430	}
2431
2432	// maybe it was allocated from the dedicated grow heap
2433	if (heap_realloc(sGrowHeap, address, &newAddress, newSize, flags) == B_OK)
2434		return newAddress;
2435
2436	// or maybe it was a huge allocation using an area
2437	area_info areaInfo;
2438	area_id area = area_for(address);
2439	if (area >= B_OK && get_area_info(area, &areaInfo) == B_OK) {
2440		area_allocation_info *info = (area_allocation_info *)areaInfo.address;
2441
2442		// just make extra sure it was allocated by us
2443		if (info->magic == kAreaAllocationMagic && info->area == area
2444			&& info->size == areaInfo.size && info->base == areaInfo.address
2445			&& info->allocation_size < areaInfo.size) {
2446			size_t available = info->size - ((addr_t)info->allocation_base
2447				- (addr_t)info->base);
2448
2449			if (available >= newSize) {
2450				// there is enough room available for the newSize
2451				TRACE(("realloc(): new size %ld fits in old area %ld with %ld "
2452					"available\n", newSize, area, available));
2453				info->allocation_size = newSize;
2454				return address;
2455			}
2456
2457			// have to allocate/copy/free - TODO maybe resize the area instead?
2458			newAddress = malloc(newSize);
2459			if (newAddress == NULL) {
2460				dprintf("realloc(): failed to allocate new block of %ld bytes\n",
2461					newSize);
2462				return NULL;
2463			}
2464
2465			memcpy(newAddress, address, min_c(newSize, info->allocation_size));
2466			delete_area(area);
2467			TRACE(("realloc(): allocated new block %p for size %ld and deleted "
2468				"old area %ld\n", newAddress, newSize, area));
2469			return newAddress;
2470		}
2471	}
2472
2473	panic("realloc(): failed to realloc address %p to size %lu\n", address,
2474		newSize);
2475	return NULL;
2476}
2477
2478
2479void *
2480realloc(void *address, size_t newSize)
2481{
2482	return realloc_etc(address, newSize, 0);
2483}
2484
2485
2486#endif	// USE_DEBUG_HEAP_FOR_MALLOC
2487
2488
2489void *
2490calloc(size_t numElements, size_t size)
2491{
2492	if (size != 0 && numElements > (((size_t)(-1)) / size))
2493		return NULL;
2494
2495	void *address = malloc(numElements * size);
2496	if (address != NULL)
2497		memset(address, 0, numElements * size);
2498
2499	return address;
2500}
2501
2502
2503void *
2504aligned_alloc(size_t alignment, size_t size)
2505{
2506	if ((size % alignment) != 0)
2507		return NULL;
2508
2509	return memalign(alignment, size);
2510}
2511
2512
2513void
2514deferred_free(void *block)
2515{
2516	if (block == NULL)
2517		return;
2518
2519	DeferredFreeListEntry *entry = new(block) DeferredFreeListEntry;
2520
2521	InterruptsSpinLocker _(sDeferredFreeListLock);
2522	sDeferredFreeList.Add(entry);
2523}
2524
2525
2526DeferredDeletable::~DeferredDeletable()
2527{
2528}
2529
2530
2531void
2532deferred_delete(DeferredDeletable *deletable)
2533{
2534	if (deletable == NULL)
2535		return;
2536
2537	InterruptsSpinLocker _(sDeferredFreeListLock);
2538	sDeferredDeletableList.Add(deletable);
2539}
2540