1/*-
2 * Copyright (c) 2015 Antti Kantee.  All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
14 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 */
25
26/*-
27 ****************************************************************************
28 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge
29 * (C) 2005 - Grzegorz Milos - Intel Research Cambridge
30 ****************************************************************************
31 *
32 *        File: mm.c
33 *      Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk)
34 *     Changes: Grzegorz Milos
35 *
 *        Date: Aug 2003, changes Aug 2005
37 *
38 * Environment: Xen Minimal OS
39 * Description: memory management related functions
40 *              contains buddy page allocator from Xen.
41 *
42 ****************************************************************************
43 * Permission is hereby granted, free of charge, to any person obtaining a copy
44 * of this software and associated documentation files (the "Software"), to
45 * deal in the Software without restriction, including without limitation the
46 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
47 * sell copies of the Software, and to permit persons to whom the Software is
48 * furnished to do so, subject to the following conditions:
49 *
50 * The above copyright notice and this permission notice shall be included in
51 * all copies or substantial portions of the Software.
52 *
53 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
56 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
58 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
59 * DEALINGS IN THE SOFTWARE.
60 */
61
62#include <bmk-core/core.h>
63#include <bmk-core/null.h>
64#include <bmk-core/pgalloc.h>
65#include <bmk-core/platform.h>
66#include <bmk-core/printf.h>
67#include <bmk-core/queue.h>
68#include <bmk-core/string.h>
69
70#include <bmk-pcpu/pcpu.h>
71
72#ifndef BMK_PGALLOC_DEBUG
73#define DPRINTF(x)
74#define SANITY_CHECK()
75#else
76int bmk_pgalloc_debug = 0;
77#define DPRINTF(x) if (bmk_pgalloc_debug) bmk_printf x
78#define SANITY_CHECK() sanity_check()
79#endif
80
81unsigned long pgalloc_totalkb, pgalloc_usedkb;
82
83/*
84 * The allocation bitmap is offset to the first page loaded, which is
85 * nice if someone loads memory starting in high ranges.  Notably,
86 * we don't need a pg->va operation since allocation is always done
87 * through the freelists in va, and the pgmap is used only as a lookup
88 * table for coalescing entries when pages are freed.
89 */
90static void *minpage_addr, *maxpage_addr;
91#define va_to_pg(x) \
92    (((unsigned long)x - (unsigned long)minpage_addr)>>BMK_PCPU_PAGE_SHIFT)
93
94#define addr2chunk(_addr_,_offset_) \
95    ((struct chunk *)(((char *)_addr_)+(_offset_)))
96
97#define order2size(_order_) (1UL<<(_order_ + BMK_PCPU_PAGE_SHIFT))
98
99/*
100 * ALLOCATION BITMAP
101 *  One bit per page of memory. Bit set => page is allocated.
102 */
103
104static unsigned long *alloc_bitmap;
105#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)
106bmk_ctassert((PAGES_PER_MAPWORD & (PAGES_PER_MAPWORD-1)) == 0);
107
108static int
109addr_is_managed(void *addr)
110{
111
112	return addr >= minpage_addr && addr < maxpage_addr;
113}
114
115static int
116allocated_in_map(void *addr)
117{
118	unsigned long pagenum;
119
120	bmk_assert(addr_is_managed(addr));
121	pagenum = va_to_pg(addr);
122	return (alloc_bitmap[pagenum/PAGES_PER_MAPWORD] \
123	    & (1UL<<(pagenum&(PAGES_PER_MAPWORD-1)))) != 0;
124}
125
126/*
127 * Hint regarding bitwise arithmetic in map_{alloc,free}:
128 *  -(1<<n)  sets all bits >= n.
129 *  (1<<n)-1 sets all bits <  n.
130 * Variable names in map_{alloc,free}:
131 *  *_idx == Index into `alloc_bitmap' array.
132 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
133 */
134
/*
 * Declare and compute the word-index/bit-offset variables used by
 * map_{alloc,free}.  Must be the first thing in the function body
 * (it expands into declarations).
 *
 * Fix: the body previously referenced `nr_pages' from the caller's
 * scope instead of the macro parameter `np', which only worked
 * because both callers named their argument `nr_pages'.
 */
#define PAGES_TO_MAPOPVARS(va, np)					\
	unsigned long start, end, curr_idx, end_idx;			\
	unsigned long first_page = va_to_pg(va);			\
	curr_idx= first_page / PAGES_PER_MAPWORD;			\
	start	= first_page & (PAGES_PER_MAPWORD-1);			\
	end_idx	= (first_page + (np)) / PAGES_PER_MAPWORD;		\
	end	= (first_page + (np)) & (PAGES_PER_MAPWORD-1);
142
static void
map_alloc(void *virt, unsigned long nr_pages)
{
	/* Mark nr_pages pages starting at virt as allocated in the bitmap. */
	PAGES_TO_MAPOPVARS(virt, nr_pages);

	if (curr_idx == end_idx) {
		/* whole range within one bitmap word: bits [start,end) */
		alloc_bitmap[curr_idx] |= ((1UL<<end)-1) & -(1UL<<start);
	} else {
		/* head word: all bits >= start */
		alloc_bitmap[curr_idx] |= -(1UL<<start);
		/* full middle words */
		while (++curr_idx < end_idx)
			alloc_bitmap[curr_idx] = ~0UL;
		/* tail word: all bits < end */
		alloc_bitmap[curr_idx] |= (1UL<<end)-1;
	}
}
157
static void
map_free(void *virt, unsigned long nr_pages)
{
	/* Mark nr_pages pages starting at virt as free in the bitmap. */
	PAGES_TO_MAPOPVARS(virt, nr_pages);

	if (curr_idx == end_idx) {
		/* whole range within one bitmap word: clear bits [start,end) */
		alloc_bitmap[curr_idx] &= -(1UL<<end) | ((1UL<<start)-1);
	} else {
		/* head word: keep only bits < start */
		alloc_bitmap[curr_idx] &= (1UL<<start)-1;
		/* full middle words */
		while (++curr_idx != end_idx)
			alloc_bitmap[curr_idx] = 0;
		/* tail word: keep only bits >= end */
		alloc_bitmap[curr_idx] &= -(1UL<<end);
	}
}
172
173/*
174 * BINARY BUDDY ALLOCATOR
175 */
176
/* Magic value stamped into chunk headers while they sit on a freelist. */
#define CHUNKMAGIC 0x11020217
/*
 * Header stored at the start of every *free* chunk.  Allocated memory
 * carries no header; level/magic are valid only while the chunk is on
 * a freelist (magic is cleared on allocation and during coalescing).
 */
struct chunk {
	int level;	/* freelist order this chunk belongs to */
	int magic;	/* CHUNKMAGIC while free */

	LIST_ENTRY(chunk) entries;	/* freelist linkage */
};
184
185static int
186chunklevel(struct chunk *ch)
187{
188
189	bmk_assert(ch->magic == CHUNKMAGIC);
190	return ch->level;
191}
192
193/*
194 * Linked lists of free chunks of different powers-of-two in size.
195 * The assumption is that pointer size * NBBY = va size.  It's
196 * a pretty reasonable assumption, except that we really don't need
197 * that much address space.  The order of what most CPUs implement (48bits)
198 * would be more than plenty.  But since the unused levels don't consume
199 * much space, leave it be for now.
200 */
201#define FREELIST_LEVELS (8*(sizeof(void*))-BMK_PCPU_PAGE_SHIFT)
202static LIST_HEAD(, chunk) freelist[FREELIST_LEVELS];
203
204static void
205freechunk_link(void *addr, int order)
206{
207	struct chunk *ch = addr;
208
209	ch->level = order;
210	ch->magic = CHUNKMAGIC;
211
212	LIST_INSERT_HEAD(&freelist[order], ch, entries);
213}
214
215#ifdef BMK_PGALLOC_DEBUG
216static void __attribute__((used))
217print_allocation(void *start, unsigned nr_pages)
218{
219	unsigned long addr = (unsigned long)start;
220
221	for (; nr_pages > 0; nr_pages--, addr += BMK_PCPU_PAGE_SIZE) {
222		if (allocated_in_map((void *)addr))
223			bmk_printf("1");
224		else
225			bmk_printf("0");
226	}
227
228	bmk_printf("\n");
229}
230
231static void
232sanity_check(void)
233{
234	unsigned int x;
235	struct chunk *head;
236
237	for (x = 0; x < FREELIST_LEVELS; x++) {
238		LIST_FOREACH(head, &freelist[x], entries) {
239			bmk_assert(!allocated_in_map(head));
240			bmk_assert(head->magic == CHUNKMAGIC);
241		}
242	}
243}
244#endif
245
246void
247bmk_pgalloc_dumpstats(void)
248{
249	struct chunk *ch;
250	unsigned long remainingkb;
251	unsigned i;
252
253	remainingkb = pgalloc_totalkb - pgalloc_usedkb;
254	bmk_printf("pgalloc total %ld kB, used %ld kB (remaining %ld kB)\n",
255	    pgalloc_totalkb, pgalloc_usedkb, remainingkb);
256
257	bmk_printf("available chunks:\n");
258	for (i = 0; i < FREELIST_LEVELS; i++) {
259		unsigned long chunks, levelhas;
260
261		if (LIST_EMPTY(&freelist[i]))
262			continue;
263
264		chunks = 0;
265		LIST_FOREACH(ch, &freelist[i], entries) {
266			chunks++;
267		}
268		levelhas = chunks * (order2size(i)>>10);
269		bmk_printf("%8ld kB: %8ld chunks, %12ld kB\t(%2ld%%)\n",
270		    order2size(i)>>10, chunks, levelhas,
271		    (100*levelhas)/remainingkb);
272	}
273}
274
static void
carverange(unsigned long addr, unsigned long range)
{
	struct chunk *ch;
	unsigned i, r;

	/*
	 * Split [addr, addr+range) into maximal naturally-aligned
	 * power-of-two chunks and link each onto its freelist.
	 * Both addr and range must be page-aligned and addr non-zero
	 * (__builtin_ctzl(0) is undefined).
	 */
	while (range) {
		/*
		 * Next chunk is limited by alignment of addr, but also
		 * must not be bigger than remaining range.
		 */
		i = __builtin_ctzl(addr);
		r = 8*sizeof(range) - (__builtin_clzl(range)+1);
		if (i > r) {
			i = r;
		}
		/* convert log2(bytes) to a freelist order */
		i -= BMK_PCPU_PAGE_SHIFT;

		ch = addr2chunk(addr, 0);
		freechunk_link(ch, i);
		addr += order2size(i);
		range -= order2size(i);

		DPRINTF(("bmk_pgalloc: carverange chunk 0x%lx at %p\n",
		    order2size(i), ch));
	}
}
302
303/*
304 * Load [min,max] as available addresses.
305 */
void
bmk_pgalloc_loadmem(unsigned long min, unsigned long max)
{
	static int called;
	unsigned long range, bitmap_size;
	unsigned int i;

	/* one-shot: the allocator manages a single contiguous region */
	if (called)
		bmk_platform_halt("bmk_pgalloc_loadmem called more than once");
	called = 1;

	bmk_assert(max > min);

	/* shrink the range inward to whole pages */
	min = bmk_round_page(min);
	max = bmk_trunc_page(max);

	DPRINTF(("bmk_pgalloc_loadmem: available memory [0x%lx,0x%lx]\n",
	    min, max));

	for (i = 0; i < FREELIST_LEVELS; i++) {
		LIST_INIT(&freelist[i]);
	}

	/* Allocate space for the allocation bitmap. */
	/* one bit per page: (max-min)/pagesize/8, rounded up to a page */
	bitmap_size  = ((max-min) >> (BMK_PCPU_PAGE_SHIFT+3)) + 1;
	bitmap_size  = bmk_round_page(bitmap_size);
	alloc_bitmap = (unsigned long *)min;
	/* the bitmap itself is carved off the front and never managed */
	min         += bitmap_size;
	range        = max - min;

	minpage_addr = (void *)min;
	maxpage_addr = (void *)max;

	pgalloc_totalkb = range >> 10;
	pgalloc_usedkb = 0;

	/* All allocated by default. */
	bmk_memset(alloc_bitmap, ~0, bitmap_size);
	/* Free up the memory we've been given to play with. */
	map_free((void *)min, range>>BMK_PCPU_PAGE_SHIFT);

	carverange(min, range);
}
349
350/* can we allocate for given align from freelist index i? */
351static struct chunk *
352satisfies_p(int i, unsigned long align)
353{
354	struct chunk *ch;
355	unsigned long p;
356
357	LIST_FOREACH(ch, &freelist[i], entries) {
358		p = (unsigned long)ch;
359		if ((p & (align-1)) == 0)
360			return ch;
361	}
362
363	return NULL;
364}
365
366void *
367bmk_pgalloc(int order)
368{
369
370	return bmk_pgalloc_align(order, BMK_PCPU_PAGE_SIZE);
371}
372
373void *
374bmk_pgalloc_align(int order, unsigned long align)
375{
376	struct chunk *alloc_ch;
377	unsigned long p, len;
378	unsigned int bucket;
379
380	bmk_assert(align >= BMK_PCPU_PAGE_SIZE && (align & (align-1)) == 0);
381	bmk_assert((unsigned)order < FREELIST_LEVELS);
382
383	for (bucket = order; bucket < FREELIST_LEVELS; bucket++) {
384		if ((alloc_ch = satisfies_p(bucket, align)) != NULL)
385			break;
386	}
387	if (!alloc_ch) {
388		bmk_printf("cannot handle page request order %d/0x%lx!\n",
389		    order, align);
390		return 0;
391	}
392	/* Unlink the chunk. */
393	LIST_REMOVE(alloc_ch, entries);
394
395	bmk_assert(alloc_ch->magic == CHUNKMAGIC);
396	alloc_ch->magic = 0;
397
398	/*
399	 * TODO: figure out if we can cheaply carve the block without
400	 * using the best alignment.
401	 */
402	len = order2size(order);
403	p = (unsigned long)alloc_ch;
404
405	/* carve up leftovers (if any) */
406	carverange(p+len, order2size(bucket) - len);
407
408	map_alloc(alloc_ch, 1UL<<order);
409	DPRINTF(("bmk_pgalloc: allocated 0x%lx bytes at %p\n",
410	    order2size(order), alloc_ch));
411	pgalloc_usedkb += len>>10;
412
413#ifdef BMK_PGALLOC_DEBUG
414	{
415		unsigned npgs = 1<<order;
416		char *p = (char *)alloc_ch;
417
418		for (npgs = 1<<order; npgs; npgs--, p += BMK_PCPU_PAGE_SIZE) {
419			bmk_assert(allocated_in_map(p));
420		}
421	}
422#endif
423
424	SANITY_CHECK();
425
426	bmk_assert(((unsigned long)alloc_ch & (align-1)) == 0);
427	return alloc_ch;
428}
429
void
bmk_pgfree(void *pointer, int order)
{
	struct chunk *freed_ch, *to_merge_ch;
	unsigned long mask;

	DPRINTF(("bmk_pgfree: freeing 0x%lx bytes at %p\n",
	    order2size(order), pointer));

#ifdef BMK_PGALLOC_DEBUG
	{
		/* every page being freed must currently be allocated */
		unsigned npgs = 1<<order;
		char *p = (char *)pointer;

		for (npgs = 1<<order; npgs; npgs--, p += BMK_PCPU_PAGE_SIZE) {
			bmk_assert(allocated_in_map(p));
		}
	}
#endif

	/* free the allocation in the bitmap */
	map_free(pointer, 1UL << order);
	pgalloc_usedkb -= order2size(order)>>10;

	/*
	 * Create as large a free chunk as we can: keep coalescing with
	 * the buddy while the buddy is managed, free, and of equal order.
	 * The buddy's position (below/above) is given by the order bit
	 * of the chunk's address.
	 */
	for (freed_ch = pointer; (unsigned)order < FREELIST_LEVELS; ) {
		mask = order2size(order);
		if ((unsigned long)freed_ch & mask) {
			/* buddy lies below us */
			to_merge_ch = addr2chunk(freed_ch, -mask);
			if (!addr_is_managed(to_merge_ch)
			    || allocated_in_map(to_merge_ch)
			    || chunklevel(to_merge_ch) != order)
				break;
			freed_ch->magic = 0;

			/* merge with predecessor, point freed chunk there */
			freed_ch = to_merge_ch;
		} else {
			/* buddy lies above us */
			to_merge_ch = addr2chunk(freed_ch, mask);
			if (!addr_is_managed(to_merge_ch)
			    || allocated_in_map(to_merge_ch)
			    || chunklevel(to_merge_ch) != order)
				break;
			freed_ch->magic = 0;

			/* merge with successor, freed chunk already correct */
		}

		/* take the buddy off its freelist; it is now part of us */
		to_merge_ch->magic = 0;
		LIST_REMOVE(to_merge_ch, entries);

		order++;
	}

	freechunk_link(freed_ch, order);

	SANITY_CHECK();
}
488