1/*- 2 * Copyright (c) 2015 Antti Kantee. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 14 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26/*- 27 **************************************************************************** 28 * (C) 2003 - Rolf Neugebauer - Intel Research Cambridge 29 * (C) 2005 - Grzegorz Milos - Intel Research Cambridge 30 **************************************************************************** 31 * 32 * File: mm.c 33 * Author: Rolf Neugebauer (neugebar@dcs.gla.ac.uk) 34 * Changes: Grzegorz Milos 35 * 36 * Date: Aug 2003, chages Aug 2005 37 * 38 * Environment: Xen Minimal OS 39 * Description: memory management related functions 40 * contains buddy page allocator from Xen. 
41 * 42 **************************************************************************** 43 * Permission is hereby granted, free of charge, to any person obtaining a copy 44 * of this software and associated documentation files (the "Software"), to 45 * deal in the Software without restriction, including without limitation the 46 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 47 * sell copies of the Software, and to permit persons to whom the Software is 48 * furnished to do so, subject to the following conditions: 49 * 50 * The above copyright notice and this permission notice shall be included in 51 * all copies or substantial portions of the Software. 52 * 53 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 54 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 55 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 56 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 57 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 58 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 59 * DEALINGS IN THE SOFTWARE. 60 */ 61 62#include <bmk-core/core.h> 63#include <bmk-core/null.h> 64#include <bmk-core/pgalloc.h> 65#include <bmk-core/platform.h> 66#include <bmk-core/printf.h> 67#include <bmk-core/queue.h> 68#include <bmk-core/string.h> 69 70#include <bmk-pcpu/pcpu.h> 71 72#ifndef BMK_PGALLOC_DEBUG 73#define DPRINTF(x) 74#define SANITY_CHECK() 75#else 76int bmk_pgalloc_debug = 0; 77#define DPRINTF(x) if (bmk_pgalloc_debug) bmk_printf x 78#define SANITY_CHECK() sanity_check() 79#endif 80 81unsigned long pgalloc_totalkb, pgalloc_usedkb; 82 83/* 84 * The allocation bitmap is offset to the first page loaded, which is 85 * nice if someone loads memory starting in high ranges. 
Notably, 86 * we don't need a pg->va operation since allocation is always done 87 * through the freelists in va, and the pgmap is used only as a lookup 88 * table for coalescing entries when pages are freed. 89 */ 90static void *minpage_addr, *maxpage_addr; 91#define va_to_pg(x) \ 92 (((unsigned long)x - (unsigned long)minpage_addr)>>BMK_PCPU_PAGE_SHIFT) 93 94#define addr2chunk(_addr_,_offset_) \ 95 ((struct chunk *)(((char *)_addr_)+(_offset_))) 96 97#define order2size(_order_) (1UL<<(_order_ + BMK_PCPU_PAGE_SHIFT)) 98 99/* 100 * ALLOCATION BITMAP 101 * One bit per page of memory. Bit set => page is allocated. 102 */ 103 104static unsigned long *alloc_bitmap; 105#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8) 106bmk_ctassert((PAGES_PER_MAPWORD & (PAGES_PER_MAPWORD-1)) == 0); 107 108static int 109addr_is_managed(void *addr) 110{ 111 112 return addr >= minpage_addr && addr < maxpage_addr; 113} 114 115static int 116allocated_in_map(void *addr) 117{ 118 unsigned long pagenum; 119 120 bmk_assert(addr_is_managed(addr)); 121 pagenum = va_to_pg(addr); 122 return (alloc_bitmap[pagenum/PAGES_PER_MAPWORD] \ 123 & (1UL<<(pagenum&(PAGES_PER_MAPWORD-1)))) != 0; 124} 125 126/* 127 * Hint regarding bitwise arithmetic in map_{alloc,free}: 128 * -(1<<n) sets all bits >= n. 129 * (1<<n)-1 sets all bits < n. 130 * Variable names in map_{alloc,free}: 131 * *_idx == Index into `alloc_bitmap' array. 132 * *_off == Bit offset within an element of the `alloc_bitmap' array. 
133 */ 134 135#define PAGES_TO_MAPOPVARS(va, np) \ 136 unsigned long start, end, curr_idx, end_idx; \ 137 unsigned long first_page = va_to_pg(va); \ 138 curr_idx= first_page / PAGES_PER_MAPWORD; \ 139 start = first_page & (PAGES_PER_MAPWORD-1); \ 140 end_idx = (first_page + nr_pages) / PAGES_PER_MAPWORD; \ 141 end = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1); 142 143static void 144map_alloc(void *virt, unsigned long nr_pages) 145{ 146 PAGES_TO_MAPOPVARS(virt, nr_pages); 147 148 if (curr_idx == end_idx) { 149 alloc_bitmap[curr_idx] |= ((1UL<<end)-1) & -(1UL<<start); 150 } else { 151 alloc_bitmap[curr_idx] |= -(1UL<<start); 152 while (++curr_idx < end_idx) 153 alloc_bitmap[curr_idx] = ~0UL; 154 alloc_bitmap[curr_idx] |= (1UL<<end)-1; 155 } 156} 157 158static void 159map_free(void *virt, unsigned long nr_pages) 160{ 161 PAGES_TO_MAPOPVARS(virt, nr_pages); 162 163 if (curr_idx == end_idx) { 164 alloc_bitmap[curr_idx] &= -(1UL<<end) | ((1UL<<start)-1); 165 } else { 166 alloc_bitmap[curr_idx] &= (1UL<<start)-1; 167 while (++curr_idx != end_idx) 168 alloc_bitmap[curr_idx] = 0; 169 alloc_bitmap[curr_idx] &= -(1UL<<end); 170 } 171} 172 173/* 174 * BINARY BUDDY ALLOCATOR 175 */ 176 177#define CHUNKMAGIC 0x11020217 178struct chunk { 179 int level; 180 int magic; 181 182 LIST_ENTRY(chunk) entries; 183}; 184 185static int 186chunklevel(struct chunk *ch) 187{ 188 189 bmk_assert(ch->magic == CHUNKMAGIC); 190 return ch->level; 191} 192 193/* 194 * Linked lists of free chunks of different powers-of-two in size. 195 * The assumption is that pointer size * NBBY = va size. It's 196 * a pretty reasonable assumption, except that we really don't need 197 * that much address space. The order of what most CPUs implement (48bits) 198 * would be more than plenty. But since the unused levels don't consume 199 * much space, leave it be for now. 
 */
#define FREELIST_LEVELS (8*(sizeof(void*))-BMK_PCPU_PAGE_SHIFT)
static LIST_HEAD(, chunk) freelist[FREELIST_LEVELS];

/*
 * Stamp a free-chunk header onto addr and insert it at the head of
 * the freelist for the given order.
 */
static void
freechunk_link(void *addr, int order)
{
	struct chunk *ch = addr;

	ch->level = order;
	ch->magic = CHUNKMAGIC;

	LIST_INSERT_HEAD(&freelist[order], ch, entries);
}

#ifdef BMK_PGALLOC_DEBUG
/* Debug aid: print the allocation bitmap of a page range as 1s and 0s. */
static void __attribute__((used))
print_allocation(void *start, unsigned nr_pages)
{
	unsigned long addr = (unsigned long)start;

	for (; nr_pages > 0; nr_pages--, addr += BMK_PCPU_PAGE_SIZE) {
		if (allocated_in_map((void *)addr))
			bmk_printf("1");
		else
			bmk_printf("0");
	}

	bmk_printf("\n");
}

/*
 * Check freelist consistency: every chunk on a freelist must be
 * marked free in the bitmap and carry an intact magic.
 */
static void
sanity_check(void)
{
	unsigned int x;
	struct chunk *head;

	for (x = 0; x < FREELIST_LEVELS; x++) {
		LIST_FOREACH(head, &freelist[x], entries) {
			bmk_assert(!allocated_in_map(head));
			bmk_assert(head->magic == CHUNKMAGIC);
		}
	}
}
#endif

/* Print total/used/remaining stats plus a per-order freelist breakdown. */
void
bmk_pgalloc_dumpstats(void)
{
	struct chunk *ch;
	unsigned long remainingkb;
	unsigned i;

	remainingkb = pgalloc_totalkb - pgalloc_usedkb;
	bmk_printf("pgalloc total %ld kB, used %ld kB (remaining %ld kB)\n",
	    pgalloc_totalkb, pgalloc_usedkb, remainingkb);

	bmk_printf("available chunks:\n");
	for (i = 0; i < FREELIST_LEVELS; i++) {
		unsigned long chunks, levelhas;

		if (LIST_EMPTY(&freelist[i]))
			continue;

		chunks = 0;
		LIST_FOREACH(ch, &freelist[i], entries) {
			chunks++;
		}
		levelhas = chunks * (order2size(i)>>10);
		bmk_printf("%8ld kB: %8ld chunks, %12ld kB\t(%2ld%%)\n",
		    order2size(i)>>10, chunks, levelhas,
		    (100*levelhas)/remainingkb);
	}
}

/*
 * Split [addr, addr+range) into maximal naturally-aligned power-of-two
 * chunks and put each one on the freelist matching its order.
 * addr must be page-aligned and range a multiple of the page size.
 */
static void
carverange(unsigned long addr, unsigned long range)
{
	struct chunk *ch;
	unsigned i, r;

	while (range) {
		/*
		 * Next chunk is limited by alignment of addr, but also
		 * must not be bigger than remaining range.
		 */
		i = __builtin_ctzl(addr);	/* log2 of addr's alignment */
		r = 8*sizeof(range) - (__builtin_clzl(range)+1); /* floor(log2(range)) */
		if (i > r) {
			i = r;
		}
		i -= BMK_PCPU_PAGE_SHIFT;	/* byte order -> buddy order */

		ch = addr2chunk(addr, 0);
		freechunk_link(ch, i);
		addr += order2size(i);
		range -= order2size(i);

		DPRINTF(("bmk_pgalloc: carverange chunk 0x%lx at %p\n",
		    order2size(i), ch));
	}
}

/*
 * Load [min,max] as available addresses.
 */
void
bmk_pgalloc_loadmem(unsigned long min, unsigned long max)
{
	static int called;	/* guard: this routine may run only once */
	unsigned long range, bitmap_size;
	unsigned int i;

	if (called)
		bmk_platform_halt("bmk_pgalloc_loadmem called more than once");
	called = 1;

	bmk_assert(max > min);

	min = bmk_round_page(min);
	max = bmk_trunc_page(max);

	DPRINTF(("bmk_pgalloc_loadmem: available memory [0x%lx,0x%lx]\n",
	    min, max));

	for (i = 0; i < FREELIST_LEVELS; i++) {
		LIST_INIT(&freelist[i]);
	}

	/* Allocate space for the allocation bitmap. */
	bitmap_size = ((max-min) >> (BMK_PCPU_PAGE_SHIFT+3)) + 1;
	bitmap_size = bmk_round_page(bitmap_size);
	alloc_bitmap = (unsigned long *)min;
	min += bitmap_size;
	range = max - min;

	minpage_addr = (void *)min;
	maxpage_addr = (void *)max;

	pgalloc_totalkb = range >> 10;
	pgalloc_usedkb = 0;

	/* All allocated by default. */
	bmk_memset(alloc_bitmap, ~0, bitmap_size);
	/* Free up the memory we've been given to play with. */
	map_free((void *)min, range>>BMK_PCPU_PAGE_SHIFT);

	carverange(min, range);
}

/* can we allocate for given align from freelist index i?
*/ 351static struct chunk * 352satisfies_p(int i, unsigned long align) 353{ 354 struct chunk *ch; 355 unsigned long p; 356 357 LIST_FOREACH(ch, &freelist[i], entries) { 358 p = (unsigned long)ch; 359 if ((p & (align-1)) == 0) 360 return ch; 361 } 362 363 return NULL; 364} 365 366void * 367bmk_pgalloc(int order) 368{ 369 370 return bmk_pgalloc_align(order, BMK_PCPU_PAGE_SIZE); 371} 372 373void * 374bmk_pgalloc_align(int order, unsigned long align) 375{ 376 struct chunk *alloc_ch; 377 unsigned long p, len; 378 unsigned int bucket; 379 380 bmk_assert(align >= BMK_PCPU_PAGE_SIZE && (align & (align-1)) == 0); 381 bmk_assert((unsigned)order < FREELIST_LEVELS); 382 383 for (bucket = order; bucket < FREELIST_LEVELS; bucket++) { 384 if ((alloc_ch = satisfies_p(bucket, align)) != NULL) 385 break; 386 } 387 if (!alloc_ch) { 388 bmk_printf("cannot handle page request order %d/0x%lx!\n", 389 order, align); 390 return 0; 391 } 392 /* Unlink the chunk. */ 393 LIST_REMOVE(alloc_ch, entries); 394 395 bmk_assert(alloc_ch->magic == CHUNKMAGIC); 396 alloc_ch->magic = 0; 397 398 /* 399 * TODO: figure out if we can cheaply carve the block without 400 * using the best alignment. 
401 */ 402 len = order2size(order); 403 p = (unsigned long)alloc_ch; 404 405 /* carve up leftovers (if any) */ 406 carverange(p+len, order2size(bucket) - len); 407 408 map_alloc(alloc_ch, 1UL<<order); 409 DPRINTF(("bmk_pgalloc: allocated 0x%lx bytes at %p\n", 410 order2size(order), alloc_ch)); 411 pgalloc_usedkb += len>>10; 412 413#ifdef BMK_PGALLOC_DEBUG 414 { 415 unsigned npgs = 1<<order; 416 char *p = (char *)alloc_ch; 417 418 for (npgs = 1<<order; npgs; npgs--, p += BMK_PCPU_PAGE_SIZE) { 419 bmk_assert(allocated_in_map(p)); 420 } 421 } 422#endif 423 424 SANITY_CHECK(); 425 426 bmk_assert(((unsigned long)alloc_ch & (align-1)) == 0); 427 return alloc_ch; 428} 429 430void 431bmk_pgfree(void *pointer, int order) 432{ 433 struct chunk *freed_ch, *to_merge_ch; 434 unsigned long mask; 435 436 DPRINTF(("bmk_pgfree: freeing 0x%lx bytes at %p\n", 437 order2size(order), pointer)); 438 439#ifdef BMK_PGALLOC_DEBUG 440 { 441 unsigned npgs = 1<<order; 442 char *p = (char *)pointer; 443 444 for (npgs = 1<<order; npgs; npgs--, p += BMK_PCPU_PAGE_SIZE) { 445 bmk_assert(allocated_in_map(p)); 446 } 447 } 448#endif 449 450 /* free the allocation in the bitmap */ 451 map_free(pointer, 1UL << order); 452 pgalloc_usedkb -= order2size(order)>>10; 453 454 /* create as large a free chunk as we can */ 455 for (freed_ch = pointer; (unsigned)order < FREELIST_LEVELS; ) { 456 mask = order2size(order); 457 if ((unsigned long)freed_ch & mask) { 458 to_merge_ch = addr2chunk(freed_ch, -mask); 459 if (!addr_is_managed(to_merge_ch) 460 || allocated_in_map(to_merge_ch) 461 || chunklevel(to_merge_ch) != order) 462 break; 463 freed_ch->magic = 0; 464 465 /* merge with predecessor, point freed chuck there */ 466 freed_ch = to_merge_ch; 467 } else { 468 to_merge_ch = addr2chunk(freed_ch, mask); 469 if (!addr_is_managed(to_merge_ch) 470 || allocated_in_map(to_merge_ch) 471 || chunklevel(to_merge_ch) != order) 472 break; 473 freed_ch->magic = 0; 474 475 /* merge with successor, freed chuck already correct 
*/ 476 } 477 478 to_merge_ch->magic = 0; 479 LIST_REMOVE(to_merge_ch, entries); 480 481 order++; 482 } 483 484 freechunk_link(freed_ch, order); 485 486 SANITY_CHECK(); 487} 488