1/** 2 * \file 3 * \brief Memory server 4 */ 5 6/* 7 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich. 8 * All rights reserved. 9 * 10 * This file is distributed under the terms in the attached LICENSE file. 11 * If you do not find this file, copies can be found by writing to: 12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group. 13 */ 14 15#include <stdlib.h> 16#include <stdio.h> 17#include <inttypes.h> 18#include <barrelfish/barrelfish.h> 19#include <barrelfish/dispatch.h> 20#include <skb/skb.h> 21#include <mm/mm.h> 22#include <trace/trace.h> 23#include <trace_definitions/trace_defs.h> 24#include <barrelfish/morecore.h> 25#include <barrelfish/monitor_client.h> 26 27#include <if/mem_defs.h> 28#include <if/monitor_defs.h> 29 30 31//#define OSDI18_PAPER_HACK 1 32 33size_t mem_total = 0, mem_avail = 0; 34 35 36//// XXX HACK for OSDI PAPER!!! BAD! 37#ifdef OSID18_PAPER_HACK 38static struct capref model_mem_cap; 39static genpaddr_t model_mem_base = 0; 40static genpaddr_t model_mem_limit = 0; 41#endif 42 43/* parameters for size of supported RAM and thus required storage */ 44// architecture, we use paddr_t as the type to represent region 45// limits, which limits us its size. 46#if defined(__x86_64__) 47// x86_64 usually supports 48 bits of physical address space, maybe figure 48// this out dynamically? -SG,2014-04-30 49# define MAXSIZEBITS 48 ///< Max size of memory in allocator 50#elif defined(__i386__) 51# define MAXSIZEBITS 32 52#elif defined(__arm__) 53/* XXX This is better if < 32! - but there were no compile time warnings! 
*/ 54# define MAXSIZEBITS 31 55#elif defined(__aarch64__) 56# define MAXSIZEBITS 48 57#else 58# error Unknown architecture 59#endif 60 61#define OBJBITS_DISPATCHER 10 62#define MINSIZEBITS OBJBITS_DISPATCHER ///< Min size of each allocation 63#define MAXCHILDBITS 4 ///< Max branching of BTree nodes 64 65/// Maximum depth of the BTree, assuming only branching by two at each level 66#define MAXDEPTH (MAXSIZEBITS - MINSIZEBITS + 1) 67/// Maximum number of BTree nodes 68#define NNODES ((1UL << MAXDEPTH) - 1) 69 70/* Parameters for per-core memserv */ 71#define PERCORE_BITS 24 72#define PERCORE_MEM (1UL<<PERCORE_BITS) ///< How much memory per-core 73 74//static struct multi_slot_allocator msa; 75static struct bootinfo *bi; 76 77/** 78 * \brief Size of CNodes to be created by slot allocator. 79 * 80 * Must satisfy both: 81 * #CNODE_BITS >= MAXCHILDBITS (cnode enough for max branching factor) 82 * (1UL << #CNODE_BITS) ** 2 >= #NNODES (total number of slots is enough) 83 */ 84#define CNODE_BITS 13 85#define NCNODES (1UL << CNODE_BITS) ///< Maximum number of CNodes 86 87/// Watermark at which we must refill the slab allocator used for nodes 88#define MINSPARENODES (MAXDEPTH * 8) // XXX: FIXME: experimentally determined! 
/// MM allocator instance data
static struct mm mm_ram;

/// Slot allocator for MM
static struct slot_prealloc ram_slot_alloc;

/**
 * \brief Allocate a RAM cap of size 2^bits from the global allocator.
 *
 * \param ret      Filled in with the allocated RAM capability on success
 * \param bits     log2 of the requested size; must be >= MINSIZEBITS
 * \param minbase  Lower bound of acceptable physical range (ignored if maxlimit == 0)
 * \param maxlimit Upper bound of acceptable physical range; 0 means "anywhere"
 *
 * \returns Error from mm_alloc()/mm_alloc_range()
 */
static errval_t mymm_alloc(struct capref *ret, uint8_t bits, genpaddr_t minbase,
                           genpaddr_t maxlimit)
{
    errval_t err;

    assert(bits >= MINSIZEBITS);

    if(maxlimit == 0) {
        // no range constraint requested: allocate from anywhere
        err = mm_alloc(&mm_ram, bits, ret, NULL);
    } else {
        err = mm_alloc_range(&mm_ram, bits, minbase, maxlimit, ret, NULL);
    }

    return err;
}

/**
 * \brief Return a RAM cap of size 2^bits at the given base to the allocator.
 *
 * If the region was never owned by the allocator (MM_ERR_NOT_FOUND), it is
 * added as new memory instead, growing mem_total as well as mem_avail.
 */
static errval_t mymm_free(struct capref ramcap, genpaddr_t base, uint8_t bits)
{
    errval_t ret;
    genpaddr_t mem_to_add;

    mem_to_add = (genpaddr_t)1 << bits;

    ret = mm_free(&mm_ram, ramcap, base, bits);
    if (err_is_fail(ret)) {
        if (err_no(ret) == MM_ERR_NOT_FOUND) {
            // memory wasn't there initially, add it
            ret = mm_add(&mm_ram, ramcap, bits, base);
            if (err_is_fail(ret)) {
                /* DEBUG_ERR(ret, "failed to add RAM to allocator"); */
                return ret;
            }
            mem_total += mem_to_add;
        } else {
            /* DEBUG_ERR(ret, "failed to free RAM in allocator"); */
            return ret;
        }
    }

    mem_avail += mem_to_add;

    return SYS_ERR_OK;
}


/// state for a pending reply
// because we have only one message that we send to a client, and there can only
// be one outstanding per binding (because this is an RPC interface) this is
// quite simple
struct pending_reply {
    struct mem_binding *b;   ///< binding the reply must be sent on
    errval_t err;            ///< result code to deliver to the client
    struct capref *cap;      ///< heap-allocated cap slot (allocate path only;
                             ///  freed by allocate_response_done)
};


/**
 * \brief Retry continuation for a free_monitor_response that hit TX_BUSY.
 *
 * Owns the heap-allocated pending_reply: frees it once the send succeeds,
 * or on an unrecoverable send error.
 */
static void retry_free_reply(void *arg)
{
    struct pending_reply *r = arg;
    assert(r != NULL);
    struct mem_binding *b = r->b;
    errval_t err;

    err = b->tx_vtbl.free_monitor_response(b, NOP_CONT, r->err);
    if (err_is_ok(err)) {
        b->st = NULL;
        free(r);
    } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
        // channel still busy: queue ourselves to run again when it drains
        err = b->register_send(b, get_default_waitset(),
                               MKCONT(retry_free_reply,r));
    }

    if (err_is_fail(err)) {
        DEBUG_ERR(err, "failed to reply to free request");
        free(r);
    }
}

/**
 * \brief Send-complete continuation for allocate_response.
 *
 * Deletes our local copy of the sent cap (the client holds its own copy now)
 * and frees the heap-allocated capref.
 */
static void allocate_response_done(void *arg)
{
    struct capref *cap = arg;

    if(!capref_is_null(*cap)) {
        errval_t err = cap_delete(*cap);
        if(err_is_fail(err) && err_no(err) != SYS_ERR_CAP_NOT_FOUND) {
            DEBUG_ERR(err, "cap_delete after send. This memory will leak.");
        }
    }

    free(cap);
}

/**
 * \brief Retry continuation for an allocate_response that hit TX_BUSY.
 *
 * On success or unrecoverable failure the pending_reply (and, on failure,
 * the cap via allocate_response_done) is released.
 */
static void retry_reply(void *arg)
{
    struct pending_reply *r = arg;
    assert(r != NULL);
    struct mem_binding *b = r->b;
    errval_t err;

    err = b->tx_vtbl.allocate_response(b, MKCONT(allocate_response_done, r->cap),
                                       r->err, *r->cap);
    if (err_is_ok(err)) {
        b->st = NULL;
        free(r);   // r->cap is freed later by allocate_response_done
    } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
        err = b->register_send(b, get_default_waitset(), MKCONT(retry_reply,r));
        assert(err_is_ok(err));
    } else {
        DEBUG_ERR(err, "failed to reply to memory request");
        allocate_response_done(r->cap);
        // NOTE(review): r itself is not freed on this path — looks like a
        // small leak; confirm before changing.
    }
}



/**
 * \brief Handler for the free_monitor RPC: return RAM to the allocator
 *        and send the result back to the client.
 */
static void mem_free_handler(struct mem_binding *b,
                             struct capref ramcap, genpaddr_t base,
                             uint8_t bits)
{
    errval_t ret;
    errval_t err;

#ifdef OSDI18_PAPER_HACK
    // regions covered by the model mem cap are never handed to mm_ram,
    // so pretend the free succeeded
    if (base >= model_mem_base && base + (1UL << bits) - 1 <= model_mem_limit) {
        debug_printf(
            "//// XXX HACK for OSDI PAPER!!! Use mem cap for [%lx..%lx]\n",
            base, base + (1UL << bits) - 1);
        ret = SYS_ERR_OK;
    } else {
        ret = mymm_free(ramcap, base, bits);
    }
#else
    ret = mymm_free(ramcap, base, bits);
#endif

    err = b->tx_vtbl.free_monitor_response(b, NOP_CONT, ret);
    if (err_is_fail(err)) {
        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
            // channel busy: defer the reply via a heap-allocated continuation
            struct pending_reply *r = malloc(sizeof(struct pending_reply));
            assert(r != NULL);
            r->b = b;
            r->err = ret;
            err = b->register_send(b, get_default_waitset(),
                                   MKCONT(retry_free_reply,r));
            assert(err_is_ok(err));
        } else {
            DEBUG_ERR(err, "failed to reply to free request");
        }
    }
}


/**
 * \brief Handler for the available RPC: report current available/total bytes.
 */
static void mem_available_handler(struct mem_binding *b)
{
    errval_t err;
    /* Reply */
    err = b->tx_vtbl.available_response(b, NOP_CONT, mem_avail, mem_total);
    if (err_is_fail(err)) {
        // FIXME: handle FLOUNDER_ERR_TX_BUSY
        DEBUG_ERR(err, "failed to reply to memory request");
    }

}

// FIXME: error handling (not asserts) needed in this function
/**
 * \brief Handler for the allocate RPC: hand out a 2^bits RAM cap, refilling
 *        the MM's slot and slab allocators first so the allocation can't
 *        starve them. Replies with NULL_CAP on allocation failure.
 */
static void mem_allocate_handler(struct mem_binding *b, uint8_t bits,
                                 genpaddr_t minbase, genpaddr_t maxlimit)
{
    // heap-allocated so it survives until the send completes; freed by
    // allocate_response_done
    struct capref *cap = malloc(sizeof(struct capref));
    errval_t err, ret;

    // TODO: do this properly and inform caller, -SG 2016-04-20
    // XXX: Do we even want to have this restriction here? It's not necessary
    // for types that are not mappable (e.g. Dispatcher)
    //if (bits < BASE_PAGE_BITS) {
    //    bits = BASE_PAGE_BITS;
    //}
    //if (bits < BASE_PAGE_BITS) {
    //    debug_printf("WARNING: ALLOCATING RAM CAP WITH %u BITS\n", bits);
    //}

    trace_event(TRACE_SUBSYS_MEMSERV, TRACE_EVENT_MEMSERV_ALLOC, bits);

    /* refill slot allocator if needed */
    err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "slot_prealloc_refill in mem_allocate_handler");
    }
    assert(err_is_ok(err));

    /* refill slab allocator if needed */
    while (slab_freecount(&mm_ram.slabs) <= MINSPARENODES) {
        struct capref frame;
        err = slot_alloc(&frame);
        assert(err_is_ok(err));
        err = frame_create(frame, BASE_PAGE_SIZE * 8, NULL);
        assert(err_is_ok(err));
        void *buf;
        err = vspace_map_one_frame(&buf, BASE_PAGE_SIZE * 8, frame, NULL, NULL);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "vspace_map_one_frame failed");
            assert(buf);
        }
        slab_grow(&mm_ram.slabs, buf, BASE_PAGE_SIZE * 8);
    }

#ifdef OSDI18_PAPER_HACK
    //// XXX HACK for OSDI PAPER!!! BAD!
    // requests entirely inside the reserved model region are served by
    // retyping directly out of model_mem_cap instead of going through mm_ram
    if (minbase >= model_mem_base && maxlimit <= model_mem_limit) {
        debug_printf("//// XXX HACK for OSDI PAPER!!! Use mem cap for [%lx..%lx]\n",
                     minbase, maxlimit);

        ret = slot_alloc_prealloc(mm_ram.slot_alloc_inst, 1, cap);
        if (err_is_ok(ret)) {
            debug_printf("//// XXX HACK for OSDI PAPER!!! %lx Offset=%lu M, bits=%u\n",
                         (minbase - model_mem_base), (minbase - model_mem_base) >> 20, bits);

            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         model_mem_base, model_mem_limit);
            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         minbase, maxlimit);
            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         minbase, (1UL << bits));


            ret = cap_retype(*cap, model_mem_cap, (minbase - model_mem_base),
                             ObjType_RAM, (1UL << bits), 1);
            if (err_is_fail(ret)) {
                *cap = NULL_CAP;
            }
        }
    } else {
#endif
    ret = mymm_alloc(cap, bits, minbase, maxlimit);
    if (err_is_ok(ret)) {
        mem_avail -= 1UL << bits;
    } else {
        // DEBUG_ERR(ret, "allocation of %d bits in % " PRIxGENPADDR "-%" PRIxGENPADDR " failed",
        //          bits, minbase, maxlimit);
        *cap = NULL_CAP;
    }
#ifdef OSDI18_PAPER_HACK
    }
#endif



    /* Reply */
    err = b->tx_vtbl.allocate_response(b, MKCONT(allocate_response_done, cap),
                                       ret, *cap);
    if (err_is_fail(err)) {
        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
            // channel busy: defer via retry_reply, which takes ownership of r
            struct pending_reply *r = malloc(sizeof(struct pending_reply));
            assert(r != NULL);
            r->b = b;
            r->err = ret;
            r->cap = cap;
            err = b->register_send(b, get_default_waitset(), MKCONT(retry_reply,r));
            assert(err_is_ok(err));
        } else {
            DEBUG_ERR(err, "failed to reply to memory request");
            allocate_response_done(cap);
        }
    }
}

/**
 * \brief Debug helper: print one bootinfo RAM region in human-readable units.
 *        Currently compiled out (#if 0).
 */
static void dump_ram_region(int idx, struct mem_region* m)
{
#if 0
    uintptr_t start, limit;

    start = (uintptr_t)m->mr_base;
    limit = start + m->mr_bytes;

    char prefix = ' ';
    size_t quantity = m->mr_bytes;

    if (m->mr_bytes >= (1UL << 30)) {
        prefix = 'G';
        quantity >>= 30;
    }
    else if (m->mr_bytes >= (1UL << 20)) {
        prefix = 'M';
        quantity >>= 20;
    }
    else if (m->mr_bytes >= (1UL << 10)) {
        prefix = 'K';
        quantity >>= 10;
    }

    printf("RAM region %d: 0x%" PRIxPTR
           " - 0x%" PRIxPTR " (%zu %cB, %u bits)\n",
           idx, start, limit, quantity, prefix, log2ceil(m->mr_bytes));
#endif // 0
}

/**
 * \brief Find the lowest base address among unconsumed RegionType_Empty
 *        regions in the bootinfo. Returns 0 if there are none.
 */
static genpaddr_t find_smallest_address(void)
{
    bool isFirst = true;
    genpaddr_t smallest_addr = 0;
//
    for (int i = 0; i < bi->regions_length; i++) {
        if (bi->regions[i].mr_type != RegionType_Empty) {
            continue;
        }

        if (bi->regions[i].mr_consumed) {
            continue;
        }

        if (isFirst) {
            smallest_addr = bi->regions[i].mr_base;
            isFirst = false;
            continue;
        }

        if (smallest_addr > bi->regions[i].mr_base) {
            smallest_addr = bi->regions[i].mr_base;
        }
    } // end for: for every record
    return smallest_addr;
} // end function: find_smallest_address

/**
 * \brief Guess the base physical address of RAM for the MM allocator.
 *
 * On ARM this distinguishes pandaboard-like platforms (RAM at 0x80000000)
 * from gem5 etc.; everywhere else RAM is assumed to start at 0.
 */
static genpaddr_t guess_physical_addr_start(void)
{
    genpaddr_t start_physical = find_smallest_address();
#if defined(__arm__)
    if (start_physical > 0x80000000) {
        // This is most probably a pandaboard!
        start_physical = 0x80000000;
    } else {
        // This is gem5 or some other architecture
        start_physical = 0;
    }
#else
    start_physical = 0;
#endif
    return start_physical;
} // end function: guess_physical_addr_start

// FIXME: error handling (not asserts) needed in this function
//XXX: workaround for inline bug of arm-gcc 4.6.1 and lower
#if defined(__ARM_ARCH_7A__) && defined(__GNUC__) \
        && __GNUC__ == 4 && __GNUC_MINOR__ <= 6 && __GNUC_PATCHLEVEL__ <= 1
static __attribute__((noinline)) errval_t
#else
static errval_t
#endif
initialize_ram_alloc(void)
{
    errval_t err;

    /* Initialize slot allocator by passing a L2 cnode cap for it to start with */
    // Use ROOTCN_SLOT_SLOT_ALLOC0 as initial cnode for mm slot allocator
    struct capref cnode_start_cap = {
        .cnode = {
            .croot = CPTR_ROOTCN,
            .cnode = ROOTCN_SLOT_ADDR(ROOTCN_SLOT_SLOT_ALLOC0),
            .level = CNODE_TYPE_OTHER,
        },
        .slot  = 0,
    };

    /* init slot allocator */
    err = slot_prealloc_init(&ram_slot_alloc, MAXCHILDBITS,
                             cnode_start_cap, L2_CNODE_SLOTS,
                             &mm_ram);
    assert(err_is_ok(err));

    err = mm_init(&mm_ram, ObjType_RAM, guess_physical_addr_start(),
                  MAXSIZEBITS, MAXCHILDBITS, NULL,
                  slot_alloc_prealloc, NULL, &ram_slot_alloc, true);
    assert(err_is_ok(err));

    /* give MM allocator static storage to get it started */
    // bootstraps the slab allocator before any dynamic refill is possible
    static char nodebuf[SLAB_STATIC_SIZE(MINSPARENODES, MM_NODE_SIZE(MAXCHILDBITS))];
    slab_grow(&mm_ram.slabs, nodebuf, sizeof(nodebuf));

    /* walk bootinfo and add all unused RAM caps to allocator */
    // caps for RAM regions live in consecutive slots of the supercn cnode,
    // in bootinfo order; mem_cap.slot tracks the current one
    struct capref mem_cap = {
        .cnode = cnode_super,
        .slot  = 0,
    };

    for (int i = 0; i < bi->regions_length; i++) {
        if (bi->regions[i].mr_type == RegionType_Empty) {

#ifdef OSDI18_PAPER_HACK
            //// XXX HACK for OSDI PAPER!!! BAD!
            // reserve the first empty region above 4 GiB for model allocs
            // instead of handing it to mm_ram; stop scanning afterwards
            if ( bi->regions[i].mr_base >= (4UL << 30)) {
                //// XXX HACK for OSDI PAPER!!! BAD!
                debug_printf("//// XXX HACK for OSDI PAPER!!! Use mem cap for model allocs [%lx..%lx]\n",
                             bi->regions[i].mr_base, bi->regions[i].mr_base + bi->regions[i].mr_bytes - 1);
                dump_ram_region(i, bi->regions + i);

                model_mem_cap = mem_cap;
                model_mem_base = bi->regions[i].mr_base;
                model_mem_limit = model_mem_base + bi->regions[i].mr_bytes - 1;
                break;
            }

            debug_printf("Adding region to memory allocator:\n");
#endif
            dump_ram_region(i, bi->regions + i);

            mem_total += bi->regions[i].mr_bytes;

            if (bi->regions[i].mr_consumed) {
                // region consumed by init, skipped
                mem_cap.slot++;
                continue;
            }

            err = mm_add_multi(&mm_ram, mem_cap, bi->regions[i].mr_bytes,
                               bi->regions[i].mr_base);
            if (err_is_ok(err)) {
                mem_avail += bi->regions[i].mr_bytes;
            } else {
                DEBUG_ERR(err, "Warning: adding RAM region %d (%p/%zu) FAILED",
                          i, bi->regions[i].mr_base, bi->regions[i].mr_bytes);
            }

            /* try to refill slot allocator (may fail if the mem allocator is empty) */
            err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
            if (err_is_fail(err) && err_no(err) != MM_ERR_SLOT_MM_ALLOC) {
                DEBUG_ERR(err, "in slot_prealloc_refill() while initialising"
                          " memory allocator");
                abort();
            }

            /* refill slab allocator if needed and possible */
            if (slab_freecount(&mm_ram.slabs) <= MINSPARENODES
                && mem_avail > (1UL << (CNODE_BITS + OBJBITS_CTE)) * 2
                               + 10 * BASE_PAGE_SIZE) {
                slab_default_refill(&mm_ram.slabs); // may fail
            }

            mem_cap.slot++;
        }
    }

    err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
    if (err_is_fail(err)) {
        printf("Fatal internal error in RAM allocator: failed to initialise "
               "slot allocator\n");
        DEBUG_ERR(err, "failed to init slot allocator");
        abort();
    }

    printf("RAM allocator initialised, %zd MB (of %zd MB) available\n",
           mem_avail / 1024 / 1024, mem_total / 1024 / 1024);

    return SYS_ERR_OK;
}

/**
 * \brief Export callback: publish our service iref with the monitor so
 *        clients can bind to the memory service.
 */
static void export_callback(void *st, errval_t err, iref_t iref)
{
    assert(err_is_ok(err));
    struct monitor_binding *mb = get_monitor_binding();
    err = mb->tx_vtbl. set_mem_iref_request(mb, NOP_CONT, iref);
    assert(err_is_ok(err));
}

/// Receive handlers for the mem interface
static struct mem_rx_vtbl rx_vtbl = {
    .allocate_call = mem_allocate_handler,
    .available_call = mem_available_handler,
    .free_monitor_call = mem_free_handler,
};

/// Set on first client connect; consumed by the (disabled) RPC-init code in main()
static bool do_rpc_init = false;

/**
 * \brief Connect callback: accept every client and install our handlers.
 */
static errval_t connect_callback(void *st, struct mem_binding *b)
{
    do_rpc_init = true;
    b->rx_vtbl = rx_vtbl;
    // TODO: set error handler
    return SYS_ERR_OK;
}

/**
 * \brief Entry point: bootstrap the monitor LMP channel, initialise the RAM
 *        allocator from bootinfo, export the mem service, then dispatch
 *        events forever.
 *
 * argv[1] carries the bootinfo address as a decimal string.
 */
int main(int argc, char ** argv)
{
    errval_t err;
    struct waitset *ws = get_default_waitset();

    if(argc < 2) {
        fprintf(stderr, "Usage: %s <bootinfo_location>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // First argument contains the bootinfo location
    bi = (struct bootinfo*)strtol(argv[1], NULL, 10);

    /* construct special-case LMP connection to monitor */
    static struct monitor_lmp_binding mcb;
    set_monitor_binding(&mcb.b);

    err = monitor_client_lmp_accept(&mcb, ws, DEFAULT_LMP_BUF_WORDS);
    if(err_is_fail(err)) {
        USER_PANIC_ERR(err, "monitor_client_lmp_accept");
    }

    idc_init();

    /* Send the cap for this endpoint to init, who will pass it to
       the monitor */
    err = lmp_ep_send0(cap_initep, 0, mcb.chan.local_cap);
    if(err_is_fail(err)) {
        USER_PANIC_ERR(err, "lmp_ep_send0");
    }

    // XXX: handle messages (ie. block) until the monitor binding is ready
    while (capref_is_null(mcb.chan.remote_cap)) {
        err = event_dispatch(ws);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "in event_dispatch while waiting for monitor");
            return EXIT_FAILURE;
        }
    }

    /* Initialize our own memory allocator */
    // must happen before initialize_ram_alloc(), which allocates memory
    err = ram_alloc_set(mymm_alloc);
    if(err_is_fail(err)) {
        USER_PANIC_ERR(err, "ram_alloc_set");
    }

    err = initialize_ram_alloc();
    if(err_is_fail(err)) {
        USER_PANIC_ERR(err, "initialize_ram_alloc");
    }

    err = mem_export(NULL, export_callback, connect_callback, ws,
                     IDC_EXPORT_FLAGS_DEFAULT);
    if(err_is_fail(err)) {
        USER_PANIC_ERR(err, "mem_export");
    }

    /* initialise tracing */
#if defined(TRACING_EXISTS) && defined(CONFIG_TRACE)
    err = trace_my_setup();
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "initialising tracing");
        // return EXIT_FAILURE;
    }
    trace_init_disp();
#endif


    // handle messages on this thread
    while (true) {
        err = event_dispatch(ws);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "in main event_dispatch loop");
            return EXIT_FAILURE;
        }

#if 0
        static bool in_rpc_init = false;
        if (do_rpc_init && !in_rpc_init && !get_monitor_blocking_binding()) {
            // XXX: this is an ugly hack try and get a monitor rpc client once
            // the monitor is ready
            in_rpc_init = true;
            do_rpc_init = false;
            /* Bind with monitor's blocking rpc channel */
            err = monitor_client_blocking_rpc_init();
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "monitor_client_blocking_rpc_init");
            }
            else {
                debug_printf("got monitor_blocking_rpc_client\n");
            }
            in_rpc_init = false;
        }
#endif
    }
}