/**
 * \file
 * \brief Memory server
 */

/*
 * Copyright (c) 2007, 2008, 2009, 2010, 2011, ETH Zurich.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <barrelfish/barrelfish.h>
#include <barrelfish/dispatch.h>
#include <skb/skb.h>
#include <mm/mm.h>
#include <trace/trace.h>
#include <trace_definitions/trace_defs.h>
#include <barrelfish/morecore.h>
#include <barrelfish/monitor_client.h>

#include <if/mem_defs.h>
#include <if/monitor_defs.h>

//#define OSDI18_PAPER_HACK 1

size_t mem_total = 0, mem_avail = 0;

//// XXX HACK for OSDI PAPER!!! BAD!
#ifdef OSDI18_PAPER_HACK
static struct capref model_mem_cap;
static genpaddr_t model_mem_base = 0;
static genpaddr_t model_mem_limit = 0;
#endif

/* parameters for size of supported RAM and thus required storage */
// XXX: Even though we could manage an arbitrary amount of RAM on any
// architecture, we use paddr_t as the type to represent region
// limits, which limits its size.
#if defined(__x86_64__)
// x86_64 usually supports 48 bits of physical address space; maybe figure
// this out dynamically? -SG,2014-04-30
#       define MAXSIZEBITS     48              ///< Max size of memory in allocator
#elif defined(__i386__)
#       define MAXSIZEBITS     32
#elif defined(__arm__)
/* XXX This is better if < 32! - but there were no compile time warnings! */
#       define MAXSIZEBITS     31
#elif defined(__aarch64__)
#       define MAXSIZEBITS     48
#else
#       error Unknown architecture
#endif

#define OBJBITS_DISPATCHER 10
#define MINSIZEBITS     OBJBITS_DISPATCHER ///< Min size of each allocation
#define MAXCHILDBITS    4               ///< Max branching of BTree nodes

/// Maximum depth of the BTree, assuming only branching by two at each level
#define MAXDEPTH        (MAXSIZEBITS - MINSIZEBITS + 1)
/// Maximum number of BTree nodes
#define NNODES          ((1UL << MAXDEPTH) - 1)
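/*
 * Worked example (for orientation): on x86_64, MAXSIZEBITS = 48 and
 * MINSIZEBITS = 10, so MAXDEPTH = 48 - 10 + 1 = 39 and the worst-case
 * node count is NNODES = 2^39 - 1.
 */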

/* Parameters for per-core memserv */
#define PERCORE_BITS 24
#define PERCORE_MEM (1UL << PERCORE_BITS)         ///< How much memory per-core
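// Example: PERCORE_BITS = 24 means 1UL << 24 = 16 MiB of memory per core.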

//static struct multi_slot_allocator msa;
static struct bootinfo *bi;

/**
 * \brief Size of CNodes to be created by slot allocator.
 *
 * Must satisfy both:
 *    #CNODE_BITS >= MAXCHILDBITS           (cnode enough for max branching factor)
 *    (1UL << #CNODE_BITS) ** 2 >= #NNODES  (total number of slots is enough)
 */
#define CNODE_BITS      13
#define NCNODES         (1UL << CNODE_BITS)     ///< Maximum number of CNodes

/// Watermark at which we must refill the slab allocator used for nodes
#define MINSPARENODES   (MAXDEPTH * 8) // XXX: FIXME: experimentally determined!
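// With the x86_64 values above, this watermark is 39 * 8 = 312 spare nodes.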

/// MM allocator instance data
static struct mm mm_ram;

/// Slot allocator for MM
static struct slot_prealloc ram_slot_alloc;

static errval_t mymm_alloc(struct capref *ret, uint8_t bits, genpaddr_t minbase,
                           genpaddr_t maxlimit)
{
    errval_t err;

    assert(bits >= MINSIZEBITS);

    if (maxlimit == 0) {
        err = mm_alloc(&mm_ram, bits, ret, NULL);
    } else {
        err = mm_alloc_range(&mm_ram, bits, minbase, maxlimit, ret, NULL);
    }

    return err;
}
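/*
 * Usage sketch (illustrative, not part of the server logic): request
 * 2^20 bytes (1 MiB) anywhere in physical memory. A maxlimit of 0 means
 * "no range constraint".
 *
 *   struct capref ram;
 *   errval_t e = mymm_alloc(&ram, 20, 0, 0);
 *   if (err_is_fail(e)) { ... handle allocation failure ... }
 */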

static errval_t mymm_free(struct capref ramcap, genpaddr_t base, uint8_t bits)
{
    errval_t ret;
    genpaddr_t mem_to_add;

    mem_to_add = (genpaddr_t)1 << bits;

    ret = mm_free(&mm_ram, ramcap, base, bits);
    if (err_is_fail(ret)) {
        if (err_no(ret) == MM_ERR_NOT_FOUND) {
            // memory wasn't there initially, add it
            ret = mm_add(&mm_ram, ramcap, bits, base);
            if (err_is_fail(ret)) {
                /* DEBUG_ERR(ret, "failed to add RAM to allocator"); */
                return ret;
            }
            mem_total += mem_to_add;
        } else {
            /* DEBUG_ERR(ret, "failed to free RAM in allocator"); */
            return ret;
        }
    }

    mem_avail += mem_to_add;

    return SYS_ERR_OK;
}

/// State for a pending reply.
// Because we send only one kind of message to a client, and an RPC interface
// allows at most one outstanding message per binding, this is quite simple.
struct pending_reply {
    struct mem_binding *b;
    errval_t err;
    struct capref *cap;
};

static void retry_free_reply(void *arg)
{
    struct pending_reply *r = arg;
    assert(r != NULL);
    struct mem_binding *b = r->b;
    errval_t err;

    err = b->tx_vtbl.free_monitor_response(b, NOP_CONT, r->err);
    if (err_is_ok(err)) {
        b->st = NULL;
        free(r);
    } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
        err = b->register_send(b, get_default_waitset(),
                               MKCONT(retry_free_reply, r));
    }

    if (err_is_fail(err)) {
        DEBUG_ERR(err, "failed to reply to free request");
        free(r);
    }
}

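/*
 * Continuation run once an allocate response has been fully transmitted:
 * delete the server's local copy of the sent cap (the client now holds its
 * own copy) and release the heap-allocated capref.
 */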
static void allocate_response_done(void *arg)
{
    struct capref *cap = arg;

    if (!capref_is_null(*cap)) {
        errval_t err = cap_delete(*cap);
        if (err_is_fail(err) && err_no(err) != SYS_ERR_CAP_NOT_FOUND) {
            DEBUG_ERR(err, "cap_delete after send. This memory will leak.");
        }
    }

    free(cap);
}

static void retry_reply(void *arg)
{
    struct pending_reply *r = arg;
    assert(r != NULL);
    struct mem_binding *b = r->b;
    errval_t err;

    err = b->tx_vtbl.allocate_response(b, MKCONT(allocate_response_done, r->cap),
                                       r->err, *r->cap);
    if (err_is_ok(err)) {
        b->st = NULL;
        free(r);
    } else if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
        err = b->register_send(b, get_default_waitset(), MKCONT(retry_reply, r));
        assert(err_is_ok(err));
    } else {
        DEBUG_ERR(err, "failed to reply to memory request");
        allocate_response_done(r->cap);
    }
}

static void mem_free_handler(struct mem_binding *b,
                             struct capref ramcap, genpaddr_t base,
                             uint8_t bits)
{
    errval_t ret;
    errval_t err;

#ifdef OSDI18_PAPER_HACK
    if (base >= model_mem_base && base + (1UL << bits) - 1 <= model_mem_limit) {
        debug_printf(
                "//// XXX HACK for OSDI PAPER!!! Use mem cap for [%lx..%lx]\n",
                base, base + (1UL << bits) - 1);
        ret = SYS_ERR_OK;
    } else {
        ret = mymm_free(ramcap, base, bits);
    }
#else
    ret = mymm_free(ramcap, base, bits);
#endif

    err = b->tx_vtbl.free_monitor_response(b, NOP_CONT, ret);
    if (err_is_fail(err)) {
        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
            struct pending_reply *r = malloc(sizeof(struct pending_reply));
            assert(r != NULL);
            r->b = b;
            r->err = ret;
            err = b->register_send(b, get_default_waitset(),
                                   MKCONT(retry_free_reply, r));
            assert(err_is_ok(err));
        } else {
            DEBUG_ERR(err, "failed to reply to free request");
        }
    }
}

static void mem_available_handler(struct mem_binding *b)
{
    errval_t err;
    /* Reply */
    err = b->tx_vtbl.available_response(b, NOP_CONT, mem_avail, mem_total);
    if (err_is_fail(err)) {
        // FIXME: handle FLOUNDER_ERR_TX_BUSY
        DEBUG_ERR(err, "failed to reply to memory request");
    }
}

// FIXME: error handling (not asserts) needed in this function
static void mem_allocate_handler(struct mem_binding *b, uint8_t bits,
                                 genpaddr_t minbase, genpaddr_t maxlimit)
{
    struct capref *cap = malloc(sizeof(struct capref));
    assert(cap != NULL);
    errval_t err, ret;

    // TODO: do this properly and inform caller, -SG 2016-04-20
    // XXX: Do we even want to have this restriction here? It's not necessary
    // for types that are not mappable (e.g. Dispatcher)
    //if (bits < BASE_PAGE_BITS) {
    //    bits = BASE_PAGE_BITS;
    //}
    //if (bits < BASE_PAGE_BITS) {
    //    debug_printf("WARNING: ALLOCATING RAM CAP WITH %u BITS\n", bits);
    //}

    trace_event(TRACE_SUBSYS_MEMSERV, TRACE_EVENT_MEMSERV_ALLOC, bits);

    /* refill slot allocator if needed */
    err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "slot_prealloc_refill in mem_allocate_handler");
    }
    assert(err_is_ok(err));

    /* refill slab allocator if needed */
    while (slab_freecount(&mm_ram.slabs) <= MINSPARENODES) {
        struct capref frame;
        err = slot_alloc(&frame);
        assert(err_is_ok(err));
        err = frame_create(frame, BASE_PAGE_SIZE * 8, NULL);
        assert(err_is_ok(err));
        void *buf;
        err = vspace_map_one_frame(&buf, BASE_PAGE_SIZE * 8, frame, NULL, NULL);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "vspace_map_one_frame failed");
            assert(buf);
        }
        slab_grow(&mm_ram.slabs, buf, BASE_PAGE_SIZE * 8);
    }
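    /* At this point the allocator has at least MINSPARENODES spare slab
     * nodes, so the allocation below should not run out of metadata storage
     * mid-operation; each loop iteration above maps a fresh 8-page frame
     * and hands it to the slab allocator. */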

#ifdef OSDI18_PAPER_HACK
    //// XXX HACK for OSDI PAPER!!! BAD!
    if (minbase >= model_mem_base && maxlimit <= model_mem_limit) {
        debug_printf("//// XXX HACK for OSDI PAPER!!! Use mem cap for [%lx..%lx]\n",
                     minbase, maxlimit);

        ret = slot_alloc_prealloc(mm_ram.slot_alloc_inst, 1, cap);
        if (err_is_ok(ret)) {
            debug_printf("//// XXX HACK for OSDI PAPER!!! %lx Offset=%lu M, bits=%u\n",
                         (minbase - model_mem_base), (minbase - model_mem_base) >> 20, bits);

            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         model_mem_base, model_mem_limit);
            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         minbase, maxlimit);
            debug_printf("//// XXX HACK for OSDI PAPER!!! [%lx..%lx]\n",
                         minbase, (1UL << bits));

            ret = cap_retype(*cap, model_mem_cap, (minbase - model_mem_base),
                             ObjType_RAM, (1UL << bits), 1);
            if (err_is_fail(ret)) {
                *cap = NULL_CAP;
            }
        }
    } else {
#endif
        ret = mymm_alloc(cap, bits, minbase, maxlimit);
        if (err_is_ok(ret)) {
            mem_avail -= 1UL << bits;
        } else {
            // DEBUG_ERR(ret, "allocation of %d bits in %" PRIxGENPADDR "-%" PRIxGENPADDR " failed",
            //           bits, minbase, maxlimit);
            *cap = NULL_CAP;
        }
#ifdef OSDI18_PAPER_HACK
    }
#endif

    /* Reply */
    err = b->tx_vtbl.allocate_response(b, MKCONT(allocate_response_done, cap),
                                       ret, *cap);
    if (err_is_fail(err)) {
        if (err_no(err) == FLOUNDER_ERR_TX_BUSY) {
            struct pending_reply *r = malloc(sizeof(struct pending_reply));
            assert(r != NULL);
            r->b = b;
            r->err = ret;
            r->cap = cap;
            err = b->register_send(b, get_default_waitset(), MKCONT(retry_reply, r));
            assert(err_is_ok(err));
        } else {
            DEBUG_ERR(err, "failed to reply to memory request");
            allocate_response_done(cap);
        }
    }
}

static void dump_ram_region(int idx, struct mem_region* m)
{
#if 0
    uintptr_t start, limit;

    start = (uintptr_t)m->mr_base;
    limit = start + m->mr_bytes;

    char prefix = ' ';
    size_t quantity = m->mr_bytes;

    if (m->mr_bytes >= (1UL << 30)) {
        prefix = 'G';
        quantity >>= 30;
    }
    else if (m->mr_bytes >= (1UL << 20)) {
        prefix = 'M';
        quantity >>= 20;
    }
    else if (m->mr_bytes >= (1UL << 10)) {
        prefix = 'K';
        quantity >>= 10;
    }

    printf("RAM region %d: 0x%" PRIxPTR
           " - 0x%" PRIxPTR " (%zu %cB, %u bits)\n",
           idx, start, limit, quantity, prefix, log2ceil(m->mr_bytes));
#endif // 0
}

static genpaddr_t find_smallest_address(void)
{
    bool isFirst = true;
    genpaddr_t smallest_addr = 0;

    for (int i = 0; i < bi->regions_length; i++) {
        if (bi->regions[i].mr_type != RegionType_Empty) {
            continue;
        }

        if (bi->regions[i].mr_consumed) {
            continue;
        }

        if (isFirst) {
            smallest_addr = bi->regions[i].mr_base;
            isFirst = false;
            continue;
        }

        if (smallest_addr > bi->regions[i].mr_base) {
            smallest_addr = bi->regions[i].mr_base;
        }
    } // end for: for every record
    return smallest_addr;
} // end function: find_smallest_address

static genpaddr_t guess_physical_addr_start(void)
{
    genpaddr_t start_physical = find_smallest_address();
#if defined(__arm__)
    if (start_physical > 0x80000000) {
        // This is most probably a pandaboard!
        start_physical = 0x80000000;
    } else {
        // This is gem5 or some other platform
        start_physical = 0;
    }
#else
    start_physical = 0;
#endif
    return start_physical;
} // end function: guess_physical_addr_start

// FIXME: error handling (not asserts) needed in this function
// XXX: workaround for inline bug of arm-gcc 4.6.1 and lower
#if defined(__ARM_ARCH_7A__) && defined(__GNUC__) \
	&& __GNUC__ == 4 && __GNUC_MINOR__ <= 6 && __GNUC_PATCHLEVEL__ <= 1
static __attribute__((noinline)) errval_t
#else
static errval_t
#endif
initialize_ram_alloc(void)
{
    errval_t err;

    /* Initialize the slot allocator by passing an L2 CNode cap for it to start with */
    // Use ROOTCN_SLOT_SLOT_ALLOC0 as the initial cnode for the MM slot allocator
    struct capref cnode_start_cap = {
        .cnode = {
            .croot = CPTR_ROOTCN,
            .cnode = ROOTCN_SLOT_ADDR(ROOTCN_SLOT_SLOT_ALLOC0),
            .level = CNODE_TYPE_OTHER,
        },
        .slot  = 0,
    };

    /* init slot allocator */
    err = slot_prealloc_init(&ram_slot_alloc, MAXCHILDBITS,
                             cnode_start_cap, L2_CNODE_SLOTS,
                             &mm_ram);
    assert(err_is_ok(err));

    err = mm_init(&mm_ram, ObjType_RAM, guess_physical_addr_start(),
                  MAXSIZEBITS, MAXCHILDBITS, NULL,
                  slot_alloc_prealloc, NULL, &ram_slot_alloc, true);
    assert(err_is_ok(err));

    /* give MM allocator static storage to get it started */
    static char nodebuf[SLAB_STATIC_SIZE(MINSPARENODES, MM_NODE_SIZE(MAXCHILDBITS))];
    slab_grow(&mm_ram.slabs, nodebuf, sizeof(nodebuf));
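    /* The static buffer above bootstraps the slab allocator before any RAM
     * has been added; once memory is available, later refills come from
     * slab_default_refill() or freshly mapped frames. */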

    /* walk bootinfo and add all unused RAM caps to allocator */
    struct capref mem_cap = {
        .cnode = cnode_super,
        .slot  = 0,
    };

    for (int i = 0; i < bi->regions_length; i++) {
        if (bi->regions[i].mr_type == RegionType_Empty) {

#ifdef OSDI18_PAPER_HACK
            //// XXX HACK for OSDI PAPER!!! BAD!
            if (bi->regions[i].mr_base >= (4UL << 30)) {
                //// XXX HACK for OSDI PAPER!!! BAD!
                debug_printf("//// XXX HACK for OSDI PAPER!!! Use mem cap for model allocs [%lx..%lx]\n",
                             bi->regions[i].mr_base, bi->regions[i].mr_base + bi->regions[i].mr_bytes - 1);
                dump_ram_region(i, bi->regions + i);

                model_mem_cap = mem_cap;
                model_mem_base = bi->regions[i].mr_base;
                model_mem_limit = model_mem_base + bi->regions[i].mr_bytes - 1;
                break;
            }

            debug_printf("Adding region to memory allocator:\n");
#endif
            dump_ram_region(i, bi->regions + i);

            mem_total += bi->regions[i].mr_bytes;

            if (bi->regions[i].mr_consumed) {
                // region consumed by init, skip it
                mem_cap.slot++;
                continue;
            }

            err = mm_add_multi(&mm_ram, mem_cap, bi->regions[i].mr_bytes,
                               bi->regions[i].mr_base);
            if (err_is_ok(err)) {
                mem_avail += bi->regions[i].mr_bytes;
            } else {
                DEBUG_ERR(err, "Warning: adding RAM region %d (%" PRIxGENPADDR
                          "/%zu) FAILED", i, bi->regions[i].mr_base,
                          bi->regions[i].mr_bytes);
            }

            /* try to refill slot allocator (may fail if the mem allocator is empty) */
            err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
            if (err_is_fail(err) && err_no(err) != MM_ERR_SLOT_MM_ALLOC) {
                DEBUG_ERR(err, "in slot_prealloc_refill() while initialising"
                          " memory allocator");
                abort();
            }

            /* refill slab allocator if needed and possible */
            if (slab_freecount(&mm_ram.slabs) <= MINSPARENODES
                    && mem_avail > (1UL << (CNODE_BITS + OBJBITS_CTE)) * 2
                    + 10 * BASE_PAGE_SIZE) {
                slab_default_refill(&mm_ram.slabs); // may fail
            }

            mem_cap.slot++;
        }
    }

    err = slot_prealloc_refill(mm_ram.slot_alloc_inst);
    if (err_is_fail(err)) {
        printf("Fatal internal error in RAM allocator: failed to initialise "
               "slot allocator\n");
        DEBUG_ERR(err, "failed to init slot allocator");
        abort();
    }

    printf("RAM allocator initialised, %zu MB (of %zu MB) available\n",
           mem_avail / 1024 / 1024, mem_total / 1024 / 1024);

    return SYS_ERR_OK;
}

static void export_callback(void *st, errval_t err, iref_t iref)
{
    assert(err_is_ok(err));
    struct monitor_binding *mb = get_monitor_binding();
    err = mb->tx_vtbl.set_mem_iref_request(mb, NOP_CONT, iref);
    assert(err_is_ok(err));
}

static struct mem_rx_vtbl rx_vtbl = {
    .allocate_call = mem_allocate_handler,
    .available_call = mem_available_handler,
    .free_monitor_call = mem_free_handler,
};

static bool do_rpc_init = false;

static errval_t connect_callback(void *st, struct mem_binding *b)
{
    do_rpc_init = true;
    b->rx_vtbl = rx_vtbl;
    // TODO: set error handler
    return SYS_ERR_OK;
}

int main(int argc, char **argv)
{
    errval_t err;
    struct waitset *ws = get_default_waitset();

    if (argc < 2) {
        fprintf(stderr, "Usage: %s <bootinfo_location>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // First argument contains the bootinfo location
    bi = (struct bootinfo*)strtol(argv[1], NULL, 10);
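    // Note: the address is parsed base 10, so the spawner must pass the
    // bootinfo pointer as a decimal string (e.g. "268435456" for 0x10000000).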

    /* construct special-case LMP connection to monitor */
    static struct monitor_lmp_binding mcb;
    set_monitor_binding(&mcb.b);

    err = monitor_client_lmp_accept(&mcb, ws, DEFAULT_LMP_BUF_WORDS);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "monitor_client_lmp_accept");
    }

    idc_init();

    /* Send the cap for this endpoint to init, who will pass it to
       the monitor */
    err = lmp_ep_send0(cap_initep, 0, mcb.chan.local_cap);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "lmp_ep_send0");
    }

    // XXX: handle messages (i.e. block) until the monitor binding is ready
    while (capref_is_null(mcb.chan.remote_cap)) {
        err = event_dispatch(ws);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "in event_dispatch while waiting for monitor");
            return EXIT_FAILURE;
        }
    }

    /* Initialize our own memory allocator */
    err = ram_alloc_set(mymm_alloc);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "ram_alloc_set");
    }

    err = initialize_ram_alloc();
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "initialize_ram_alloc");
    }

    err = mem_export(NULL, export_callback, connect_callback, ws,
                     IDC_EXPORT_FLAGS_DEFAULT);
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "mem_export");
    }

    /* initialise tracing */
#if defined(TRACING_EXISTS) && defined(CONFIG_TRACE)
    err = trace_my_setup();
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "initialising tracing");
        // return EXIT_FAILURE;
    }
    trace_init_disp();
#endif

    // handle messages on this thread
    while (true) {
        err = event_dispatch(ws);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "in main event_dispatch loop");
            return EXIT_FAILURE;
        }

#if 0
        static bool in_rpc_init = false;
        if (do_rpc_init && !in_rpc_init && !get_monitor_blocking_binding()) {
            // XXX: this is an ugly hack to try and get a monitor rpc client
            // once the monitor is ready
            in_rpc_init = true;
            do_rpc_init = false;
            /* Bind with monitor's blocking rpc channel */
            err = monitor_client_blocking_rpc_init();
            if (err_is_fail(err)) {
                DEBUG_ERR(err, "monitor_client_blocking_rpc_init");
            } else {
                debug_printf("got monitor_blocking_rpc_client\n");
            }
            in_rpc_init = false;
        }
#endif
    }
}