1/**
2 * \file
3 * \brief Barrelfish library initialization.
4 */
5
6/*
7 * Copyright (c) 2007-2012, ETH Zurich.
8 * Copyright (c) 2014, HP Labs.
9 * All rights reserved.
10 *
11 * This file is distributed under the terms in the attached LICENSE file.
12 * If you do not find this file, copies can be found by writing to:
13 * ETH Zurich D-INFK, CAB F.78, Universitaetstrasse 6, CH-8092 Zurich,
14 * Attn: Systems Group.
15 */
16
17#include <stdio.h>
18#include <barrelfish/barrelfish.h>
19#include <barrelfish/idc.h>
20#include <barrelfish/dispatch.h>
21#include <barrelfish/curdispatcher_arch.h>
22#include <barrelfish/dispatcher_arch.h>
23#include <barrelfish_kpi/dispatcher_shared.h>
24#include <barrelfish/terminal.h>
25#include <barrelfish/morecore.h>
26#include <barrelfish/monitor_client.h>
27#include <barrelfish/nameservice_client.h>
28#include <barrelfish/spawn_client.h>
29#include <barrelfish/systime.h>
30#include <barrelfish_kpi/domain_params.h>
31#include <if/monitor_defs.h>
32#ifdef ARRAKIS
33#include <if/hyper_defs.h>
34#endif
35#include <trace/trace.h>
36#include <octopus/init.h>
37#include "threads_priv.h"
38#include "init.h"
39
40/// Are we the init domain (and thus need to take some special paths)?
41static bool init_domain;
42
43extern size_t (*_libc_terminal_read_func)(char *, size_t);
44extern size_t (*_libc_terminal_write_func)(const char *, size_t);
45extern void (*_libc_exit_func)(int);
46extern void (*_libc_assert_func)(const char *, const char *, const char *, int);
47
48static bool pagesize_ok(size_t pagesize)
49{
50    if (pagesize == BASE_PAGE_SIZE
51#ifdef __x86_64__
52        || pagesize == HUGE_PAGE_SIZE
53#endif
54        || pagesize == LARGE_PAGE_SIZE)
55    {
56        return true;
57    }
58    return false;
59}
60
61void libc_exit(int);
62
63__weak_reference(libc_exit, _exit);
64void libc_exit(int status)
65{
66    errval_t err;
67
68    if (!init_domain) {
69        terminal_exit();
70    }
71
72    // Use spawnd if spawned through spawnd
73    if(disp_get_domain_id() == 0) {
74#if 0 // XXX: revocation goes through the mon, but monitor ep is revoked in the process
75        err = cap_revoke(cap_dispatcher);
76        if (err_is_fail(err)) {
77            DEBUG_ERR(err, "revoking dispatcher failed in _Exit, spinning!");
78	    //sys_print("revoking dispatcher failed in _Exit, spinning!", 100);
79	    while (1) {}
80        }
81        err = cap_delete(cap_dispatcher);
82        DEBUG_ERR(err, "deleting dispatcher failed in _Exit, spinning!");
83        //sys_print("deleting dispatcher failed in _Exit, spinning!", 100);
84#endif
85
86        // XXX: Leak all other domain allocations
87    } else {
88        err = spawn_exit(status);
89        if (err_is_fail(err)) {
90            DEBUG_ERR(err, "spawn_exit");
91        }
92    }
93
94    thread_exit(status);
95    // If we're not dead by now, we wait
96    while (1) {}
97}
98
99static void libc_assert(const char *expression, const char *file,
100                        const char *function, int line)
101{
102    char buf[512];
103    size_t len;
104
105    /* Formatting as per suggestion in C99 spec 7.2.1.1 */
106    len = snprintf(buf, sizeof(buf), "Assertion failed on core %d in %.*s: %s,"
107                   " function %s, file %s, line %d.\n",
108                   disp_get_core_id(), DISP_NAME_LEN,
109                   disp_name(), expression, function, file, line);
110    sys_print(buf, len < sizeof(buf) ? len : sizeof(buf));
111}
112
113/* Set libc function pointers */
114void barrelfish_libc_glue_init(void)
115{
116    _libc_terminal_read_func = terminal_read;
117    _libc_terminal_write_func = terminal_write;
118    _libc_exit_func = libc_exit;
119    _libc_assert_func = libc_assert;
120    /* morecore func is setup by morecore_init() */
121
122    // XXX: set a static buffer for stdout
123    // this avoids an implicit call to malloc() on the first printf
124    static char buf[BUFSIZ];
125    setvbuf(stdout, buf, _IOLBF, sizeof(buf));
126}
127
128static void monitor_bind_cont(void *st, errval_t err, struct monitor_binding *b);
129#ifdef ARRAKIS
130static void hyper_bind_cont(void *st, errval_t err, struct hyper_binding *b);
131#endif
132
#ifdef CONFIG_TRACE
/**
 * \brief Map the trace buffer into this domain and record this core's slice.
 *
 * Without TRACING_EXISTS this is a no-op that returns SYS_ERR_OK. Otherwise
 * the frame in task cnode slot TASKCN_SLOT_TRACEBUF is mapped as the master
 * trace buffer, trace_buffer_va is set to this core's offset within it, and
 * the dispatcher's trace_buf pointer is updated to match.
 *
 * \return SYS_ERR_OK on success, TRACE_ERR_UNAVAIL if the core ID is not
 *         below TRACE_COREID_LIMIT, or the error from vspace_map_one_frame().
 */
errval_t trace_my_setup(void)
{
#ifdef TRACING_EXISTS
    if (disp_get_core_id() >= TRACE_COREID_LIMIT) {
        // can't support tracing on this core. sorry :(
        return TRACE_ERR_UNAVAIL;
    }

    struct capref tracebuf_cap = {
        .cnode  = cnode_task,
        .slot   = TASKCN_SLOT_TRACEBUF
    };

    errval_t err = vspace_map_one_frame((void**)&trace_buffer_master,
                                        TRACE_ALLOC_SIZE, tracebuf_cap,
                                        NULL, NULL);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "vspace_map_one_frame for master trace buffer failed");
        return err;
    }
    assert(trace_buffer_master != 0);

    // This core's slice starts at a per-core offset into the master buffer
    trace_buffer_va = trace_buffer_master +
        (disp_get_core_id() * TRACE_PERCORE_BUF_SIZE);

    // Update pointer to trace buffer in child's dispatcher
    struct dispatcher_generic *disp_gen =
        get_dispatcher_generic(curdispatcher());
    disp_gen->trace_buf = (struct trace_buffer *)trace_buffer_va;

    return SYS_ERR_OK;
#else
    return SYS_ERR_OK;
#endif
}
#endif
171
172static bool request_done = false;
173
174static bool parse_argv(struct spawn_domain_params *params, size_t *morecore_alignment)
175{
176    assert(params);
177    // grab pagesize config from argv if available
178    size_t morecore_pagesize = MORECORE_PAGESIZE;
179    int i = 1;
180    bool found = false;
181    for (; i < params->argc; i++) {
182        if (!found) {
183            if (!strncmp(params->argv[i], "morecore=", 9)) {
184                morecore_pagesize = strtol(params->argv[i]+9, NULL, 0);
185                // check for valid page size
186                switch (morecore_pagesize) {
187#ifdef __x86_64__
188                    case HUGE_PAGE_SIZE:
189#endif
190                    case BASE_PAGE_SIZE:
191                    case LARGE_PAGE_SIZE:
192                        break;
193                    default:
194                        morecore_pagesize = MORECORE_PAGESIZE;
195                }
196                found = true;
197            }
198        } else {
199            // found so move all other args one to the front
200            params->argv[i-1] = params->argv[i];
201        }
202    }
203    if (found) {
204        params->argc -= 1;
205    }
206
207    if (morecore_alignment) {
208        *morecore_alignment = morecore_pagesize;
209    }
210
211    return found;
212}
213
214/** \brief Initialise libbarrelfish.
215 *
216 * This runs on a thread in every domain, after the dispatcher is setup but
217 * before main() runs.
218 */
219errval_t barrelfish_init_onthread(struct spawn_domain_params *params)
220{
221    errval_t err;
222
223    // do we have an environment?
224    if (params != NULL && params->envp[0] != NULL) {
225        extern char **environ;
226        environ = params->envp;
227    }
228
229    // Init default waitset for this dispatcher
230    struct waitset *default_ws = get_default_waitset();
231    waitset_init(default_ws);
232
233    // Initialize ram_alloc state
234    ram_alloc_init();
235    /* All domains use smallcn to initialize */
236    err = ram_alloc_set(ram_alloc_fixed);
237    if (err_is_fail(err)) {
238        return err_push(err, LIB_ERR_RAM_ALLOC_SET);
239    }
240
241    err = vspace_current_init(init_domain);
242    if (err_is_fail(err)) {
243        return err_push(err, LIB_ERR_VSPACE_INIT);
244    }
245
246    err = slot_alloc_init();
247    if (err_is_fail(err)) {
248        return err_push(err, LIB_ERR_SLOT_ALLOC_INIT);
249    }
250
251    // reconstruct our pmap from data passed to us by our parent
252    if (params != NULL && params->vspace_buf != NULL) {
253        struct pmap *pmap = get_current_pmap();
254        err = pmap->f.deserialise(pmap, params->vspace_buf,
255                                  params->vspace_buf_len);
256        if (err_is_fail(err)) {
257            return err_push(err, LIB_ERR_VSPACE_INIT);
258        }
259    } else if (init_domain) {
260        // TODO: the kernel boots us with a deterministic pmap structure: use it
261    }
262
263    if (init_domain) {
264        // we cannot use large pages in the init domains because we are not
265        // connected to the memory server and need to work with the 4k pages
266        // in the base cn.
267        err = morecore_init(BASE_PAGE_SIZE);
268    } else {
269        /* if there is a pagesize supplied, use this one */
270        size_t morecore_pagesize = 0;
271        if (params != NULL && params->pagesize) {
272            morecore_pagesize =  params->pagesize;
273
274            debug_printf("%s: Using supplied pagesize: %zu\n", __FUNCTION__, morecore_pagesize);
275
276            if (!pagesize_ok(morecore_pagesize)) {
277                debug_printf("Supplied pagesize not available on current arch, falling back to 4kB pages\n");
278                morecore_pagesize = BASE_PAGE_SIZE;
279            }
280
281            assert(pagesize_ok(morecore_pagesize));
282
283        } else {
284            parse_argv(params, &morecore_pagesize);
285#if defined(__i386__) && !defined(CONFIG_PSE)
286            morecore_pagesize = BASE_PAGE_SIZE;
287#endif
288        }
289        err = morecore_init(morecore_pagesize);
290    }
291    if (err_is_fail(err)) {
292        return err_push(err, LIB_ERR_MORECORE_INIT);
293    }
294
295    lmp_endpoint_init();
296
297    // init domains only get partial init
298    if (init_domain) {
299        return SYS_ERR_OK;
300    }
301
302    /* bind to monitor */
303    struct monitor_lmp_binding *mcb =
304        malloc(sizeof(struct monitor_lmp_binding));
305    assert(mcb != NULL);
306    set_monitor_binding(&mcb->b);
307
308    errval_t init_complete_err;
309
310    request_done = false;
311    err = monitor_client_lmp_bind(mcb, monitor_bind_cont, &init_complete_err,
312                                  default_ws, DEFAULT_LMP_BUF_WORDS);
313    if (err_is_fail(err)) {
314        return err_push(err, LIB_ERR_MONITOR_CLIENT_BIND);
315    }
316
317    // dispatch events on the waitset until monitor binding completes
318    while (!request_done) {
319        err = event_dispatch(default_ws);
320        if (err_is_fail(err)) {
321            return err_push(err, LIB_ERR_EVENT_DISPATCH);
322        }
323    }
324
325    if(err_is_fail(init_complete_err)) {
326        USER_PANIC_ERR(err, "during initialization");
327    }
328
329    idc_init();
330
331    /* Bind with monitor's blocking rpc channel */
332    err = monitor_client_blocking_rpc_init();
333    if (err_is_fail(err)) {
334        return err_push(err, LIB_ERR_MONITOR_RPC_BIND);
335    }
336
337#ifndef ARRAKIS
338    // should only do this for arrakis domains after we have connection to
339    // hypervisor service
340    /* XXX: Setup the channel with mem_serv and use the channel instead */
341    err = ram_alloc_set(NULL);
342    if (err_is_fail(err)) {
343        return err_push(err, LIB_ERR_RAM_ALLOC_SET);
344    }
345
346    // switch morecore to intended configuration
347    err = morecore_reinit();
348    if (err_is_fail(err)) {
349        return err_push(err, LIB_ERR_MORECORE_INIT);
350    }
351#endif
352
353#ifdef CONFIG_TRACE
354    err = trace_my_setup();
355    if (err_no(err) == TRACE_ERR_UNAVAIL) {
356        debug_printf("Tracing not available for core %d, consider increasing TRACE_COREID_LIMIT\n",
357                disp_get_core_id());
358    } else if (err_is_fail(err)) {
359        DEBUG_ERR(err, "trace_my_setup failed");
360        return err;
361    }
362#endif
363
364    // try to connect to name service (may fail if we are the skb or ramfsd!)
365    err = nameservice_client_blocking_bind();
366    if (err_is_fail(err)) {
367        if (err_no(err) == LIB_ERR_GET_NAME_IREF) {
368            // skip everything else if we don't have a nameservice
369            return SYS_ERR_OK;
370        } else {
371            return err_push(err, LIB_ERR_NAMESERVICE_CLIENT_INIT);
372        }
373    }
374
375#ifdef ARRAKIS
376    /* connect to hypervisor service */
377    char hyper[256];
378    snprintf(hyper, 256, "arrakis.%d.hyper", disp_get_core_id());
379    hyper[255] = 0;
380    iref_t hyper_iref;
381    err = nameservice_blocking_lookup(hyper, &hyper_iref);
382    if (err_is_fail(err)) {
383        DEBUG_ERR(err, "hyper ns lookup");
384    }
385    assert(err_is_ok(err));
386    request_done = false;
387    struct hyper_binding *hb;
388    err = hyper_bind(hyper_iref, hyper_bind_cont, &hb, default_ws,
389            IDC_BIND_FLAG_RPC_CAP_TRANSFER);
390    assert(err_is_ok(err));
391    while (!request_done) {
392        messages_wait_and_handle_next();
393    }
394    set_hyper_binding(hb);
395    hyper_rpc_client_init(hb);
396    struct capref dispframe = {
397        .cnode = cnode_task,
398        .slot = TASKCN_SLOT_DISPFRAME,
399    };
400    struct frame_identity fi;
401    err = frame_identify(dispframe, &fi);
402    assert(err_is_ok(err));
403    debug_printf("registering with hypervisor using %"PRIu64"\n",
404            fi.base);
405    err = hb->rpc_tx_vtbl.register_client(hb, fi.base);
406    assert(err_is_ok(err));
407
408    // connect to mem_serv
409    err = ram_alloc_set(NULL);
410    if (err_is_fail(err)) {
411        return err_push(err, LIB_ERR_RAM_ALLOC_SET);
412    }
413
414    // switch morecore to intended configuration
415    err = morecore_reinit();
416    if (err_is_fail(err)) {
417        return err_push(err, LIB_ERR_MORECORE_INIT);
418    }
419#endif
420
421    // init terminal
422    err = terminal_init();
423    if (err_is_fail(err)) {
424        return err_push(err, LIB_ERR_TERMINAL_INIT);
425    }
426
427    // Init domain spanning code
428    err = domain_init();
429    if (err_is_fail(err)) {
430        return err_push(err, LIB_ERR_DOMAIN_INIT);
431    }
432
433    // XXX: Record text/data mappings from environment
434    char *p = getenv("ARRAKIS_PMAP");
435    if(p != NULL) {
436        struct morecore_state *mcstate = get_morecore_state();
437        for(mcstate->v2p_entries = 0; *p != '\0'; mcstate->v2p_entries++) {
438            assert(mcstate->v2p_entries < MAX_V2P_MAPPINGS);
439            struct v2pmap *e = &mcstate->v2p_mappings[mcstate->v2p_entries];
440            int r = sscanf(p, "%" PRIxGENVADDR ":%" PRIxGENPADDR ":%zx ", &e->va, &e->pa, &e->size);
441            assert(r == 3);
442            p = strchr(p, ' ') + 1;
443        }
444    }
445    return err;
446}
447
448static void monitor_bind_cont(void *st, errval_t err, struct monitor_binding *b)
449{
450    // hacky errval_t state pointer used to signal completion
451    errval_t *init_complete_err = st;
452
453    assert(!init_domain);
454    *init_complete_err = err;
455
456    // signal completion
457    request_done = true;
458}
459
#ifdef ARRAKIS
/**
 * \brief Continuation invoked when the hypervisor-service bind finishes.
 *
 * Hands the new binding back through \p st and raises request_done.
 *
 * \param st  Pointer to the struct hyper_binding * that receives \p b.
 * \param err Outcome of the bind.
 * \param b   The hypervisor binding.
 */
static void hyper_bind_cont(void *st, errval_t err, struct hyper_binding *b)
{
    // The code waiting on request_done uses the binding unconditionally, so
    // a failed bind must not be passed on silently.
    // NOTE(review): presumably b is not usable when err signals failure.
    if (err_is_fail(err)) {
        USER_PANIC_ERR(err, "binding to hypervisor service failed");
    }

    struct hyper_binding **hb = st;
    *hb = b;

    // signal completion
    request_done = true;
}
#endif
468
469/**
470 *  \brief Initialise libbarrelfish, while disabled.
471 *
472 * This runs on the dispatcher's stack, while disabled, before the dispatcher is
473 * setup. We can't call anything that needs to be enabled (ie. cap invocations)
474 * or uses threads. This is called from crt0.
475 */
476void barrelfish_init_disabled(dispatcher_handle_t handle, bool init_dom_arg);
477void barrelfish_init_disabled(dispatcher_handle_t handle, bool init_dom_arg)
478{
479    init_domain = init_dom_arg;
480    disp_init_disabled(handle);
481    thread_init_disabled(handle, init_dom_arg);
482}
483