1/**
2 * \file
3 * \brief Implementation of backend functions on barrelfish
4 */
5
6/*
7 * Copyright (c) 2007, 2008, 2009, 2010, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <barrelfish/barrelfish.h>
16#include <barrelfish/dispatch.h>
17#include <stdio.h>
18#include <inttypes.h>
19#include <barrelfish/sys_debug.h>
20#include <barrelfish/resource_ctrl.h>
21#include <bomp_internal.h>
22
// 1MB stack: the stack size bomp_bomp_init() passes on to
// bomp_bomp_init_varstack().
#define STACK_BYTES (1UL << 20)

// Backend state of this file: allocated in bomp_bomp_init_varstack() and
// returned by bomp_get_backend_state_bomp(). NULL until initialization.
static struct bomp_state *bomp_st;

// Resource ID handed to rsrc_phase() in bomp_synchronize().
// NOTE(review): nothing visible here assigns it, so it keeps its
// zero-initialized value -- confirm that this is intended.
static rsrcid_t my_rsrc_id;
29
30//static const char *my_manifest = "B 1\n"             // Normal phase
31//                "G 80 160 80 480\n";// Gang phase
32
/**
 * \brief Backend hook for NUMA placement; this backend does nothing here.
 *
 * \param id  thread ID supplied by the caller (ignored)
 */
static void set_numa(unsigned id)
{
    (void) id; /* intentionally a no-op */
}
37
38static size_t thread_stack_size = 0;
39
40static void bomp_run_on(int core_id,
41                        void* cfunc,
42                        void *arg)
43{
44    int actual_id = core_id + disp_get_core_id();
45    thread_func_t func = (thread_func_t) cfunc;
46
47    errval_t err = domain_thread_create_on_varstack(actual_id, func, arg,
48                                                    thread_stack_size, NULL);
49    if (err_is_fail(err)) {
50        DEBUG_ERR(err, "domain_thread_create_on failed");
51        printf("domain_thread_create_on failed on %d\n", actual_id);
52        assert(err_is_ok(err));
53    }
54}
55
56static int bomp_thread_fn(void *xdata)
57{
58    struct bomp_work *work_data = xdata;
59
60    g_bomp_state->backend.set_numa(work_data->thread_id);
61
62    bomp_set_tls(work_data);
63    work_data->fn(work_data->data);
64    /* Wait for the Barrier */
65    bomp_barrier_wait(work_data->barrier);
66    thread_detach(thread_self());
67    thread_exit(0); // XXX: should return work_fn return value?
68    return 0;
69}
70
/* Constant added to each worker's stride-scaled index in
 * bomp_start_processing() before it is passed to bomp_run_on(). */
#define THREAD_OFFSET   0
/* #define THREAD_OFFSET   12 */
73
74void bomp_start_processing(void (*fn)(void *),
75                           void *data,
76                           unsigned nthreads)
77{
78    assert(g_bomp_state);
79
80    /* Create Threads and ask them to process the function specified */
81    /* Let them die as soon as they are done */
82    unsigned i;
83    struct bomp_work *xdata;
84    struct bomp_barrier *barrier;
85
86    g_bomp_state->num_threads = nthreads;
87
88    char *memory = calloc(
89                    1,
90                    nthreads * sizeof(struct bomp_thread_local_data *)
91                                    + sizeof(struct bomp_barrier)
92                                    + nthreads * sizeof(struct bomp_work));
93    assert(memory != NULL);
94
95    g_bomp_state->tld = (struct bomp_thread_local_data **) memory;
96    memory += nthreads * sizeof(struct bomp_thread_local_data *);
97
98    /* Create a barier for the work that will be carried out by the threads */
99    barrier = (struct bomp_barrier *) memory;
100    memory += sizeof(struct bomp_barrier);
101    bomp_barrier_init(barrier, nthreads);
102
103    /* For main thread */
104    xdata = (struct bomp_work *) memory;
105    memory += sizeof(struct bomp_work);
106
107    xdata->fn = fn;
108    xdata->data = data;
109    xdata->thread_id = 0;
110    xdata->barrier = barrier;
111    bomp_set_tls(xdata);
112
113    for (i = 1; i < nthreads; i++) {
114        xdata = (struct bomp_work *) memory;
115        memory += sizeof(struct bomp_work);
116
117        xdata->fn = fn;
118        xdata->data = data;
119        xdata->thread_id = i;
120        xdata->barrier = barrier;
121
122        /* Create threads */
123        bomp_run_on(i * BOMP_DEFAULT_CORE_STRIDE + THREAD_OFFSET, bomp_thread_fn,
124                    xdata);
125    }
126}
127
128void bomp_end_processing(void)
129{
130    /* Cleaning of thread_local and work data structures */
131    int i = 0;
132
133    bomp_barrier_wait(g_bomp_state->tld[i]->work->barrier);
134
135    /* Clear the barrier created */
136    bomp_clear_barrier(g_bomp_state->tld[i]->work->barrier);
137
138    free(g_bomp_state->tld);
139    g_bomp_state->tld = NULL;
140    g_bomp_state->num_threads = 1;
141}
142
143static struct thread_sem init_sem = THREAD_SEM_INITIALIZER
144;
145
146static int remote_init(void *dumm)
147{
148//    errval_t err = rsrc_join(my_rsrc_id);
149    //assert(err_is_ok(err));
150
151    thread_sem_post(&init_sem);
152    thread_detach(thread_self());
153    return 0;
154}
155
156static int cores_initialized = 1;
157
158static void domain_init_done(void *arg,
159                             errval_t err)
160{
161    assert(err_is_ok(err));
162    cores_initialized++;
163}
164
165static void bomp_span_domain(int nos_threads,
166                             size_t stack_size)
167{
168    int my_core_id = disp_get_core_id();
169
170    errval_t err;
171
172    // Remember default stack size
173    thread_stack_size = stack_size;
174
175    // Submit manifest (derived from program)
176//     = rsrc_manifest(my_manifest, &my_rsrc_id);
177//
178//    if (err_is_fail(err)) {
179 //       DEBUG_ERR(err, "rsrc_manifest");
180 //       abort();
181 //   }
182
183    /* Span domain to all cores */
184    for (int i = 1; i < nos_threads; ++i) {
185        //for (int i = my_core_id + BOMP_DEFAULT_CORE_STRIDE; i < nos_threads + my_core_id; i++) {
186        coreid_t core = my_core_id + (i * BOMP_DEFAULT_CORE_STRIDE);
187        err = domain_new_dispatcher(core, domain_init_done, NULL);
188        if (err_is_fail(err)) {
189            DEBUG_ERR(err, "failed to span domain");
190            printf("Failed to span domain to %d\n", i);
191            assert(err_is_ok(err));
192        }
193    }
194
195    while (cores_initialized < nos_threads) {
196        thread_yield();
197    }
198
199    /* Run a remote init function on remote cores */
200    //for (int i = my_core_id + 1; i < nos_threads + my_core_id; i++) {
201    for (int i = 1; i < nos_threads; ++i) {
202        coreid_t core = my_core_id + (i * BOMP_DEFAULT_CORE_STRIDE);
203        err = domain_thread_create_on(core, remote_init, NULL, NULL);
204        if (err_is_fail(err)) {
205            DEBUG_ERR(err, "domain_thread_create_on failed");
206            printf("domain_thread_create_on failed on %d\n", i);
207            assert(err_is_ok(err));
208        }
209        thread_sem_wait(&init_sem);
210    }
211}
212
213static void bomp_synchronize(void)
214{
215    /* if(GOMP_single_start()) { */
216    errval_t err = rsrc_phase(my_rsrc_id, 1);
217    assert(err_is_ok(err));
218    /* } */
219}
220
221int bomp_bomp_init(uint32_t nthreads)
222{
223    return bomp_bomp_init_varstack(nthreads, STACK_BYTES);
224}
225
226int bomp_bomp_init_varstack(uint32_t nthreads, size_t stack_size)
227{
228
229    if (bomp_st != NULL) {
230        debug_printf("bomp_bomp_init: already initialized\n");
231        return 0;
232    }
233
234    debug_printf("bomp_bomp_init: nthreads=%u\n", nthreads);
235
236    bomp_st = calloc(1, sizeof(struct bomp_state));
237    if (bomp_st == NULL) {
238        return -1;
239    }
240
241    // make sure stack size is multiple of page size
242    stack_size = ROUND_UP(stack_size, BASE_PAGE_SIZE);
243
244    g_thread_limit = nthreads;
245
246    bomp_st->backend_type = BOMP_BACKEND_BOMP;
247    bomp_st->backend.get_thread = (backend_get_thread_fn_t) thread_self;
248    bomp_st->backend.get_tls = thread_get_tls;
249    bomp_st->backend.set_tls = thread_set_tls;
250    bomp_st->backend.set_numa = set_numa;
251    bomp_st->backend.thread_exit = thread_exit;
252    bomp_st->backend.synchronize = bomp_synchronize;
253    bomp_st->backend.start_processing = bomp_start_processing;
254    bomp_st->backend.end_processing = bomp_end_processing;
255    bomp_common_init(bomp_st);
256    g_bomp_state = bomp_st;
257    bomp_span_domain(nthreads, stack_size);
258    return 0;
259}
260
261int bomp_run_main(main_func_t mainfunc, void *mainarg, size_t stacksize)
262{
263    // we need to create a thread with a big enough stack on Barrelfish as the
264    // default stack size for threads is 64kB which is nowhere near enough for
265    // the fft code.
266    int retval;
267    struct thread *mt = thread_create_varstack(mainfunc, mainarg, stacksize);
268    errval_t err = thread_join(mt, &retval);
269    assert(err_is_ok(err));
270    return retval;
271}
272
273struct bomp_state * bomp_get_backend_state_bomp(void)
274{
275    return bomp_st;
276}
277