1/**
2 * \file
3 * \brief API to use the bomp library
4 */
5
6/*
7 * Copyright (c)2014 ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
13 */
14#include <barrelfish/barrelfish.h>
15
16
17#include <bomp_internal.h>
/*
 * NOTES
 *
 * - nodes and workers store their local state in the TLS or in the binding
 * - the master thread stores its state in a global variable
 *
 *      master thread
 *         - list of nodes -> execute on these nodes
 *         - if it has a list of workers -> execute on these workers
 *
 *
 */
30
31
/**
 * \brief initializes the BOMP library using the cores indicated by a bitmap
 *
 * \param coresbm       bitmap representing the cores to run on
 * \param stack_size    size of the thread's stack in bytes
 *
 * \returns 0 on SUCCESS
 *          non-zero on FAILURE
 *
 * Not yet implemented: the call trips an assertion, and reports failure
 * when assertions are compiled out (NDEBUG).
 */
int bomp_init_cores(void *coresbm, size_t stack_size)
{
    /* the arguments stay unused until the function is implemented */
    (void)coresbm;
    (void)stack_size;

    assert(!"NYI");

    /*
     * Only reached when assertions are disabled: nothing has been set up, so
     * the caller must not be told that the initialization succeeded.
     */
    return -1;
}
46
47/**
48 * \brief initializes the BOMP library with the given stack sizes
49 *
50 * \param stack_size	size of the thread's stack in bytes
51 *
52 * \returns	0 on SUCCESS
53 *          non-zero on FAILURE
54 *
55 * This function will use the first nthreads cores to run on
56 */
57int bomp_init_varstack(unsigned int nthreads, size_t stack_size)
58{
59	/// get the maximum number of cores
60	if (numa_available() != 0) {
61        return -1;
62	}
63
64	if (nthreads == BOMP_THREADS_ALL) {
65	    nthreads = numa_num_configured_cpus();
66	}
67
68    nodeid_t node_current = numa_current_node();
69    nodeid_t node_count = 1;
70
71#if 0
72
73    if (nthreads > numa_num_node_cpus(node_current)) {
74        coreid_t threads_remaining = nthreads - numa_num_node_cpus(node_current);
75        /* determine the number of needed nodes */
76        for (nodeid_t node = 0; node <= numa_max_node(); ++node) {
77            if (node == node_current) {
78                continue;
79            }
80            node_count++;
81            if (threads_remaining < numa_num_node_cpus(node)) {
82                threads_remaining = 0;
83                break;
84            } else {
85                threads_remaining -= numa_num_node_cpus(node);
86            }
87
88        }
89
90        if (threads_remaining) {
91            /* Not enough cores availabel to serve the nthreads request */
92            BOMP_ERROR("not enough cores available: need %" PRIuCOREID " more \n",
93                       threads_remaining);
94            return -2;
95        }
96    }
97#endif
98
99
100    BOMP_DEBUG_INIT("Initializing BOMP with a %" PRIuNODEID " nodes of %" PRIuCOREID
101                    " threads\n", node_count, nthreads);
102
103    bomp_icv_init_default((coreid_t)nthreads);
104
105    struct bomp_tls *tls = calloc(1, sizeof(struct bomp_tls));
106    if (tls == NULL) {
107        return LIB_ERR_MALLOC_FAIL;
108    }
109
110    tls->role = BOMP_THREAD_ROLE_MASTER;
111    tls->self = thread_self();
112
113    if (node_count > 1) {
114        tls->r.master.nodes = calloc(node_count, sizeof(struct bomp_node));
115        if (tls->r.master.nodes == NULL) {
116            free(tls);
117            return LIB_ERR_MALLOC_FAIL;
118        }
119        tls->r.master.num_nodes = node_count - 1;
120    } else {
121        tls->r.master.num_nodes = 0;
122        tls->r.master.nodes = NULL;
123    }
124
125    tls->icv.global = &g_omp_icv_global_default;
126    tls->icv.device = &g_omp_icv_device_default;
127    tls->icv.task = NULL;
128
129    thread_set_tls(tls);
130
131    // divide the threads equally among the nodes
132    coreid_t threads_per_node = (coreid_t)(nthreads / node_count);
133
134#if 0
135    coreid_t master_threads = numa_num_node_cpus(node_current);
136#else
137    coreid_t master_threads = nthreads;
138#endif
139
140    if (master_threads > threads_per_node) {
141        master_threads = threads_per_node;
142    }
143
144    nthreads -= master_threads;
145
146    nodeid_t numa_node = 0;
147    for (nodeid_t node = 1; node < node_count; ++node) {
148        if (numa_node == node_current) {
149            numa_node++;
150        }
151        coreid_t num_threads = numa_num_node_cpus(node);
152        if (num_threads > threads_per_node) {
153            num_threads = threads_per_node;
154        }
155
156        bomp_node_init(BOMP_NODE_LOCAL, numa_node, node, num_threads,
157                       stack_size, &tls->r.master.nodes[node]);
158
159        nthreads -= num_threads;
160    }
161
162    /* now all the other threads should have been initialized */
163    assert(nthreads == 0);
164
165    /* initialize the local node */
166    bomp_node_init(BOMP_NODE_MASTER, node_current, 0, master_threads,
167                   stack_size, &tls->r.master.local);
168
169    // the master thread is active
170    tls->r.master.local.threads_active = 1;
171    tls->r.master.nodes_active = 1;
172
173    return 0;
174}
175