1/**
2 * \file
3 * \brief RCCE library
4 */
5
6/*
7 * Copyright (c) 2009, 2010, ETH Zurich.
8 * All rights reserved.
9 *
10 * This file is distributed under the terms in the attached LICENSE file.
11 * If you do not find this file, copies can be found by writing to:
12 * ETH Zurich D-INFK, Haldeneggsteig 4, CH-8092 Zurich. Attn: Systems Group.
13 */
14
15#include <barrelfish/barrelfish.h>
16#include <rcce/RCCE.h>
17#include <rcce/RCCE_lib.h>
18#include "internal.h"
19
20/// Define this to measure time spent in RCCE comm vs. program
21//#define MEASURE_TIME
22//#define MEASURE_DATA
23
24//......................................................................................
25// GLOBAL VARIABLES USED BY THE LIBRARY
26//......................................................................................
27int       RCCE_NP;               // number of participating cores
28double    RC_REFCLOCKGHZ;        // baseline CPU frequency (GHz)
29int       RC_MY_COREID;          // physical ID of calling core
30int       RC_COREID[RCCE_MAXNP]; // array of physical core IDs for all participating
31                                 // cores, sorted by rank
32int       RCCE_IAM=-1;           // rank of calling core (invalid by default)
33RCCE_COMM RCCE_COMM_WORLD;       // predefined global communicator
34int       RCCE_BUFF_SIZE;        // available MPB size
35t_vcharp  RCCE_comm_buffer[RCCE_MAXNP]; // starts of MPB, sorted by rank
36// ......................... non-GORY communication mode .............................
37// synchronization flags are predefined and maintained by the library
38RCCE_FLAG RCCE_sent_flag[RCCE_MAXNP], RCCE_ready_flag[RCCE_MAXNP];
39
#ifdef MEASURE_TIME
// RCCE_wtime() value recorded by RCCE_init()
static double measure_start;
// sum of the RCCE_wtime() deltas spent inside RCCE_send() and RCCE_recv()
static double measure_rcce_time;
#endif

#ifdef MEASURE_DATA
#       define MAX_PHASES       10
// bytes handed to RCCE_send(), indexed by [phase][destination rank]
static size_t measure_rcce_data[MAX_PHASES][RCCE_MAXNP];
#endif

// phase that RCCE_send() currently accounts its data to (MEASURE_DATA builds)
int rcce_curphase = 0;
49
/**
 * \brief qsort()-style comparison callback that orders two ints ascending.
 *
 * \param e1 Pointer to the first int to be compared
 * \param e2 Pointer to the second int to be compared
 *
 * \return -1 if *e1 < *e2, 1 if *e1 > *e2, 0 if both are equal.
 */
int id_compare(
  const void *e1, // first element to be compared
  const void *e2  // second element to be compared
  ) {
  // Keep the const qualifier: the callback must not modify the elements.
  const int *lhs = e1;
  const int *rhs = e2;

  // Comparison-based result; avoids the overflow a subtraction could cause.
  return (*lhs > *rhs) - (*lhs < *rhs);
}
58
/**
 * \brief Returns the core ID reported by disp_get_core_id(), as an int.
 */
static int MYCOREID(void)
{
    const int core = disp_get_core_id();
    return core;
}
63
64int RCCE_init(int *argc, char ***argv)
65{
66    int ue;
67    void *nothing = NULL;
68
69    assert(*argc >= 3);
70
71    setup_routes(*argc, *argv);
72
73    // save pointer to executable name for later insertion into the argument list
74    char *executable_name = (*argv)[0];
75
76    RCCE_NP        = atoi(*(++(*argv)));
77    RC_REFCLOCKGHZ = atof(*(++(*argv)));
78
79    if(RC_REFCLOCKGHZ == 0) {
80        printf("Barrelfish RCCE extension: Computing reference clock GHz automatically...\n");
81        uint64_t tscperms;
82        errval_t err = sys_debug_get_tsc_per_ms(&tscperms);
83        assert(err_is_ok(err));
84        RC_REFCLOCKGHZ = ((double)tscperms) / 1000000.0;
85        printf("Reference clock computed to be %.2g\n", RC_REFCLOCKGHZ);
86    }
87
88    // put the participating core ids (unsorted) into an array
89    for (ue=0; ue<RCCE_NP; ue++) {
90        RC_COREID[ue] = atoi(*(++(*argv)));
91    }
92
93    // make sure executable name is as expected
94    (*argv)[0] = executable_name;
95
96    RC_MY_COREID = MYCOREID();
97
98    // adjust apparent number of command line arguments, so it will appear to main
99    // program that number of UEs, clock frequency, and core ID list were not on
100    // command line
101    *argc -= RCCE_NP+2;
102
103    // sort array of participating phyical core IDs to determine their ranks
104    qsort((char *)RC_COREID, RCCE_NP, sizeof(int), id_compare);
105
106    // determine rank of calling core
107    for (ue=0; ue<RCCE_NP; ue++) {
108        if (RC_COREID[ue] == RC_MY_COREID) RCCE_IAM = ue;
109    }
110
111    // leave in one reassuring debug print
112    printf("My rank is %d, physical core ID is %d\n", RCCE_IAM, RC_MY_COREID);
113    if (RCCE_IAM<0) {
114        return(RCCE_ERROR_CORE_NOT_IN_HOSTFILE);
115    }
116
117    // create global communicator (equivalent of MPI_COMM_WORLD); this will also allocate
118    // the two synchronization flags associated with the global barrier
119    RCCE_comm_split(RCCE_global_color, nothing, &RCCE_COMM_WORLD);
120
121#ifdef MEASURE_TIME
122    measure_start = RCCE_wtime();
123    measure_rcce_time = 0.0;
124#endif
125
126#ifdef MEASURE_DATA
127    memset(measure_rcce_data, 0, sizeof(measure_rcce_data));
128#endif
129
130    return (RCCE_SUCCESS);
131}
132
133int RCCE_ue(void)
134{
135    return (RCCE_IAM);
136}
137
138int RCCE_num_ues(void)
139{
140    return RCCE_NP;
141}
142
143double RCCE_wtime(void)
144{
145  return ( ((double)rdtsc())/((double)RC_REFCLOCKGHZ*1.e9));
146}
147
148int RCCE_barrier(RCCE_COMM *comm)
149{
150    assert(comm == &RCCE_COMM_WORLD);
151    if(RCCE_debug_synch) {
152        printf("UE %d has checked into barrier\n", RCCE_IAM);
153    }
154    if(RCCE_NP != 1) {
155        barrier_wait();
156    }
157    if(RCCE_debug_synch) {
158        printf("UE %d has cleared barrier\n", RCCE_IAM);
159    }
160    return (RCCE_SUCCESS);
161}
162
163int RCCE_send(char *privbuf, size_t size, int dest)
164{
165#ifdef MEASURE_TIME
166    double send_start = RCCE_wtime();
167#endif
168
169    if (dest<0 || dest >= RCCE_NP) {
170        return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
171    }
172
173    errval_t err = send_message(privbuf, size, RC_COREID[dest]);
174    assert(err_is_ok(err));
175
176#ifdef MEASURE_TIME
177    measure_rcce_time += RCCE_wtime() - send_start;
178#endif
179
180#ifdef MEASURE_DATA
181    measure_rcce_data[rcce_curphase][dest] += size;
182#endif
183
184    return (RCCE_SUCCESS);
185}
186
// PERF(x):  with RCCE_PERF_MEASURE, stores rdtsc() into d->timestamp[x]; the
//           expansion site must have a variable named `d` in scope.
// PERFM(x): with RCCE_PERF_MEASURE, expands to its argument.
// Without RCCE_PERF_MEASURE both macros expand to nothing.
#ifndef RCCE_PERF_MEASURE
#       define PERF(x)
#       define PERFM(x)
#else
#       include <barrelfish/dispatcher_arch.h>
#       include <barrelfish/curdispatcher_arch.h>
#       define PERF(x)  d->timestamp[x] = rdtsc()
#       define PERFM(x) x
#endif
196
197int RCCE_recv(char *privbuf, size_t size, int source)
198{
199    errval_t err;
200#ifdef MEASURE_TIME
201    double recv_start = RCCE_wtime();
202#endif
203
204#ifdef RCCE_PERF_MEASURE
205    dispatcher_handle_t handle = curdispatcher();
206    struct dispatcher_shared_generic* d =
207        get_dispatcher_shared_generic(handle);
208#endif
209
210    if (source<0 || source >= RCCE_NP) {
211        return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
212    }
213
214    int core_id = RC_COREID[source];
215    struct msg_buf *mb = &msgbuf[core_id];
216#ifdef BULK_TRANSFER_ENABLED
217    mb->bulk_ready = true;
218    mb->length = size;
219    mb->current = 0;
220    mb->msg = privbuf;
221#endif
222
223    dprintf("%d: R(%lu,%d,%p,%d,%p)\n", my_core_id, size, source, mb, mb->pending, privbuf);
224
225#ifdef BULK_TRANSFER_ENABLED
226    err = barray[core_id]->tx_vtbl.bulk_recv_ready(barray[core_id], NOP_CONT,
227                                                   my_core_id, size);
228    assert(err_is_ok(err));
229#endif
230
231    PERF(30);
232
233    while(!mb->pending) {
234        messages_wait_and_handle_next();
235    }
236
237    PERF(31);
238
239    dprintf("%d: msg arrived\n", my_core_id);
240
241    /* if(size <= DEFAULT_UMP_BUFLEN) { */
242#ifndef BULK_TRANSFER_ENABLED
243        assert(size == mb->length);
244        memcpy(privbuf, mb->msg, size);
245    /* } else { */
246#else
247        assert(mb->bulk);
248#endif
249    /* } */
250    mb->pending = false;
251
252#ifndef BULK_TRANSFER_ENABLED
253    assert(!mb->bulk);
254    free(mb->msg);
255    PERF(32);
256    err = barray[core_id]->tx_vtbl.message_reply(barray[core_id],
257                                                 NOP_CONT, my_core_id);
258    PERF(33);
259    assert(err_is_ok(err));
260#else
261    assert(mb->bulk);
262#endif
263
264#ifdef MEASURE_TIME
265    measure_rcce_time += RCCE_wtime() - recv_start;
266#endif
267
268    return (RCCE_SUCCESS);
269}
270
271int RCCE_finalize(void)
272{
273#ifdef MEASURE_TIME
274    double measure_end = RCCE_wtime();
275    printf("%d: Time spent in RCCE communication %.5g seconds. "
276           "%.5g seconds total program run-time.\n", RCCE_ue(),
277           measure_rcce_time, measure_end - measure_start);
278#endif
279
280#ifdef MEASURE_DATA
281    for(int phase = 0; phase < MAX_PHASES; phase++) {
282        printf("%d: Phase %d: ", RCCE_ue(), phase);
283        for(int i = 0; i < RCCE_NP; i++) {
284            printf("%lu ", measure_rcce_data[phase][i]);
285        }
286        printf("\n");
287    }
288#endif
289
290    return (RCCE_SUCCESS);
291}
292
293int RCCE_flag_alloc(RCCE_FLAG *flag)
294{
295    /* printf("Warning: RCCE_flag_alloc ignored\n"); */
296    return (RCCE_SUCCESS);
297}
298
299int RCCE_flag_free(RCCE_FLAG *flag)
300{
301    printf("Warning: RCCE_flag_free ignored\n");
302    return (RCCE_SUCCESS);
303}
304