1/*
2 * Copyright (c) 2014 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <inttypes.h>
13
14#include <barrelfish/barrelfish.h>
15#include <barrelfish/ump_chan.h>
16#include <bench/bench.h>
17#include <barrelfish/sys_debug.h>
18#include <dma/dma.h>
19#include <dma/dma_request.h>
20#include <dma/client/dma_client_device.h>
21#include <dma/dma_manager_client.h>
22
23#include "benchmark.h"
24
25
26static inline cycles_t calculate_time(cycles_t tsc_start,
27                                      cycles_t tsc_end)
28{
29    cycles_t result;
30    if (tsc_end < tsc_start) {
31        result = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead();
32    } else {
33        result = (tsc_end - tsc_start - bench_tscoverhead());
34    }
35    return result;
36}
37
38
39void xphi_bench_start_echo(struct ump_chan *chan)
40{
41    errval_t err;
42
43    volatile struct ump_message *msg;
44    volatile struct ump_message *msg_recv;
45
46    struct ump_control ctrl;
47    msg = ump_chan_get_next(chan, &ctrl);
48
49    // send initiator message
50    debug_printf("signal ready.\n");
51    msg->data[0] = 123;
52    msg->header.control = ctrl;
53
54    debug_printf("xphi_bench_start_echo: receiving messages.\n");
55#ifdef XPHI_BENCH_CHECK_STOP
56    uint64_t data = 0x0;
57    while (data != XPHI_BENCH_STOP_FLAG) {
58#else
59    while(true) {
60#endif
61        err = ump_chan_recv(chan, &msg_recv);
62        if (err_is_ok(err)) {
63            XPHI_BENCH_DBG("received ump message [%p]\n", msg_recv);
64            msg = ump_chan_get_next(chan, &ctrl);
65            msg->header.control = ctrl;
66#ifdef XPHI_BENCH_CHECK_STOP
67            data = msg_recv->data[0];
68#endif
69        }
70    }
71    if (data == XPHI_BENCH_STOP_FLAG) {
72        debug_printf("xphi_bench_start_echo: received stop flag.\n");
73    }
74}
75
76
77errval_t xphi_bench_start_initator_rtt(struct ump_chan *chan)
78{
79    errval_t err;
80    cycles_t tsc_start, tsc_end;
81    cycles_t result;
82    uint64_t tscperus;
83    bench_ctl_t *ctl;
84
85    volatile struct ump_message *msg;
86
87    bench_init();
88
89    err = sys_debug_get_tsc_per_ms(&tscperus);
90    assert(err_is_ok(err));
91    tscperus /= 1000;
92
93    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_RTT_NUM_ROUNDS);
94
95    debug_printf("RTT benchmark: waiting for ready signal.\n");
96    while (1) {
97        err = ump_chan_recv(chan, &msg);
98        if (err_is_ok(err)) {
99            break;
100        }
101    }
102
103    struct ump_control ctrl;
104
105    debug_printf("Starting RTT benchmark tsc/us=%lu\n", tscperus);
106    uint32_t rep_counter = 0;
107    do {
108        if (!(rep_counter++ % (XPHI_BENCH_RTT_NUM_ROUNDS / 10))) {
109            debug_printf("  > run %u of %u...\n", rep_counter,
110                         XPHI_BENCH_RTT_NUM_ROUNDS);
111        }
112        tsc_start = bench_tsc();
113        msg = ump_chan_get_next(chan, &ctrl);
114        msg->header.control = ctrl;
115        do {
116            err = ump_chan_recv(chan, &msg);
117        } while (err_is_fail(err));
118        tsc_end = bench_tsc();
119        result = calculate_time(tsc_start, tsc_end);
120
121    } while (!bench_ctl_add_run(ctl, &result));
122
123#ifdef XPHI_BENCH_CHECK_STOP
124    msg = ump_chan_get_next(chan, &ctrl);
125    msg->data[0] = XPHI_BENCH_STOP_FLAG;
126    msg->header.control = ctrl;
127#endif
128    bench_ctl_dump_analysis(ctl, 0, "RTT", tscperus);
129
130    return SYS_ERR_OK;
131}
132
133#if 0
134errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs,
135                                        struct ump_chan *uc)
136{
137    errval_t err;
138
139    cycles_t tsc_start, tsc_end;
140    cycles_t result;
141    uint64_t tscperus;
142    bench_ctl_t *ctl;
143
144    volatile struct ump_message *msg;
145    uint64_t buf_idx;
146
147    bench_init();
148
149    uint32_t n_recv = 0;
150
151    err = sys_debug_get_tsc_per_ms(&tscperus);
152    assert(err_is_ok(err));
153    tscperus /= 1000;
154
155    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
156
157    debug_printf("Sync Throughput Benchmark: waiting for ready signal...\n");
158    while (1) {
159        err = ump_chan_recv(uc, &msg);
160        if (err_is_ok(err)) {
161            break;
162        }
163    }
164
165    struct ump_control ctrl;
166
167    debug_printf("Starting sync throughput benchmark. tsc/us=%lu\n", tscperus);
168    uint32_t rep_counter = 0;
169    do {
170        uint64_t b_idx = 0;
171
172        debug_printf("  > run %u of %u with %u moves...\n", rep_counter++,
173        XPHI_BENCH_NUM_REPS,
174                     XPHI_BENCH_NUM_RUNS);
175
176        tsc_start = bench_tsc();
177
178        msg = ump_chan_get_next(uc, &ctrl);
179        struct bench_buf *buf = &bufs->buf[b_idx];
180        xphi_bench_fill_buffer(buf, 1);
181
182        // send initiator message
183        XPHI_BENCH_DBG("sending message [%lu]\n", b_idx);
184        msg->data[0] = b_idx;
185        msg->header.control = ctrl;
186        n_recv = 0;
187        for (uint32_t irun = 0; irun < (XPHI_BENCH_NUM_RUNS - 1); ++irun) {
188            do {
189                err = ump_chan_recv(uc, &msg);
190            } while (err_is_fail(err));
191
192            n_recv++;
193            buf_idx = msg->data[0];
194            uint32_t ret_count = 0;
195            buf = &bufs->buf[b_idx];
196            xphi_bench_read_buffer(buf, 1, &ret_count);
197            XPHI_BENCH_DBG("received message [%lu]\n", buf_idx);
198            assert(buf_idx == b_idx);
199            b_idx = (b_idx + 1) & (bufs->num - 1);
200
201            buf = &bufs->buf[b_idx];
202            xphi_bench_fill_buffer(buf, 1);
203
204            XPHI_BENCH_DBG("sending message [%lu]\n", b_idx);
205            msg = ump_chan_get_next(uc, &ctrl);
206            assert(msg);
207            msg->data[0] = b_idx;
208            msg->header.control = ctrl;
209        }
210
211        while (n_recv < XPHI_BENCH_NUM_RUNS) {
212            err = ump_chan_recv(uc, &msg);
213            if (err_is_ok(err)) {
214                buf_idx = msg->data[0];
215                XPHI_BENCH_DBG("received message [%"PRIu64"]\n", buf_idx);
216                buf = &bufs->buf[buf_idx];
217                uint32_t ret_count = 0;
218                xphi_bench_read_buffer(buf, 1, &ret_count);
219                n_recv++;
220            }
221        }
222        tsc_end = bench_tsc();
223        result = calculate_time(tsc_start, tsc_end);
224    } while (!bench_ctl_add_run(ctl, &result));
225
226#ifdef XPHI_BENCH_CHECK_STOP
227    msg = ump_chan_get_next(uc, &ctrl);
228    msg->data[0] = XPHI_BENCH_STOP_FLAG;
229    msg->header.control = ctrl;
230#endif
231
232    double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
233    avg_s /= 1000000;
234
235// bench_ctl_dump_csv(ctl, "", tscperus);
236    bench_ctl_dump_analysis(ctl, 0, "Sync Throughput", tscperus);
237    printf("Average seconds: %f\n", avg_s);
238    printf("Average throughput: %f GByte/s\n",
239           (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024
240            / 1024)
241           / (avg_s));
242    printf("Average throughput (with processing): %f GByte/s\n",
243           (XPHI_BENCH_NUM_RUNS * ((double) (XPHI_BENCH_NUM_RUNS
244                           * XPHI_BENCH_BUF_SIZE))
245            / 1024 / 1024 / 1024)
246           / (avg_s));
247
248    return SYS_ERR_OK;
249}
250
251errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs,
252                                         struct ump_chan *uc)
253{
254    volatile struct ump_message *msg;
255    uint64_t buf_idx;
256    uint32_t in_transit = 0;
257
258    errval_t err;
259
260    bench_init();
261
262    cycles_t tsc_start;
263    cycles_t result;
264    uint64_t tscperus;
265    bench_ctl_t *ctl;
266
267    err = sys_debug_get_tsc_per_ms(&tscperus);
268    assert(err_is_ok(err));
269    tscperus /= 1000;
270
271    debug_printf("tscperus = %lu\n", tscperus);
272
273    ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS);
274
275    debug_printf("waiting for ready signal\n");
276    while (1) {
277        err = ump_chan_recv(uc, &msg);
278        if (err_is_ok(err)) {
279            break;
280        }
281    }
282
283    debug_printf("starting benchmark ASYNC...\n");
284
285    struct ump_control ctrl;
286
287    uint32_t rep_counter = 0;
288    do {
289        uint64_t b_idx = 0;
290        debug_printf("  > run %u of %u with %u moves...\n", rep_counter++,
291        XPHI_BENCH_NUM_REPS,
292                     XPHI_BENCH_NUM_RUNS);
293        tsc_start = bench_tsc();
294
295        uint32_t irun = 0;
296        uint32_t n_recv = 0;
297        struct bench_buf *buf;
298        while (irun < XPHI_BENCH_NUM_RUNS) {
299            if (in_transit < XPHI_BENCH_MSG_NUM) {
300                msg = ump_chan_get_next(uc, &ctrl);
301                if (!msg) {
302                    continue;
303                }
304                buf = &bufs->buf[b_idx];
305                xphi_bench_fill_buffer(buf, 1);
306                XPHI_BENCH_DBG("sending message [%lu] %p\n", b_idx, msg);
307                msg->data[0] = b_idx;
308                msg->header.control = ctrl;
309                irun++;
310                in_transit++;
311                b_idx = (b_idx + 1) & (bufs->num - 1);
312            }
313
314            err = ump_chan_recv(uc, &msg);
315            if (err_is_ok(err)) {
316                buf_idx = msg->data[0];
317                XPHI_BENCH_DBG("receiving message [%"PRIu64"]\n", buf_idx);
318                buf = &bufs->buf[buf_idx];
319                uint32_t ret_count = 0;
320                xphi_bench_read_buffer(buf, 1, &ret_count);
321                in_transit--;
322                n_recv++;
323            }
324        }
325
326        while (n_recv < XPHI_BENCH_NUM_RUNS) {
327            err = ump_chan_recv(uc, &msg);
328            if (err_is_ok(err)) {
329                buf_idx = msg->data[0];
330                buf = &bufs->buf[buf_idx];
331                uint32_t ret_count = 0;
332                XPHI_BENCH_DBG("receiving message [%lu]\n", buf_idx);
333                xphi_bench_read_buffer(buf, 1, &ret_count);
334                in_transit--;
335                n_recv++;
336            }
337        }
338
339        result = bench_tsc();
340        if (result - tsc_start > bench_tscoverhead()) {
341            debug_printf("%lu %lu", result - tsc_start, bench_tscoverhead());
342        }
343        if (result < tsc_start) {
344            result = (LONG_MAX - tsc_start) + result - bench_tscoverhead();
345        } else {
346            result = (result - tsc_start - bench_tscoverhead());
347        }
348
349        assert(in_transit == 0);
350    } while (!bench_ctl_add_run(ctl, &result));
351
352#ifdef XPHI_BENCH_CHECK_STOP
353    msg = ump_chan_get_next(uc, &ctrl);
354    msg->data[0] = XPHI_BENCH_STOP_FLAG;
355    msg->header.control = ctrl;
356#endif
357
358    double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus;
359    avg_s /= 1000000;
360
361// bench_ctl_dump_csv(ctl, "", tscperus);
362    bench_ctl_dump_analysis(ctl, 0, "ASync Throughput", tscperus);
363    printf("Average seconds: %f\n", avg_s);
364    printf("Average throughput: %f GByte/s\n",
365           (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024
366            / 1024)
367           / (avg_s));
368    printf("Average throughput (with processing): %f GByte/s\n",
369           (XPHI_BENCH_NUM_RUNS * ((double) (XPHI_BENCH_NUM_RUNS
370                           * XPHI_BENCH_BUF_SIZE))
371            / 1024 / 1024 / 1024)
372           / (avg_s));
373
374    return SYS_ERR_OK;
375}
376
377#endif
378