1/*
2 * Copyright (c) 2007-12 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9#include <stdio.h>
10#include <string.h>
11#include <stdlib.h>
12
13#include <barrelfish/barrelfish.h>
14#include <barrelfish/ump_chan.h>
15#include <bench/bench.h>
16#include <barrelfish/sys_debug.h>
17
18#include <xeon_phi/xeon_phi.h>
19#include <xeon_phi/xeon_phi_client.h>
20
21#include <dma/xeon_phi/xeon_phi_dma.h>
22#include <dma/dma_request.h>
23#include <dma/client/dma_client_device.h>
24#include <dma/dma_manager_client.h>
25
26#include "benchmark.h"
27
28uint8_t connected = 0;
29
30static void *card_buf;
31static struct capref card_frame;
32static lpaddr_t card_base;
33static size_t card_frame_sz;
34
35static void *host_buf;
36static struct capref host_frame;
37static lpaddr_t host_base;
38static size_t host_frame_sz;
39
40static struct bench_bufs bufs;
41static struct bench_bufs bufs_rev;
42
43static struct ump_chan uc;
44static struct ump_chan uc_rev;
45
46static void *inbuf;
47static void *outbuf;
48
49static void *inbuf_rev;
50static void *outbuf_rev;
51
52static volatile uint8_t dma_completed;
53
54static void dma_done_cb(errval_t err,
55                        dma_req_id_t id,
56                        void *st)
57{
58    size_t size = (host_frame_sz < card_frame_sz ? host_frame_sz : card_frame_sz);
59    size = (size <= (1UL << 21) ? size : (1UL << 21));
60    debug_printf("DMA request %016lx executed...[%08x]-[%08x] result: %s\n", id,
61                 *((uint32_t *) host_buf), *((uint32_t *) card_buf),
62                 (memcmp(host_buf, card_buf, size) ? "FAIL" : "SUCCESS"));
63    dma_completed = 0x1;
64}
65
66static errval_t dma_test(struct dma_device *dev)
67{
68    errval_t err;
69
70    debug_printf("''''''''''''''''''''''''''''''''''''''''''''\n");
71    debug_printf("DMA TEST & Verification\n");
72    debug_printf(",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,\n");
73
74    dma_req_id_t id;
75
76    uint32_t test = 0xcafebabe;
77
78    size_t size = (host_frame_sz < card_frame_sz ? host_frame_sz : card_frame_sz);
79
80    size = (size <= (1UL << 21) ? size : (1UL << 21));
81
82    struct dma_req_setup setup = {
83        .done_cb = dma_done_cb,
84        .cb_arg = &test,
85        .args = {
86            .memcpy = {
87                .src = host_base,
88                .dst = card_base,
89                .bytes = size
90            }
91        }
92    };
93
94    memset(host_buf, 0xA5, size);
95
96    dma_completed = 0x0;
97    debug_printf("issuing first request. [%08x]-[%08x]\n", *((uint32_t *) host_buf),
98                 *((uint32_t *) card_buf));
99    err = dma_request_memcpy(dev, &setup, &id);
100    if (err_is_fail(err)) {
101        USER_PANIC_ERR(err, "could not exec the transfer");
102    }
103    debug_printf("request %016lx issued.\n", id);
104
105    while (!dma_completed) {
106        messages_wait_and_handle_next();
107    }
108
109    dma_completed = 0x0;
110
111    memset(card_buf, 0x5A, size);
112
113    setup.args.memcpy.src = card_base;
114    setup.args.memcpy.dst = host_base;
115    debug_printf("issuing second request. [%08x]-[%08x]\n", *((uint32_t *) host_buf),
116                 *((uint32_t *) card_buf));
117    err = dma_request_memcpy(dev, &setup, &id);
118    if (err_is_fail(err)) {
119        USER_PANIC_ERR(err, "could not exec the transfer");
120    }
121
122    debug_printf("request %016lx issued.\n", id);
123
124    while (!dma_completed) {
125        messages_wait_and_handle_next();
126    }
127
128    return SYS_ERR_OK;
129}
130
131static errval_t alloc_local(void)
132{
133    errval_t err;
134
135    size_t frame_size = 0;
136
137    frame_size = XPHI_BENCH_FRAME_SIZE_CARD;
138
139    if (!frame_size) {
140        frame_size = 4096;
141    }
142
143    debug_printf("Allocating a frame of size: %lx\n", frame_size);
144    size_t alloced_size = 0;
145
146    uint64_t minbase, maxlimit;
147    ram_get_affinity(&minbase, &maxlimit);
148
149    /* set the ram affinity to make sure we are in the correct numa node */
150    ram_set_affinity(XPHI_BENCH_RAM_MINBASE, XPHI_BENCH_RAM_MAXLIMIT);
151    err = frame_alloc(&host_frame, frame_size, &alloced_size);
152    assert(err_is_ok(err));
153    assert(alloced_size >= frame_size);
154    ram_set_affinity(minbase, maxlimit);
155
156    assert(err_is_ok(err));
157    assert(alloced_size >= frame_size);
158
159    struct frame_identity id;
160    err = frame_identify(host_frame, &id);
161    assert(err_is_ok(err));
162    host_base = id.base;
163    host_frame_sz = alloced_size;
164
165    err = vspace_map_one_frame(&host_buf, alloced_size, host_frame, NULL, NULL);
166
167    return err;
168}
169
/**
 * \brief Returns the address `offset` bytes past `base`.
 *
 * Goes through a character pointer so the computation does not depend on the
 * GNU extension that permits arithmetic on `void *`.
 */
static inline void *xphi_buf_at(void *base, size_t offset)
{
    return (char *) base + offset;
}

/**
 * \brief Assigns the channel and benchmark buffer pointers from host_buf and
 *        card_buf according to the compile-time configuration.
 *
 * - XPHI_BENCH_CHAN_HOST:    the normal channel uses the host frame and the
 *                            reversed channel the card frame, with the
 *                            outgoing buffer first.
 * - XPHI_BENCH_CHAN_CARD:    same frames, with the incoming buffer first.
 * - XPHI_BENCH_CHAN_DEFAULT: the normal channel receives in the card frame and
 *                            sends in the host frame; the reversed channel
 *                            uses the second message-sized slot of each.
 *
 * bufs.buf and bufs_rev.buf are only assigned in the XPHI_BENCH_CHAN_DEFAULT
 * configuration, where XPHI_BENCH_BUFFER_CARD selects which frame backs which.
 */
static void init_buffer_c0(void)
{
#ifdef XPHI_BENCH_CHAN_HOST
    inbuf = xphi_buf_at(host_buf, XPHI_BENCH_MSG_FRAME_SIZE);
    outbuf = host_buf;
    inbuf_rev = xphi_buf_at(card_buf, XPHI_BENCH_MSG_FRAME_SIZE);
    outbuf_rev = card_buf;
#endif

#ifdef XPHI_BENCH_CHAN_CARD
    inbuf = host_buf;
    outbuf = xphi_buf_at(host_buf, XPHI_BENCH_MSG_FRAME_SIZE);
    inbuf_rev = card_buf;
    outbuf_rev = xphi_buf_at(card_buf, XPHI_BENCH_MSG_FRAME_SIZE);
#endif

#ifdef XPHI_BENCH_CHAN_DEFAULT
    inbuf = card_buf;
    outbuf = host_buf;
    inbuf_rev = xphi_buf_at(outbuf, XPHI_BENCH_MSG_FRAME_SIZE);
    outbuf_rev = xphi_buf_at(inbuf, XPHI_BENCH_MSG_FRAME_SIZE);
#ifdef XPHI_BENCH_BUFFER_CARD
    bufs.buf = xphi_buf_at(card_buf, 2 * XPHI_BENCH_MSG_FRAME_SIZE);
    bufs_rev.buf = xphi_buf_at(host_buf, 2 * XPHI_BENCH_MSG_FRAME_SIZE);
#else
    bufs.buf = xphi_buf_at(host_buf, 2 * XPHI_BENCH_MSG_FRAME_SIZE);
    bufs_rev.buf = xphi_buf_at(card_buf, 2 * XPHI_BENCH_MSG_FRAME_SIZE);
#endif
#endif
}
200
201static errval_t msg_open_cb(xphi_dom_id_t domain,
202                            uint64_t usrdata,
203                            struct capref msgframe,
204                            uint8_t type)
205{
206    errval_t err;
207
208    struct frame_identity id;
209    err = frame_identify(msgframe, &id);
210    if (err_is_fail(err)) {
211        USER_PANIC_ERR(err, "could not identify the frame");
212    }
213
214    debug_printf("msg_open_cb | Frame base: %016lx, size=%lx\n", id.base,
215                 1UL << id.bits);
216
217    assert((1UL << id.bits) >= XPHI_BENCH_FRAME_SIZE_CARD);
218
219    err = vspace_map_one_frame(&card_buf, 1UL << id.bits, msgframe, NULL, NULL);
220    if (err_is_fail(err)) {
221        USER_PANIC_ERR(err, "Could not map the frame");
222    }
223
224    debug_printf("initializing ump channel\n");
225
226    card_frame = msgframe;
227
228    card_base = id.base;
229
230    card_frame_sz = (1UL << id.bits);
231
232    init_buffer_c0();
233
234    connected = 0x1;
235
236    return SYS_ERR_OK;
237}
238
239static struct xeon_phi_callbacks callbacks = {
240    .open = msg_open_cb
241};
242
243int main(int argc,
244         char **argv)
245{
246    errval_t err;
247
248    debug_printf("XEON PHI BENCH STARTED (HOST).\n");
249
250    debug_printf("Msg Buf Size = %lx, Buf Frame Size = %lx\n",
251    XPHI_BENCH_MSG_FRAME_SIZE,
252                 XPHI_BENCH_BUF_FRAME_SIZE);
253
254    xeon_phi_client_set_callbacks(&callbacks);
255
256    coreid_t core = XPHI_BENCH_CORE_CARD;
257    char *name = "k1om/sbin/xeon_phi_test";
258
259    xphi_dom_id_t domid;
260    err = xeon_phi_client_spawn(0, core, name, NULL,  NULL_CAP, 0, &domid);
261    if (err_is_fail(err)) {
262        USER_PANIC_ERR(err, "could not send the spawn message");
263    }
264
265    char iface[30];
266    snprintf(iface, 30, "xeon_phi_test.%u", XPHI_BENCH_CORE_CARD);
267
268    err = alloc_local();
269    assert(err_is_ok(err));
270
271    err = xeon_phi_client_chan_open(0, domid, 0, host_frame, 2);
272    if (err_is_fail(err)) {
273        USER_PANIC_ERR(err, "could not open channel");
274    }
275
276    while (!connected) {
277        messages_wait_and_handle_next();
278    }
279
280    debug_printf("Initializing UMP channel...\n");
281
282    err = ump_chan_init(&uc, inbuf, XPHI_BENCH_MSG_FRAME_SIZE, outbuf,
283                        XPHI_BENCH_MSG_FRAME_SIZE);
284    err = ump_chan_init(&uc_rev, inbuf_rev,  XPHI_BENCH_MSG_FRAME_SIZE,
285                        outbuf_rev,XPHI_BENCH_MSG_FRAME_SIZE);
286
287#ifdef XPHI_BENCH_PROCESS_CARD
288#ifndef XPHI_BENCH_THROUGHPUT
289    debug_printf("---------------- normal run -----------------\n");
290    xphi_bench_start_initator_rtt(&bufs, &uc);
291    debug_printf("---------------- reversed run -----------------\n");
292    xphi_bench_start_initator_rtt(&bufs_rev, &uc_rev);
293#else
294#ifdef XPHI_BENCH_SEND_SYNC
295    debug_printf("---------------- normal run -----------------\n");
296    xphi_bench_start_initator_sync(&bufs, &uc);
297    debug_printf("---------------- reversed run -----------------\n");
298    xphi_bench_start_initator_sync(&bufs-rev, &uc_rev);
299#else
300    debug_printf("---------------- normal run -----------------\n");
301    xphi_bench_start_initator_async(&bufs, &uc);
302    debug_printf("---------------- reversed run -----------------\n");
303    xphi_bench_start_initator_async(&bufs_rev, &uc_rev);
304#endif
305#endif
306#else
307#ifdef XPHI_BENCH_BUFFER_CARD
308    printf("Testing with buffer card GDDR memory\n");
309#else
310    printf("Testing with buffer own RAM\n");
311#endif
312#ifndef XPHI_BENCH_THROUGHPUT
313    debug_printf("---------------- normal run -----------------\n");
314    xphi_bench_start_echo(&bufs, &uc);
315    debug_printf("---------------- reversed run -----------------\n");
316    xphi_bench_start_echo(&bufs_rev, &uc_rev);
317#else
318    debug_printf("---------------- normal run -----------------\n");
319    xphi_bench_start_processor(&bufs, &uc);
320    debug_printf("---------------- reversed run -----------------\n");
321    xphi_bench_start_processor(&bufs_rev, &uc_rev);
322#endif
323#endif
324
325#ifdef XPHI_BENCH_PROCESS_CARD
326    err = dma_manager_wait_for_driver(DMA_DEV_TYPE_XEON_PHI, 0);
327    if (err_is_fail(err)) {
328        USER_PANIC_ERR(err, "waiting for drive");
329    }
330
331    struct dma_client_info info = {
332        .type = DMA_CLIENT_INFO_TYPE_NAME,
333        .device_type = DMA_DEV_TYPE_XEON_PHI,
334        .args = {
335            .name = XEON_PHI_DMA_SERVICE_NAME".0"
336        }
337    };
338
339    struct dma_client_device *xdev;
340    err = dma_client_device_init(&info, &xdev);
341    if (err_is_fail(err)) {
342        USER_PANIC_ERR(err, "could not initialize client device");
343    }
344
345    struct dma_device *dev = (struct dma_device *) xdev;
346
347    err = dma_register_memory((struct dma_device *) dev, card_frame);
348    if (err_is_fail(err)) {
349        USER_PANIC_ERR(err, "could not register memory");
350    }
351
352    err = dma_register_memory((struct dma_device *) dev, host_frame);
353    if (err_is_fail(err)) {
354        USER_PANIC_ERR(err, "could not register memory");
355    }
356
357    debug_printf("+++++++ Verify DMA Functionality ++++++++\n");
358    dma_test(dev);
359
360    debug_printf("+++++++ DMA / MEMCOPY Benchmark ++++++++\n");
361    debug_printf("\n");
362    debug_printf("========================================\n");
363    debug_printf("\n");
364    debug_printf("DMA-BENCH: CARD -> HOST \n");
365    debug_printf("\n");
366    debug_printf("========================================\n");
367    debug_printf("\n");
368
369    xphi_bench_memcpy((struct dma_device *) dev,
370                      host_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
371                      card_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
372                      XPHI_BENCH_BUF_FRAME_SIZE / 2,
373                      host_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
374                      card_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE);
375    debug_printf("\n");
376    debug_printf("========================================\n");
377    debug_printf("\n");
378    debug_printf("DMA-BENCH: HOST -> CARD \n");
379    debug_printf("\n");
380    debug_printf("========================================\n");
381    debug_printf("\n");
382    xphi_bench_memcpy((struct dma_device *) dev,
383                      card_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
384                      host_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
385                      XPHI_BENCH_BUF_FRAME_SIZE / 2,
386                      card_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
387                      host_base + 2 * XPHI_BENCH_MSG_FRAME_SIZE);
388    debug_printf("\n");
389    debug_printf("========================================\n");
390    debug_printf("\n");
391    debug_printf("DMA-BENCH: HOST LOCAL \n");
392    debug_printf("\n");
393    debug_printf("========================================\n");
394    debug_printf("\n");
395    xphi_bench_memcpy(
396                    (struct dma_device *) dev,
397                    host_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE,
398                    host_buf + 2 * XPHI_BENCH_MSG_FRAME_SIZE
399                    + (XPHI_BENCH_BUF_FRAME_SIZE / 2),
400                    XPHI_BENCH_BUF_FRAME_SIZE / 2, 0, 0);
401#endif
402}
403
404