1/*
2 * Copyright (c) 2007-12 ETH Zurich.
3 * All rights reserved.
4 *
5 * This file is distributed under the terms in the attached LICENSE file.
6 * If you do not find this file, copies can be found by writing to:
7 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
8 */
9#include <stdio.h>
10#include <string.h>
11#include <stdlib.h>
12
13#include <barrelfish/barrelfish.h>
14#include <barrelfish/ump_chan.h>
15#include <bench/bench.h>
16#include <barrelfish/sys_debug.h>
17
18#include <xeon_phi/xeon_phi.h>
19#include <xeon_phi/xeon_phi_client.h>
20
21#include <dma/xeon_phi/xeon_phi_dma.h>
22#include <dma/dma_request.h>
23#include <dma/client/dma_client_device.h>
24#include <dma/dma_manager_client.h>
25
26#include <driverkit/hwmodel.h>
27#include <driverkit/iommu.h>
28
29#include <if/xomp_defs.h>
30#include <barrelfish/deferred.h>
31#include <skb/skb.h>
32
33
/* Compile-time feature switches (0 = disabled, non-zero = enabled). */
/* When enabled, the "e1000" node is looked up and required as well. */
#define ENABLE_NETWORKING 0
/* When enabled, main() issues the SKB benchmark queries after the offload run. */
#define RUN_OSDI_BENCHMARK 0

/*
 * Separator lines for the log output. Both macros expand to a complete
 * statement *including* the trailing semicolon, which is why they appear
 * without one at their use sites.
 */
#define HLINE debug_printf("#######################################################\n");
#define hline debug_printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");

/*
 * Logging helpers that prepend a fixed tag to the message. The first argument
 * must be a string literal, because it is joined with the tag by compile-time
 * string concatenation. The standard C99 __VA_ARGS__ form is used instead of
 * the GNU-only named variadic parameter; every invocation expands identically.
 */
#define PRINTF(...) debug_printf("[HW Models] " __VA_ARGS__)
#define TODO(...) debug_printf("[HW Models] TODO: " __VA_ARGS__)
41
42
43/**
44 * callback is executed when
45 */
46static errval_t msg_open_cb(xphi_dom_id_t domain,
47                            uint64_t usrdata,
48                            struct capref msgframe,
49                            uint8_t type)
50{
51    PRINTF("msg_open callback\n");
52    return SYS_ERR_OK;
53}
54
55
/*
 * Callback table handed to the Xeon Phi client library in main() through
 * xeon_phi_client_set_callbacks(). Only the `open` handler is provided.
 */
static struct xeon_phi_callbacks callbacks = {
        .open = msg_open_cb
};
59
60
61
62
/* Binding to the offload domain; stays NULL until connect_cb() stores it. */
struct xomp_binding *coprocessor = NULL;
/* Set to true by notify_rx(); main() polls it while waiting for the result. */
static bool work_is_done = false;

/*
 * Hardware-model node ids, filled in by get_node_ids().
 * The initial value -1 marks an id that has not been obtained.
 */
static int32_t node_id_dma = -1;
static int32_t node_id_offload_core = -1;
static int32_t node_id_self = -1;
static int32_t node_id_ram = -1;
#if ENABLE_NETWORKING
static int32_t node_id_network = -1;
#endif

/* Path of the program that main() spawns on the co-processor. */
#define OFFLOAD_PATH "k1om/sbin/hwmodel/offload"
/* Xeon Phi id passed to every xeon_phi_client_* call in this file. */
#define XEON_PHI_ID 0
/* Core argument passed to xeon_phi_client_spawn(). */
#define XEON_PHI_CORE 1
/* Size in bytes of the data buffer: 2 MiB. A 1 GiB variant is kept disabled. */
//#define DATA_SIZE (1UL << 30)
#define DATA_SIZE (1UL << 21)
/* Size in bytes of one message buffer (1 MiB); the frame holds two of them. */
#define MSG_CHANNEL_SIZE (1UL << 20)
#define MSG_FRAME_SIZE (2 * MSG_CHANNEL_SIZE)
81
82static void get_node_ids(void)
83{
84    PRINTF("Obtaining ");
85    node_id_self = driverkit_hwmodel_get_my_node_id();
86    PRINTF("node id self is %d\n", node_id_self);
87
88    node_id_dma = xeon_phi_client_get_node_id(XEON_PHI_ID, "dma");
89    PRINTF("node id dma is %d\n", node_id_dma);
90
91    node_id_offload_core = xeon_phi_client_get_node_id(XEON_PHI_ID,
92                                                       "core: 1");
93    PRINTF("node id offload is %d\n", node_id_offload_core);
94
95    #if ENABLE_NETWORKING
96    node_id_network = driverkit_hwmodel_lookup_node_id("e1000");
97    PRINTF("node id network is %d\n", node_id_offload_core);
98    #endif
99
100    node_id_ram = driverkit_hwmodel_lookup_dram_node_id();
101    PRINTF("node id ram is %d\n", node_id_offload_core);
102
103    if (node_id_self == -1 || node_id_offload_core == -1
104            || node_id_dma == -1 || node_id_ram == -1
105    #if ENABLE_NETWORKING
106        || node_id_network == -1
107    #endif
108    ) {
109        USER_PANIC("Failed to obtain node id\n");
110    }
111}
112
113
114
115static void notify_rx(struct xomp_binding *_binding, uint64_t arg, errval_t err)
116{
117    PRINTF("Work is done callback.\n");
118
119    work_is_done = true;
120}
121
122
123//static void connect_cb(void *st, struct xomp_binding *binding)
124static void connect_cb(void *st, errval_t err, struct xomp_binding *_binding)
125{
126    PRINTF("Client connected.\n");
127    if (err_is_fail(err)) {
128        USER_PANIC_ERR(err, "failed to accept the connection");
129    }
130    coprocessor = _binding;
131    coprocessor->rx_vtbl.done_notify = notify_rx;
132}
133
134static void message_passing_init(struct dmem *msgmem)
135{
136    errval_t err;
137
138    struct xomp_frameinfo fi = {
139        .sendbase = msgmem->devaddr,
140        .inbuf = (void *)(msgmem->vbase + MSG_CHANNEL_SIZE),
141        .inbufsize = MSG_CHANNEL_SIZE,
142        .outbuf = (void *)(msgmem->vbase),
143        .outbufsize = MSG_CHANNEL_SIZE,
144    };
145    err = xomp_accept(&fi, NULL, connect_cb, get_default_waitset(),
146                      IDC_EXPORT_FLAGS_DEFAULT);
147    if (err_is_fail(err)) {
148        USER_PANIC_ERR(err, "failed to accept the connection");
149    }
150}
151
152
153int main(int argc,  char **argv)
154{
155    errval_t err;
156
157    // initialize the xeon phi client
158    err = xeon_phi_client_init(XEON_PHI_ID);
159    if (err_is_fail(err)) {
160        USER_PANIC_ERR(err, "failed to initialize the xeon phi client");
161    }
162
163    HLINE
164    PRINTF("Offload Scenario started.\n");
165    HLINE
166
167    // set the callbacks
168    xeon_phi_client_set_callbacks(&callbacks);
169
170    // obtain the node id
171    get_node_ids();
172
173    PRINTF("Allocating memory for data processing of %zu MB\n", DATA_SIZE >> 20);
174
175    struct capref mem;
176    int32_t nodes_data[] = {
177        node_id_dma, node_id_self,
178    #if ENABLE_NETWORKING
179        node_id_network,
180    #endif
181         0};
182    err = driverkit_hwmodel_frame_alloc(&mem, DATA_SIZE, node_id_ram, nodes_data);
183    if (err_is_fail(err)) {
184        USER_PANIC_ERR(err, "Failed to allocate memory\n");
185    }
186
187    hline
188
189    PRINTF("Mapping area of memory.\n");
190    struct dmem dmem;
191    err = driverkit_hwmodel_vspace_map(node_id_self, mem, VREGION_FLAGS_READ_WRITE,
192                               &dmem);
193    if (err_is_fail(err)) {
194        USER_PANIC_ERR(err, "failed to map the memory\n");
195    }
196
197    /* TODO: allocate vspace in client */
198    uint64_t clientva = (20UL * (512UL << 30));
199
200    hline
201
202    PRINTF("Populating memory region with data\n");
203    for(size_t i = 0; i < DATA_SIZE; i += sizeof(uint64_t)) {
204        uint64_t *p = (uint64_t *)(dmem.vbase + i);
205        *p = i;
206    }
207
208    hline
209
210    PRINTF("Allocating memory for message passing of %zu kb\n", MSG_FRAME_SIZE >> 10);
211
212    struct capref msgframemem;
213    int32_t nodes_msg[] = {
214            node_id_offload_core, node_id_self, 0
215    };
216    err = driverkit_hwmodel_frame_alloc(&msgframemem, MSG_FRAME_SIZE, node_id_ram, nodes_msg);
217    if (err_is_fail(err)) {
218        USER_PANIC_ERR(err, "Failed to allocate memory\n");
219    }
220
221    struct dmem msgmem;
222    err = driverkit_hwmodel_vspace_map(node_id_self, msgframemem, VREGION_FLAGS_READ_WRITE,
223                                       &msgmem);
224    if (err_is_fail(err)) {
225        USER_PANIC_ERR(err, "failed to map the memory\n");
226    }
227
228    message_passing_init(&msgmem);
229
230    hline
231
232    PRINTF("Allocating memory on the co-processor of %zu MB\n", DATA_SIZE >> 20);
233
234    struct capref offloadmem;
235    #if 0
236
237    int32_t nodes_offload[] = {
238            node_id_offload_core, node_id_dma, 0
239    };
240    err = driverkit_hwmodel_frame_alloc(&offloadmem, DATA_SIZE, node_id_gddr,
241                                    nodes_offload);
242    #endif
243    err = xeon_phi_client_alloc_memory(XEON_PHI_ID, &offloadmem, DATA_SIZE);
244    if (err_is_fail(err)) {
245        USER_PANIC_ERR(err, "Failed to allocate memory\n");
246    }
247
248    hline
249
250    PRINTF("Prepare DMA from system RAM to co-processor GDDR\n");
251
252    uint64_t addr;
253    err = xeon_phi_client_dma_register(XEON_PHI_ID, mem, &addr);
254    if (err_is_fail(err)) {
255        USER_PANIC_ERR(err, "failed to register memory\n");
256    }
257    dmem.devaddr = addr;
258
259    err = xeon_phi_client_dma_register(XEON_PHI_ID, offloadmem, &addr);
260    if (err_is_fail(err)) {
261        USER_PANIC_ERR(err, "failed to register memory\n");
262    }
263
264    hline
265
266    PRINTF("Spawning process on co-processor\n");
267    xphi_dom_id_t  domid;
268    err = xeon_phi_client_spawn(XEON_PHI_ID, XEON_PHI_CORE, OFFLOAD_PATH, NULL,
269                                msgframemem, 0, &domid);
270    if (err_is_fail(err)) {
271        USER_PANIC_ERR(err, "failed to start the programm\n");
272    }
273
274    PRINTF("Spawned process with did: 0x%lx, waiting for connection\n", domid);
275
276    while(coprocessor == NULL) {
277        messages_wait_and_handle_next();
278    }
279
280    hline
281
282    PRINTF("Adding DMA mem\n");
283    err = xeon_phi_client_chan_open(XEON_PHI_ID, domid, clientva, offloadmem, 1);
284    if (err_is_fail(err)) {
285        USER_PANIC_ERR(err, "failed to set the channel");
286    }
287
288    hline
289
290    PRINTF("Perform DMA from system RAM to co-processor GDDR [%lx] -> [%lx]\n",
291           dmem.devaddr, addr);
292    err = xeon_phi_client_dma_memcpy(XEON_PHI_ID, addr, dmem.devaddr, DATA_SIZE);
293    if (err_is_fail(err)) {
294        USER_PANIC_ERR(err, "failed to do the dma mem cpy\n");
295    }
296
297    hline
298
299    PRINTF("Sending command to the co-processor\n");
300    TODO("SEND COMMAND\n");
301
302    err = coprocessor->tx_vtbl.do_work(coprocessor, NOP_CONT, clientva, DATA_SIZE,
303                                       0, 0);
304    if (err_is_fail(err)) {
305        USER_PANIC_ERR(err, "failed to send the message");
306    }
307
308
309    PRINTF("Wait for co-processor to finish\n");
310
311    while(!work_is_done) {
312        messages_wait_and_handle_next();
313    }
314
315    hline
316
317    PRINTF("Perform DMA from co-processor GDDR to system RAM\n");
318
319    err = xeon_phi_client_dma_memcpy(XEON_PHI_ID, dmem.devaddr, addr, DATA_SIZE);
320    if (err_is_fail(err)) {
321        USER_PANIC_ERR(err, "failed to do the dma mem cpy\n");
322    }
323
324    hline
325
326    PRINTF("Collect the data\n");
327    for(size_t i = 0; i < DATA_SIZE; i += sizeof(uint64_t)) {
328        uint64_t *p = (uint64_t *)(dmem.vbase + i);
329        if (*p != i + 1) {
330            PRINTF("CORRUPTED DATA! [%zu] %lu\n", i, *p);
331        }
332    }
333
334
335    HLINE
336    PRINTF("DONE!\n");
337    HLINE
338#if RUN_OSDI_BENCHMARK
339    char *output = NULL;
340    int output_length = 0;
341    int error_code = 0;
342
343    err = skb_execute_query("bench_real.");
344    output = strdup(skb_get_output());
345    assert(output != NULL);
346    output_length = strlen(output);
347    error_code = skb_read_error_code();
348    if ((error_code != 0) || err_is_fail(err)) {
349        PRINTF("bench_synth(): SKB returned error code %d \n", error_code);
350
351        const char *errout = skb_get_error_output();
352        PRINTF("SKB error returned: %s\n", errout);
353        PRINTF("SKB output: %s\n", output);
354        free(output);
355    } else {
356        PRINTF("======== NODE ALLOC BENCHMARK DONE ======== \n");
357    }
358
359    err = skb_execute_query("bench_synth.");
360    output = strdup(skb_get_output());
361    assert(output != NULL);
362    output_length = strlen(output);
363    error_code = skb_read_error_code();
364    if ((error_code != 0) || err_is_fail(err)) {
365        PRINTF("bench_synth(): SKB returned error code %d \n", error_code);
366
367        const char *errout = skb_get_error_output();
368        PRINTF("SKB error returned: %s\n", errout);
369        PRINTF("SKB output: %s\n", output);
370        free(output);
371    } else {
372        PRINTF("======== BENCHMARK DONE ======== \n");
373    }
374
375#endif
376    while(true) {
377        event_dispatch(get_default_waitset());
378    }
379}
380
381