1/* 2 * Copyright (c) 2007-12 ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8 */ 9#include <stdio.h> 10#include <string.h> 11#include <stdlib.h> 12 13#include <barrelfish/barrelfish.h> 14#include <barrelfish/ump_chan.h> 15#include <bench/bench.h> 16#include <barrelfish/sys_debug.h> 17 18#include <xeon_phi/xeon_phi.h> 19#include <xeon_phi/xeon_phi_client.h> 20 21#include <dma/xeon_phi/xeon_phi_dma.h> 22#include <dma/dma_request.h> 23#include <dma/client/dma_client_device.h> 24#include <dma/dma_manager_client.h> 25 26#include <driverkit/hwmodel.h> 27#include <driverkit/iommu.h> 28 29#include <if/xomp_defs.h> 30#include <barrelfish/deferred.h> 31#include <skb/skb.h> 32 33 34#define ENABLE_NETWORKING 0 35#define RUN_OSDI_BENCHMARK 0 36 37#define HLINE debug_printf("#######################################################\n"); 38#define hline debug_printf("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n"); 39#define PRINTF(x...) debug_printf("[HW Models] " x) 40#define TODO(x...) debug_printf("[HW Models] TODO: " x) 41 42 43/** 44 * callback is executed when 45 */ 46static errval_t msg_open_cb(xphi_dom_id_t domain, 47 uint64_t usrdata, 48 struct capref msgframe, 49 uint8_t type) 50{ 51 PRINTF("msg_open callback\n"); 52 return SYS_ERR_OK; 53} 54 55 56static struct xeon_phi_callbacks callbacks = { 57 .open = msg_open_cb 58}; 59 60 61 62 63struct xomp_binding *coprocessor = NULL; 64static bool work_is_done = false; 65 66static int32_t node_id_dma = -1; 67static int32_t node_id_offload_core = -1; 68static int32_t node_id_self = -1; 69static int32_t node_id_ram = -1; 70#if ENABLE_NETWORKING 71static int32_t node_id_network = -1; 72#endif 73 74#define OFFLOAD_PATH "k1om/sbin/hwmodel/offload" 75#define XEON_PHI_ID 0 76#define XEON_PHI_CORE 1 77//#define DATA_SIZE (1UL << 30) 78#define DATA_SIZE (1UL << 21) 79#define MSG_CHANNEL_SIZE (1UL << 20) 80#define MSG_FRAME_SIZE (2 * MSG_CHANNEL_SIZE) 81 82static void get_node_ids(void) 83{ 84 PRINTF("Obtaining "); 85 node_id_self = driverkit_hwmodel_get_my_node_id(); 86 PRINTF("node id self is %d\n", node_id_self); 87 88 node_id_dma = xeon_phi_client_get_node_id(XEON_PHI_ID, "dma"); 89 PRINTF("node id dma is %d\n", node_id_dma); 90 91 node_id_offload_core = xeon_phi_client_get_node_id(XEON_PHI_ID, 92 "core: 1"); 93 PRINTF("node id offload is %d\n", node_id_offload_core); 94 95 #if ENABLE_NETWORKING 96 node_id_network = driverkit_hwmodel_lookup_node_id("e1000"); 97 PRINTF("node id network is %d\n", node_id_offload_core); 98 #endif 99 100 node_id_ram = driverkit_hwmodel_lookup_dram_node_id(); 101 PRINTF("node id ram is %d\n", node_id_offload_core); 102 103 if (node_id_self == -1 || node_id_offload_core == -1 104 || node_id_dma == -1 || node_id_ram == -1 105 #if ENABLE_NETWORKING 106 || node_id_network == -1 107 #endif 108 ) { 109 USER_PANIC("Failed to obtain node id\n"); 110 } 111} 112 113 114 115static void notify_rx(struct xomp_binding *_binding, uint64_t arg, errval_t err) 116{ 117 PRINTF("Work is done callback.\n"); 118 119 work_is_done = true; 120} 121 122 123//static void connect_cb(void *st, struct xomp_binding *binding) 124static void connect_cb(void *st, errval_t err, struct xomp_binding *_binding) 125{ 126 PRINTF("Client connected.\n"); 127 if (err_is_fail(err)) { 128 USER_PANIC_ERR(err, "failed to accept the connection"); 129 } 130 coprocessor = _binding; 131 coprocessor->rx_vtbl.done_notify = notify_rx; 132} 133 134static void message_passing_init(struct dmem *msgmem) 135{ 136 errval_t err; 137 138 struct xomp_frameinfo fi = { 139 .sendbase = msgmem->devaddr, 140 .inbuf = (void *)(msgmem->vbase + MSG_CHANNEL_SIZE), 141 .inbufsize = MSG_CHANNEL_SIZE, 142 .outbuf = (void *)(msgmem->vbase), 143 .outbufsize = MSG_CHANNEL_SIZE, 144 }; 145 err = xomp_accept(&fi, NULL, connect_cb, get_default_waitset(), 146 IDC_EXPORT_FLAGS_DEFAULT); 147 if (err_is_fail(err)) { 148 USER_PANIC_ERR(err, "failed to accept the connection"); 149 } 150} 151 152 153int main(int argc, char **argv) 154{ 155 errval_t err; 156 157 // initialize the xeon phi client 158 err = xeon_phi_client_init(XEON_PHI_ID); 159 if (err_is_fail(err)) { 160 USER_PANIC_ERR(err, "failed to initialize the xeon phi client"); 161 } 162 163 HLINE 164 PRINTF("Offload Scenario started.\n"); 165 HLINE 166 167 // set the callbacks 168 xeon_phi_client_set_callbacks(&callbacks); 169 170 // obtain the node id 171 get_node_ids(); 172 173 PRINTF("Allocating memory for data processing of %zu MB\n", DATA_SIZE >> 20); 174 175 struct capref mem; 176 int32_t nodes_data[] = { 177 node_id_dma, node_id_self, 178 #if ENABLE_NETWORKING 179 node_id_network, 180 #endif 181 0}; 182 err = driverkit_hwmodel_frame_alloc(&mem, DATA_SIZE, node_id_ram, nodes_data); 183 if (err_is_fail(err)) { 184 USER_PANIC_ERR(err, "Failed to allocate memory\n"); 185 } 186 187 hline 188 189 PRINTF("Mapping area of memory.\n"); 190 struct dmem dmem; 191 err = driverkit_hwmodel_vspace_map(node_id_self, mem, VREGION_FLAGS_READ_WRITE, 192 &dmem); 193 if (err_is_fail(err)) { 194 USER_PANIC_ERR(err, "failed to map the memory\n"); 195 } 196 197 /* TODO: allocate vspace in client */ 198 uint64_t clientva = (20UL * (512UL << 30)); 199 200 hline 201 202 PRINTF("Populating memory region with data\n"); 203 for(size_t i = 0; i < DATA_SIZE; i += sizeof(uint64_t)) { 204 uint64_t *p = (uint64_t *)(dmem.vbase + i); 205 *p = i; 206 } 207 208 hline 209 210 PRINTF("Allocating memory for message passing of %zu kb\n", MSG_FRAME_SIZE >> 10); 211 212 struct capref msgframemem; 213 int32_t nodes_msg[] = { 214 node_id_offload_core, node_id_self, 0 215 }; 216 err = driverkit_hwmodel_frame_alloc(&msgframemem, MSG_FRAME_SIZE, node_id_ram, nodes_msg); 217 if (err_is_fail(err)) { 218 USER_PANIC_ERR(err, "Failed to allocate memory\n"); 219 } 220 221 struct dmem msgmem; 222 err = driverkit_hwmodel_vspace_map(node_id_self, msgframemem, VREGION_FLAGS_READ_WRITE, 223 &msgmem); 224 if (err_is_fail(err)) { 225 USER_PANIC_ERR(err, "failed to map the memory\n"); 226 } 227 228 message_passing_init(&msgmem); 229 230 hline 231 232 PRINTF("Allocating memory on the co-processor of %zu MB\n", DATA_SIZE >> 20); 233 234 struct capref offloadmem; 235 #if 0 236 237 int32_t nodes_offload[] = { 238 node_id_offload_core, node_id_dma, 0 239 }; 240 err = driverkit_hwmodel_frame_alloc(&offloadmem, DATA_SIZE, node_id_gddr, 241 nodes_offload); 242 #endif 243 err = xeon_phi_client_alloc_memory(XEON_PHI_ID, &offloadmem, DATA_SIZE); 244 if (err_is_fail(err)) { 245 USER_PANIC_ERR(err, "Failed to allocate memory\n"); 246 } 247 248 hline 249 250 PRINTF("Prepare DMA from system RAM to co-processor GDDR\n"); 251 252 uint64_t addr; 253 err = xeon_phi_client_dma_register(XEON_PHI_ID, mem, &addr); 254 if (err_is_fail(err)) { 255 USER_PANIC_ERR(err, "failed to register memory\n"); 256 } 257 dmem.devaddr = addr; 258 259 err = xeon_phi_client_dma_register(XEON_PHI_ID, offloadmem, &addr); 260 if (err_is_fail(err)) { 261 USER_PANIC_ERR(err, "failed to register memory\n"); 262 } 263 264 hline 265 266 PRINTF("Spawning process on co-processor\n"); 267 xphi_dom_id_t domid; 268 err = xeon_phi_client_spawn(XEON_PHI_ID, XEON_PHI_CORE, OFFLOAD_PATH, NULL, 269 msgframemem, 0, &domid); 270 if (err_is_fail(err)) { 271 USER_PANIC_ERR(err, "failed to start the programm\n"); 272 } 273 274 PRINTF("Spawned process with did: 0x%lx, waiting for connection\n", domid); 275 276 while(coprocessor == NULL) { 277 messages_wait_and_handle_next(); 278 } 279 280 hline 281 282 PRINTF("Adding DMA mem\n"); 283 err = xeon_phi_client_chan_open(XEON_PHI_ID, domid, clientva, offloadmem, 1); 284 if (err_is_fail(err)) { 285 USER_PANIC_ERR(err, "failed to set the channel"); 286 } 287 288 hline 289 290 PRINTF("Perform DMA from system RAM to co-processor GDDR [%lx] -> [%lx]\n", 291 dmem.devaddr, addr); 292 err = xeon_phi_client_dma_memcpy(XEON_PHI_ID, addr, dmem.devaddr, DATA_SIZE); 293 if (err_is_fail(err)) { 294 USER_PANIC_ERR(err, "failed to do the dma mem cpy\n"); 295 } 296 297 hline 298 299 PRINTF("Sending command to the co-processor\n"); 300 TODO("SEND COMMAND\n"); 301 302 err = coprocessor->tx_vtbl.do_work(coprocessor, NOP_CONT, clientva, DATA_SIZE, 303 0, 0); 304 if (err_is_fail(err)) { 305 USER_PANIC_ERR(err, "failed to send the message"); 306 } 307 308 309 PRINTF("Wait for co-processor to finish\n"); 310 311 while(!work_is_done) { 312 messages_wait_and_handle_next(); 313 } 314 315 hline 316 317 PRINTF("Perform DMA from co-processor GDDR to system RAM\n"); 318 319 err = xeon_phi_client_dma_memcpy(XEON_PHI_ID, dmem.devaddr, addr, DATA_SIZE); 320 if (err_is_fail(err)) { 321 USER_PANIC_ERR(err, "failed to do the dma mem cpy\n"); 322 } 323 324 hline 325 326 PRINTF("Collect the data\n"); 327 for(size_t i = 0; i < DATA_SIZE; i += sizeof(uint64_t)) { 328 uint64_t *p = (uint64_t *)(dmem.vbase + i); 329 if (*p != i + 1) { 330 PRINTF("CORRUPTED DATA! [%zu] %lu\n", i, *p); 331 } 332 } 333 334 335 HLINE 336 PRINTF("DONE!\n"); 337 HLINE 338#if RUN_OSDI_BENCHMARK 339 char *output = NULL; 340 int output_length = 0; 341 int error_code = 0; 342 343 err = skb_execute_query("bench_real."); 344 output = strdup(skb_get_output()); 345 assert(output != NULL); 346 output_length = strlen(output); 347 error_code = skb_read_error_code(); 348 if ((error_code != 0) || err_is_fail(err)) { 349 PRINTF("bench_synth(): SKB returned error code %d \n", error_code); 350 351 const char *errout = skb_get_error_output(); 352 PRINTF("SKB error returned: %s\n", errout); 353 PRINTF("SKB output: %s\n", output); 354 free(output); 355 } else { 356 PRINTF("======== NODE ALLOC BENCHMARK DONE ======== \n"); 357 } 358 359 err = skb_execute_query("bench_synth."); 360 output = strdup(skb_get_output()); 361 assert(output != NULL); 362 output_length = strlen(output); 363 error_code = skb_read_error_code(); 364 if ((error_code != 0) || err_is_fail(err)) { 365 PRINTF("bench_synth(): SKB returned error code %d \n", error_code); 366 367 const char *errout = skb_get_error_output(); 368 PRINTF("SKB error returned: %s\n", errout); 369 PRINTF("SKB output: %s\n", output); 370 free(output); 371 } else { 372 PRINTF("======== BENCHMARK DONE ======== \n"); 373 } 374 375#endif 376 while(true) { 377 event_dispatch(get_default_waitset()); 378 } 379} 380 381