1/* 2 * Copyright (c) 2014 ETH Zurich. 3 * All rights reserved. 4 * 5 * This file is distributed under the terms in the attached LICENSE file. 6 * If you do not find this file, copies can be found by writing to: 7 * ETH Zurich D-INFK, Universitaetsstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8 */ 9#include <stdio.h> 10#include <string.h> 11#include <stdlib.h> 12#include <limits.h> 13 14#include <barrelfish/barrelfish.h> 15#include <barrelfish/ump_chan.h> 16#include <bench/bench.h> 17#include <barrelfish/sys_debug.h> 18#include <dma/dma.h> 19#include <dma/dma_request.h> 20#include <dma/client/dma_client_device.h> 21#include <dma/dma_manager_client.h> 22 23#include "benchmark.h" 24 25static void xphi_bench_print_settings(void) 26{ 27 printf("Core host: %u, Core card: %u\n", 28 XPHI_BENCH_CORE_HOST, 29 XPHI_BENCH_CORE_CARD); 30 printf("Buffer size = %lu bytes, processing runs %u\n", 31 XPHI_BENCH_BUF_SIZE, 32 XPHI_BENCH_PROCESS_RUNS); 33 printf("Bytes per run: %lu kB\n", 34 (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE) / 1024); 35 36#ifdef XPHI_BENCH_PROCESS_CARD 37 printf("Processing Side: Card\n"); 38#else 39 printf("Processing Side: Host\n"); 40#endif 41 42#ifdef XPHI_BENCH_CHAN_CARD 43#ifdef XPHI_BENCH_BUFFER_CARD 44 printf("Memory Setup (Normal): Host [ ] Card [ UMP | UMP | BUFFERS ] \n"); 45 printf("Memory Setup (Reversed): Host [ UMP | UMP | BUFFERS ] Card [ ] \n"); 46#else 47 printf("Memory Setup (Normal): Host [ BUFFERS ] Card [ UMP | UMP ] \n"); 48 printf("Memory Setup (Reversed): Host [ UMP | UMP | BUFFERS ] Card [ ] \n"); 49#endif 50#endif 51 52#ifdef XPHI_BENCH_CHAN_HOST 53#ifdef XPHI_BENCH_BUFFER_CARD 54 printf("Memory Setup (Normal): Host [ UMP | UMP ] Card [ BUFFERS ] \n"); 55 printf("Memory Setup (Reversed): Host [ UMP | UMP | BUFFERS ] Card [ ] \n"); 56#else 57 printf("Memory Setup (Normal): Host [ BUFFERS ] Card [ UMP | UMP ] \n"); 58 printf("Memory Setup (Reversed): Host [ ] Card [ UMP | UMP | BUFFERS ] \n"); 59#endif 60#endif 61 62#ifdef XPHI_BENCH_CHAN_DEFAULT 63#ifdef XPHI_BENCH_BUFFER_CARD 64 printf("Memory Setup (Normal): Host [ UMP ] Card [ UMP | BUFFERS ] \n"); 65 printf("Memory Setup (Reversed): Host [ UMP | BUFFERS ] Card [ UMP ] \n"); 66#else 67 printf("Memory Setup (Normal): Host [ UMP | BUFFERS ] Card [ UMP ] \n"); 68 printf("Memory Setup (Reversed): Host [ UMP ] Card [ UMP | BUFFERS ] \n"); 69#endif 70 printf("UMP Channel Setup (Normal): Recv Remote, Send Local\n"); 71 printf("UMP Channel Setup (Reversed): Recv Local, Send Remote\n"); 72#endif 73} 74 75errval_t xphi_bench_memwrite(void *target) 76{ 77 return SYS_ERR_OK; 78 79 debug_printf("Executing local measurements\n"); 80 81 errval_t err; 82 83 bench_init(); 84 85 cycles_t tsc_start, tsc_end; 86 cycles_t result[4]; 87 uint64_t tscperus; 88 bench_ctl_t *ctl; 89 90 err = sys_debug_get_tsc_per_ms(&tscperus); 91 assert(err_is_ok(err)); 92 tscperus /= 1000; 93 94 debug_printf("tscperus = %lu\n", tscperus); 95 96 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 3, XPHI_BENCH_NUM_REPS); 97 98 debug_printf("starting benchmark...\n"); 99 uint32_t rep_counter = 0; 100 do { 101 debug_printf(" > run %u of %u memwrite of %lu bytes..\n", rep_counter++, 102 XPHI_BENCH_NUM_REPS, 103 XPHI_BENCH_BUF_FRAME_SIZE); 104 105 /* using memset */ 106 tsc_start = bench_tsc(); 107 memset(target, 0, XPHI_BENCH_BUF_FRAME_SIZE); 108 tsc_end = bench_tsc(); 109 if (tsc_end < tsc_start) { 110 result[0] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead(); 111 } else { 112 result[0] = (tsc_end - tsc_start - bench_tscoverhead()); 113 } 114 115 /* writing in a loop*/ 116 volatile uint8_t *buf = target; 117 tsc_start = bench_tsc(); 118 for (uint32_t i = 0; i < XPHI_BENCH_BUF_FRAME_SIZE; ++i) { 119 buf[i] = (uint8_t) 1; 120 } 121 tsc_end = bench_tsc(); 122 if (tsc_end < tsc_start) { 123 result[1] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead(); 124 } else { 125 result[1] = (tsc_end - tsc_start - bench_tscoverhead()); 126 } 127 128 /* reading in a while loop */ 129 buf = target; 130 buf[XPHI_BENCH_BUF_FRAME_SIZE - 1] = 0; 131 tsc_start = bench_tsc(); 132 while (*(buf++)) 133 ; 134 135 tsc_end = bench_tsc(); 136 if (tsc_end < tsc_start) { 137 result[2] = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead(); 138 } else { 139 result[2] = (tsc_end - tsc_start - bench_tscoverhead()); 140 } 141 142 } while (!bench_ctl_add_run(ctl, result)); 143 144 // bench_ctl_dump_csv(ctl, "", tscperus); 145 bench_ctl_dump_analysis(ctl, 0, "memset()", tscperus); 146 bench_ctl_dump_analysis(ctl, 1, "forloop write", tscperus); 147 bench_ctl_dump_analysis(ctl, 2, "forloop read", tscperus); 148 return SYS_ERR_OK; 149 150 return SYS_ERR_OK; 151} 152 153static volatile uint8_t dma_done; 154 155static void dma_done_cb(errval_t err, 156 dma_req_id_t id, 157 void *st) 158{ 159 dma_req_id_t *id2 = st; 160 if (id != *id2) { 161 debug_printf("id %016lx, %016lx\n", id, *id2); 162 } 163 assert(id == *id2); XPHI_BENCH_DBG("DMA request executed...\n"); 164 dma_done = 0x1; 165} 166 167static inline cycles_t calculate_time(cycles_t tsc_start, 168 cycles_t tsc_end) 169{ 170 cycles_t result; 171 if (tsc_end < tsc_start) { 172 result = (LONG_MAX - tsc_start) + tsc_end - bench_tscoverhead(); 173 } else { 174 result = (tsc_end - tsc_start - bench_tscoverhead()); 175 } 176 return result; 177} 178 179static errval_t measure_memcpy(void *dst, 180 void *src) 181{ 182 errval_t err; 183 cycles_t tsc_start, tsc_end; 184 uint64_t tscperus; 185 bench_ctl_t *ctl; 186 187 cycles_t result; 188 189 debug_printf("--------------------------------\n"); 190 debug_printf("Measuring memcpy...\n"); 191 debug_printf("--------------------------------\n"); 192 193 bench_init(); 194 195 err = sys_debug_get_tsc_per_ms(&tscperus); 196 assert(err_is_ok(err)); 197 tscperus /= 1000; 198 199 for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS - 2; 200 ++i) { 201 size_t size = (1UL << i); 202 203 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS); 204 205 uint8_t idx = 0; 206 //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst); 207 do { 208 tsc_start = bench_tsc(); 209 memcpy(dst, src, size); 210 tsc_end = bench_tsc(); 211 result = calculate_time(tsc_start, tsc_end); 212 idx++; 213 } while (!bench_ctl_add_run(ctl, &result)); 214 char buf[50]; 215 216 snprintf(buf, sizeof(buf), "%u", i); 217 bench_ctl_dump_analysis(ctl, 0, buf, tscperus); 218 219 bench_ctl_destroy(ctl); 220 } 221 debug_printf("--------------------------------\n"); 222 return SYS_ERR_OK; 223} 224 225static errval_t measure_forloop(void *dst, 226 void *src) 227{ 228 errval_t err; 229 cycles_t tsc_start, tsc_end; 230 uint64_t tscperus; 231 bench_ctl_t *ctl; 232 233 cycles_t result; 234 235 debug_printf("--------------------------------\n"); 236 debug_printf("Measuring Forloop...\n"); 237 debug_printf("--------------------------------\n"); 238 239 bench_init(); 240 241 err = sys_debug_get_tsc_per_ms(&tscperus); 242 assert(err_is_ok(err)); 243 tscperus /= 1000; 244 245 for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS - 2; 246 ++i) { 247 size_t size = (1UL << i); 248 249 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS); 250 251 uint8_t idx = 0; 252 //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst); 253 do { 254 volatile uint64_t *bsrc = src; 255 volatile uint64_t *bdst = dst; 256 tsc_start = bench_tsc(); 257 for (uint32_t j = 0; j < size / sizeof(uint64_t); ++j) { 258 bdst[j] = bsrc[j]; 259 } 260 tsc_end = bench_tsc(); 261 result = calculate_time(tsc_start, tsc_end); 262 idx++; 263 } while (!bench_ctl_add_run(ctl, &result)); 264 char buf[50]; 265 266 snprintf(buf, sizeof(buf), "%u", i); 267 bench_ctl_dump_analysis(ctl, 0, buf, tscperus); 268 269 bench_ctl_destroy(ctl); 270 } 271 debug_printf("--------------------------------\n"); 272 return SYS_ERR_OK; 273} 274 275static errval_t measure_dma(struct dma_device *dev, 276 lpaddr_t pdst, 277 lpaddr_t psrc) 278{ 279 errval_t err; 280 cycles_t tsc_start, tsc_end; 281 uint64_t tscperus; 282 bench_ctl_t *ctl; 283 284 cycles_t result; 285 debug_printf("--------------------------------\n"); 286 debug_printf("Measuring DMA...\n"); 287 debug_printf("--------------------------------\n"); 288 // avoid host-host DMA. 289 if (psrc == 0) { 290 debug_printf("skipping host-host transfer\n"); 291 return SYS_ERR_OK; 292 } 293 294 bench_init(); 295 296 err = sys_debug_get_tsc_per_ms(&tscperus); 297 assert(err_is_ok(err)); 298 tscperus /= 1000; 299 300 for (int i = XPHI_BENCH_SIZE_MIN_BITS; i <= XPHI_BENCH_SIZE_MAX_BITS; ++i) { 301 size_t size = (1UL << i); 302 303 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS); 304 305 uint8_t idx = 0; 306 //debug_printf("Benchmark: Run %u, size = %lu bytes, [%016lx] -> [%016lx]\n", idx, size, src, dst); 307 do { 308 309 dma_req_id_t id; 310 311 struct dma_req_setup setup = { 312 .done_cb = dma_done_cb, 313 .cb_arg = &id, 314 .args = { 315 .memcpy = { 316 .src = psrc, 317 .dst = pdst, 318 .bytes = size 319 } 320 } 321 }; 322 323 dma_done = 0x0; 324 325 tsc_start = bench_tsc(); 326 err = dma_request_memcpy(dev, &setup, &id); 327 if (err_is_fail(err)) { 328 USER_PANIC_ERR(err, "could not exec the transfer"); 329 } 330 while (!dma_done) { 331 messages_wait_and_handle_next(); 332 } 333 tsc_end = bench_tsc(); 334 result = calculate_time(tsc_start, tsc_end); 335 idx++; 336 } while (!bench_ctl_add_run(ctl, &result)); 337 char buf[50]; 338 339 snprintf(buf, sizeof(buf), "%u", i); 340 bench_ctl_dump_analysis(ctl, 0, buf, tscperus); 341 342 bench_ctl_destroy(ctl); 343 } 344 345 debug_printf("--------------------------------\n"); 346 347 return SYS_ERR_OK; 348} 349 350errval_t xphi_bench_memcpy(struct dma_device *dev, 351 void *dst, 352 void *src, 353 size_t size, 354 lpaddr_t pdst, 355 lpaddr_t psrc) 356{ 357 errval_t err; 358 uint64_t tscperus; 359 360 bench_init(); 361 362 err = sys_debug_get_tsc_per_ms(&tscperus); 363 assert(err_is_ok(err)); 364 tscperus /= 1000; 365 366 debug_printf("Starting memcpy benchmark. tsc/us=%lu, cpysize=%lu bytes\n", 367 tscperus, (uint64_t) size); 368 369 if (0) { 370 measure_memcpy(dst, src); 371 372 measure_forloop(dst, src); 373 } 374 measure_dma(dev, pdst, psrc); 375 376 return SYS_ERR_OK; 377} 378 379void xphi_bench_start_echo(struct bench_bufs *bufs, 380 struct ump_chan *uc) 381{ 382 errval_t err; 383 384 volatile struct ump_message *msg; 385 volatile struct ump_message *msg_recv; 386 387 struct ump_control ctrl; 388 msg = ump_chan_get_next(uc, &ctrl); 389 390 // send initiator message 391 debug_printf("signal ready.\n"); 392 msg->data[0] = 123; 393 msg->header.control = ctrl; 394 395 debug_printf("xphi_bench_start_echo: receiving messages.\n"); 396#ifdef XPHI_BENCH_CHECK_STOP 397 uint64_t data = 0x0; 398 while (data != XPHI_BENCH_STOP_FLAG) { 399#else 400 while(true) { 401#endif 402 err = ump_chan_recv(uc, &msg_recv); 403 if (err_is_ok(err)) { 404 XPHI_BENCH_DBG("received ump message [%p]\n", msg_recv); 405 msg = ump_chan_get_next(uc, &ctrl); 406 msg->header.control = ctrl; 407#ifdef XPHI_BENCH_CHECK_STOP 408 data = msg_recv->data[0]; 409#endif 410 } 411 } 412 if (data == XPHI_BENCH_STOP_FLAG) { 413 debug_printf("xphi_bench_start_echo: received stop flag.\n"); 414 } 415} 416 417void xphi_bench_start_processor(struct bench_bufs *bufs, 418 struct ump_chan *uc) 419{ 420 errval_t err; 421 422 volatile struct ump_message *msg; 423 424 uint64_t buf_idx = 0; 425 426 struct ump_control ctrl; 427 msg = ump_chan_get_next(uc, &ctrl); 428 429 // send initiator message 430 debug_printf("signal ready.\n"); 431 msg->data[0] = 123; 432 msg->header.control = ctrl; 433 434 debug_printf("xphi_bench_start_processor: receiving messages.\n"); 435#ifdef XPHI_BENCH_CHECK_STOP 436 while (buf_idx != XPHI_BENCH_STOP_FLAG) { 437#else 438 while(true) { 439#endif 440 err = ump_chan_recv(uc, &msg); 441 if (err_is_ok(err)) { 442 buf_idx = msg->data[0]; 443 XPHI_BENCH_DBG("received ump message [%016lx]\n", buf_idx); 444 struct bench_buf *buf = &bufs->buf[buf_idx]; 445 xphi_bench_fill_buffer(buf, XPHI_BENCH_PROCESS_RUNS); 446 msg = ump_chan_get_next(uc, &ctrl); 447 msg->data[0] = buf_idx; 448 msg->header.control = ctrl; 449 } 450 } 451 if (buf_idx == XPHI_BENCH_STOP_FLAG) { 452 debug_printf("xphi_bench_start_processor: received stop flag\n"); 453 } 454} 455 456errval_t xphi_bench_start_initator_rtt(struct bench_bufs *bufs, 457 struct ump_chan *uc) 458{ 459 errval_t err; 460 cycles_t tsc_start, tsc_end; 461 cycles_t result; 462 uint64_t tscperus; 463 bench_ctl_t *ctl; 464 465 volatile struct ump_message *msg; 466 467 bench_init(); 468 469 err = sys_debug_get_tsc_per_ms(&tscperus); 470 assert(err_is_ok(err)); 471 tscperus /= 1000; 472 473 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, 474 XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS); 475 476 debug_printf("RTT benchmark: waiting for ready signal.\n"); 477 while (1) { 478 err = ump_chan_recv(uc, &msg); 479 if (err_is_ok(err)) { 480 break; 481 } 482 } 483 484 struct ump_control ctrl; 485 486 debug_printf("Starting RTT benchmark tsc/us=%lu\n", tscperus); 487 uint32_t rep_counter = 0; 488 do { 489 if (!(rep_counter++ % XPHI_BENCH_NUM_RUNS)) { 490 debug_printf(" > run %u of %u...\n", rep_counter, 491 XPHI_BENCH_NUM_REPS * XPHI_BENCH_NUM_RUNS); 492 } 493 tsc_start = bench_tsc(); 494 msg = ump_chan_get_next(uc, &ctrl); 495 msg->header.control = ctrl; 496 do { 497 err = ump_chan_recv(uc, &msg); 498 } while (err_is_fail(err)); 499 tsc_end = bench_tsc(); 500 result = calculate_time(tsc_start, tsc_end); 501 502 } while (!bench_ctl_add_run(ctl, &result)); 503 504#ifdef XPHI_BENCH_CHECK_STOP 505 msg = ump_chan_get_next(uc, &ctrl); 506 msg->data[0] = XPHI_BENCH_STOP_FLAG; 507 msg->header.control = ctrl; 508#endif 509 xphi_bench_print_settings(); 510 // bench_ctl_dump_csv(ctl, "", tscperus); 511 bench_ctl_dump_analysis(ctl, 0, "RTT", tscperus); 512 513 return SYS_ERR_OK; 514} 515 516errval_t xphi_bench_start_initator_sync(struct bench_bufs *bufs, 517 struct ump_chan *uc) 518{ 519 errval_t err; 520 521 cycles_t tsc_start, tsc_end; 522 cycles_t result; 523 uint64_t tscperus; 524 bench_ctl_t *ctl; 525 526 volatile struct ump_message *msg; 527 uint64_t buf_idx; 528 529 bench_init(); 530 531 uint32_t n_recv = 0; 532 533 err = sys_debug_get_tsc_per_ms(&tscperus); 534 assert(err_is_ok(err)); 535 tscperus /= 1000; 536 537 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS); 538 539 debug_printf("Sync Throughput Benchmark: waiting for ready signal...\n"); 540 while (1) { 541 err = ump_chan_recv(uc, &msg); 542 if (err_is_ok(err)) { 543 break; 544 } 545 } 546 547 struct ump_control ctrl; 548 549 debug_printf("Starting sync throughput benchmark. tsc/us=%lu\n", tscperus); 550 uint32_t rep_counter = 0; 551 do { 552 uint64_t b_idx = 0; 553 554 debug_printf(" > run %u of %u with %u moves...\n", rep_counter++, 555 XPHI_BENCH_NUM_REPS, 556 XPHI_BENCH_NUM_RUNS); 557 558 tsc_start = bench_tsc(); 559 560 msg = ump_chan_get_next(uc, &ctrl); 561 struct bench_buf *buf = &bufs->buf[b_idx]; 562 xphi_bench_fill_buffer(buf, 1); 563 564 // send initiator message 565 XPHI_BENCH_DBG("sending message [%lu]\n", b_idx); 566 msg->data[0] = b_idx; 567 msg->header.control = ctrl; 568 n_recv = 0; 569 for (uint32_t irun = 0; irun < (XPHI_BENCH_NUM_RUNS - 1); ++irun) { 570 do { 571 err = ump_chan_recv(uc, &msg); 572 } while (err_is_fail(err)); 573 574 n_recv++; 575 buf_idx = msg->data[0]; 576 uint32_t ret_count = 0; 577 buf = &bufs->buf[b_idx]; 578 xphi_bench_read_buffer(buf, 1, &ret_count); 579 XPHI_BENCH_DBG("received message [%lu]\n", buf_idx); 580 assert(buf_idx == b_idx); 581 b_idx = (b_idx + 1) & (bufs->num - 1); 582 583 buf = &bufs->buf[b_idx]; 584 xphi_bench_fill_buffer(buf, 1); 585 586 XPHI_BENCH_DBG("sending message [%lu]\n", b_idx); 587 msg = ump_chan_get_next(uc, &ctrl); 588 assert(msg); 589 msg->data[0] = b_idx; 590 msg->header.control = ctrl; 591 } 592 593 while (n_recv < XPHI_BENCH_NUM_RUNS) { 594 err = ump_chan_recv(uc, &msg); 595 if (err_is_ok(err)) { 596 buf_idx = msg->data[0]; 597 XPHI_BENCH_DBG("received message [%"PRIu64"]\n", buf_idx); 598 buf = &bufs->buf[buf_idx]; 599 uint32_t ret_count = 0; 600 xphi_bench_read_buffer(buf, 1, &ret_count); 601 n_recv++; 602 } 603 } 604 tsc_end = bench_tsc(); 605 result = calculate_time(tsc_start, tsc_end); 606 } while (!bench_ctl_add_run(ctl, &result)); 607 608#ifdef XPHI_BENCH_CHECK_STOP 609 msg = ump_chan_get_next(uc, &ctrl); 610 msg->data[0] = XPHI_BENCH_STOP_FLAG; 611 msg->header.control = ctrl; 612#endif 613 614 double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus; 615 avg_s /= 1000000; 616 xphi_bench_print_settings(); 617// bench_ctl_dump_csv(ctl, "", tscperus); 618 bench_ctl_dump_analysis(ctl, 0, "Sync Throughput", tscperus); 619 printf("Average seconds: %f\n", avg_s); 620 printf("Average throughput: %f GByte/s\n", 621 (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024 622 / 1024) 623 / (avg_s)); 624 printf("Average throughput (with processing): %f GByte/s\n", 625 (XPHI_BENCH_PROCESS_RUNS * ((double) (XPHI_BENCH_NUM_RUNS 626 * XPHI_BENCH_BUF_SIZE)) 627 / 1024 / 1024 / 1024) 628 / (avg_s)); 629 630 return SYS_ERR_OK; 631} 632 633errval_t xphi_bench_start_initator_async(struct bench_bufs *bufs, 634 struct ump_chan *uc) 635{ 636 volatile struct ump_message *msg; 637 uint64_t buf_idx; 638 uint32_t in_transit = 0; 639 640 errval_t err; 641 642 bench_init(); 643 644 cycles_t tsc_start; 645 cycles_t result; 646 uint64_t tscperus; 647 bench_ctl_t *ctl; 648 649 err = sys_debug_get_tsc_per_ms(&tscperus); 650 assert(err_is_ok(err)); 651 tscperus /= 1000; 652 653 debug_printf("tscperus = %lu\n", tscperus); 654 655 ctl = bench_ctl_init(BENCH_MODE_FIXEDRUNS, 1, XPHI_BENCH_NUM_REPS); 656 657 debug_printf("waiting for ready signal\n"); 658 while (1) { 659 err = ump_chan_recv(uc, &msg); 660 if (err_is_ok(err)) { 661 break; 662 } 663 } 664 665 debug_printf("starting benchmark ASYNC...\n"); 666 667 struct ump_control ctrl; 668 669 uint32_t rep_counter = 0; 670 do { 671 uint64_t b_idx = 0; 672 debug_printf(" > run %u of %u with %u moves...\n", rep_counter++, 673 XPHI_BENCH_NUM_REPS, 674 XPHI_BENCH_NUM_RUNS); 675 tsc_start = bench_tsc(); 676 677 uint32_t irun = 0; 678 uint32_t n_recv = 0; 679 struct bench_buf *buf; 680 while (irun < XPHI_BENCH_NUM_RUNS) { 681 if (in_transit < XPHI_BENCH_MSG_NUM) { 682 msg = ump_chan_get_next(uc, &ctrl); 683 if (!msg) { 684 continue; 685 } 686 buf = &bufs->buf[b_idx]; 687 xphi_bench_fill_buffer(buf, 1); 688 XPHI_BENCH_DBG("sending message [%lu] %p\n", b_idx, msg); 689 msg->data[0] = b_idx; 690 msg->header.control = ctrl; 691 irun++; 692 in_transit++; 693 b_idx = (b_idx + 1) & (bufs->num - 1); 694 } 695 696 err = ump_chan_recv(uc, &msg); 697 if (err_is_ok(err)) { 698 buf_idx = msg->data[0]; 699 XPHI_BENCH_DBG("receiving message [%"PRIu64"]\n", buf_idx); 700 buf = &bufs->buf[buf_idx]; 701 uint32_t ret_count = 0; 702 xphi_bench_read_buffer(buf, 1, &ret_count); 703 in_transit--; 704 n_recv++; 705 } 706 } 707 708 while (n_recv < XPHI_BENCH_NUM_RUNS) { 709 err = ump_chan_recv(uc, &msg); 710 if (err_is_ok(err)) { 711 buf_idx = msg->data[0]; 712 buf = &bufs->buf[buf_idx]; 713 uint32_t ret_count = 0; 714 XPHI_BENCH_DBG("receiving message [%lu]\n", buf_idx); 715 xphi_bench_read_buffer(buf, 1, &ret_count); 716 in_transit--; 717 n_recv++; 718 } 719 } 720 721 result = bench_tsc(); 722 if (result - tsc_start > bench_tscoverhead()) { 723 debug_printf("%lu %lu", result - tsc_start, bench_tscoverhead()); 724 } 725 if (result < tsc_start) { 726 result = (LONG_MAX - tsc_start) + result - bench_tscoverhead(); 727 } else { 728 result = (result - tsc_start - bench_tscoverhead()); 729 } 730 731 assert(in_transit == 0); 732 } while (!bench_ctl_add_run(ctl, &result)); 733 734#ifdef XPHI_BENCH_CHECK_STOP 735 msg = ump_chan_get_next(uc, &ctrl); 736 msg->data[0] = XPHI_BENCH_STOP_FLAG; 737 msg->header.control = ctrl; 738#endif 739 740 double avg_s = bench_avg(ctl->data, ctl->result_count) / tscperus; 741 avg_s /= 1000000; 742 xphi_bench_print_settings(); 743// bench_ctl_dump_csv(ctl, "", tscperus); 744 bench_ctl_dump_analysis(ctl, 0, "ASync Throughput", tscperus); 745 printf("Average seconds: %f\n", avg_s); 746 printf("Average throughput: %f GByte/s\n", 747 (((double) (XPHI_BENCH_NUM_RUNS * XPHI_BENCH_BUF_SIZE)) / 1024 / 1024 748 / 1024) 749 / (avg_s)); 750 printf("Average throughput (with processing): %f GByte/s\n", 751 (XPHI_BENCH_PROCESS_RUNS * ((double) (XPHI_BENCH_NUM_RUNS 752 * XPHI_BENCH_BUF_SIZE)) 753 / 1024 / 1024 / 1024) 754 / (avg_s)); 755 756 return SYS_ERR_OK; 757} 758 759