/*
    Copyright (c) 2014 Intel Corporation.  All Rights Reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

      * Redistributions of source code must retain the above copyright
        notice, this list of conditions and the following disclaimer.
      * Redistributions in binary form must reproduce the above copyright
        notice, this list of conditions and the following disclaimer in the
        documentation and/or other materials provided with the distribution.
      * Neither the name of Intel Corporation nor the names of its
        contributors may be used to endorse or promote products derived
        from this software without specific prior written permission.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


// Forward declarations, as the following 2 functions are declared as friend
// in offload_engine.h.
// CLANG does not like static to be after the friend declaration.
33static void __offload_init_library_once(void); 34static void __offload_fini_library(void); 35 36#include "offload_host.h" 37#ifdef MYO_SUPPORT 38#include "offload_myo_host.h" 39#endif 40 41#include <malloc.h> 42#ifndef TARGET_WINNT 43#include <alloca.h> 44#include <elf.h> 45#endif // TARGET_WINNT 46#include <errno.h> 47#include <fcntl.h> 48#include <stdlib.h> 49#include <string.h> 50#include <sys/stat.h> 51#include <sys/types.h> 52#include <sys/stat.h> 53 54#include <algorithm> 55#include <bitset> 56 57#if defined(HOST_WINNT) 58#define PATH_SEPARATOR ";" 59#else 60#define PATH_SEPARATOR ":" 61#endif 62 63#define GET_OFFLOAD_NUMBER(timer_data) \ 64 timer_data? timer_data->offload_number : 0 65 66#ifdef TARGET_WINNT 67// Small subset of ELF declarations for Windows which is needed to compile 68// this file. ELF header is used to understand what binary type is contained 69// in the target image - shared library or executable. 70 71typedef uint16_t Elf64_Half; 72typedef uint32_t Elf64_Word; 73typedef uint64_t Elf64_Addr; 74typedef uint64_t Elf64_Off; 75 76#define EI_NIDENT 16 77 78#define ET_EXEC 2 79#define ET_DYN 3 80 81typedef struct 82{ 83 unsigned char e_ident[EI_NIDENT]; 84 Elf64_Half e_type; 85 Elf64_Half e_machine; 86 Elf64_Word e_version; 87 Elf64_Addr e_entry; 88 Elf64_Off e_phoff; 89 Elf64_Off e_shoff; 90 Elf64_Word e_flags; 91 Elf64_Half e_ehsize; 92 Elf64_Half e_phentsize; 93 Elf64_Half e_phnum; 94 Elf64_Half e_shentsize; 95 Elf64_Half e_shnum; 96 Elf64_Half e_shstrndx; 97} Elf64_Ehdr; 98#endif // TARGET_WINNT 99 100// Host console and file logging 101const char *prefix; 102int console_enabled = 0; 103int offload_number = 0; 104 105static const char *htrace_envname = "H_TRACE"; 106static const char *offload_report_envname = "OFFLOAD_REPORT"; 107static char *timer_envname = "H_TIME"; 108 109// Trace information 110static const char* vardesc_direction_as_string[] = { 111 "NOCOPY", 112 "IN", 113 "OUT", 114 "INOUT" 115}; 116static const char* 
vardesc_type_as_string[] = { 117 "unknown", 118 "data", 119 "data_ptr", 120 "func_ptr", 121 "void_ptr", 122 "string_ptr", 123 "dv", 124 "dv_data", 125 "dv_data_slice", 126 "dv_ptr", 127 "dv_ptr_data", 128 "dv_ptr_data_slice", 129 "cean_var", 130 "cean_var_ptr", 131 "c_data_ptr_array", 132 "c_func_ptr_array", 133 "c_void_ptr_array", 134 "c_string_ptr_array" 135}; 136 137Engine* mic_engines = 0; 138uint32_t mic_engines_total = 0; 139pthread_key_t mic_thread_key; 140MicEnvVar mic_env_vars; 141uint64_t cpu_frequency = 0; 142 143// MIC_STACKSIZE 144uint32_t mic_stack_size = 12 * 1024 * 1024; 145 146// MIC_BUFFERSIZE 147uint64_t mic_buffer_size = 0; 148 149// MIC_LD_LIBRARY_PATH 150char* mic_library_path = 0; 151 152// MIC_PROXY_IO 153bool mic_proxy_io = true; 154 155// MIC_PROXY_FS_ROOT 156char* mic_proxy_fs_root = 0; 157 158// Threshold for creating buffers with large pages. Buffer is created 159// with large pages hint if its size exceeds the threshold value. 160// By default large pages are disabled right now (by setting default 161// value for threshold to MAX) due to HSD 4114629. 
uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
static const char *mic_use_2mb_buffers_envname =
    "MIC_USE_2MB_BUFFERS";

// Minimum transfer size for using asynchronous buffer writes/reads.
static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_write_envname =
    "MIC_USE_ASYNC_BUFFER_WRITE";

static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_read_envname =
    "MIC_USE_ASYNC_BUFFER_READ";

// device initialization type
OffloadInitType __offload_init_type = c_init_on_offload_all;
static const char *offload_init_envname = "OFFLOAD_INIT";

// active wait
static bool __offload_active_wait = true;
static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";

// OMP_DEFAULT_DEVICE
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";

// The list of pending target libraries
static bool __target_libs;
static TargetImageList __target_libs_list;
static mutex_t __target_libs_lock;
// Serializes offload_stack_memory_manager() below.
static mutex_t stack_alloc_lock;

// Target executable
TargetImage* __target_exe;

// Returns the host-side base address for a variable described by (ptr, type):
// - pointer types: the pointed-to address (*ptr),
// - scalar types: ptr itself,
// - dope-vector data (or slices): the Base field of the dope vector, where
//   the dope vector is reached directly, via an extra indirection, or via an
//   array descriptor, depending on the exact type,
// - anything else: NULL.
static char * offload_get_src_base(void * ptr, uint8_t type)
{
    char *base;
    if (VAR_TYPE_IS_PTR(type)) {
        base = *static_cast<char**>(ptr);
    }
    else if (VAR_TYPE_IS_SCALAR(type)) {
        base = static_cast<char*>(ptr);
    }
    else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
        ArrDesc *dvp;
        if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
            // for slices ptr is an array descriptor whose 'base' holds the
            // dope vector (c_dv_data_slice) or a pointer to it (otherwise)
            const arr_desc *ap = static_cast<const arr_desc*>(ptr);
            dvp = (type == c_dv_data_slice) ?
                  reinterpret_cast<ArrDesc*>(ap->base) :
                  *reinterpret_cast<ArrDesc**>(ap->base);
        }
        else {
            dvp = (type == c_dv_data) ?
                  static_cast<ArrDesc*>(ptr) :
                  *static_cast<ArrDesc**>(ptr);
        }
        base = reinterpret_cast<char*>(dvp->Base);
    }
    else {
        base = NULL;
    }
    return base;
}

// Reports a fatal COI error for this offload and terminates the process.
// 'msg' selects the error message; 'res' is the failing COI result code.
// Never returns (always exit(1); COI_PROCESS_DIED additionally tears down
// the device process first).
void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
{
    // special case for the 'process died' error
    if (res == COI_PROCESS_DIED) {
        m_device.fini_process(true);
    }
    else {
        switch (msg) {
            case c_buf_create:
                if (res == COI_OUT_OF_MEMORY) {
                    msg = c_buf_create_out_of_mem;
                }
                /* fallthru */

            case c_buf_create_from_mem:
            case c_buf_get_address:
            case c_pipeline_create:
            case c_pipeline_run_func:
                // these messages include the device index
                LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
                break;

            case c_buf_read:
            case c_buf_write:
            case c_buf_copy:
            case c_buf_map:
            case c_buf_unmap:
            case c_buf_destroy:
            case c_buf_set_state:
                LIBOFFLOAD_ERROR(msg, res);
                break;

            default:
                break;
        }
    }

    exit(1);
}

// Maps a COI result code to the public _Offload_result status value.
_Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
{
    switch (res) {
        case COI_SUCCESS:
            return OFFLOAD_SUCCESS;

        case COI_PROCESS_DIED:
            return OFFLOAD_PROCESS_DIED;

        case COI_OUT_OF_MEMORY:
            return OFFLOAD_OUT_OF_MEMORY;

        default:
            return OFFLOAD_ERROR;
    }
}

// Creates (or finds) the pointer-data association for [base, base+disp+size)
// on this device and, for a new association, creates the host (cpu_buf) and
// target (mic_buf) COI buffers backing it.  On success ptr_data points at
// the association and true is returned; false means the offload must fail
// (m_status, if set, holds the translated error).
// NOTE(review): insert_ptr_data() appears to return a *new* entry with
// ptr_data->alloc_ptr_data_lock already held — every exit path in the
// is_new branch unlocks it.  Confirm against offload_host.h.
bool OffloadDescriptor::alloc_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    int64_t alloc_disp,
    int align
)
{
    // total length of base
    int64_t length = disp + size;
    bool is_new;

    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
                  base, length);

    // add new entry
    ptr_data = m_device.insert_ptr_data(base, length, is_new);
    if (is_new) {

        OFFLOAD_TRACE(3, "Added new association\n");

        if (length > 0) {
            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
            COIRESULT res;

            // align should be a power of 2
            if (align > 0 && (align & (align - 1)) == 0) {
                // offset within mic_buffer. Can do offset optimization
                // only when source address alignment satisfies requested
                // alignment on the target (cq172736).
                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
                    // keep the page offset so target addresses preserve the
                    // source's alignment within a 4K page
                    ptr_data->mic_offset =
                        reinterpret_cast<intptr_t>(base) & 4095;
                }
            }

            // buffer size and flags
            uint64_t buffer_size = length + ptr_data->mic_offset;
            uint32_t buffer_flags = 0;

            // create buffer with large pages if data length exceeds
            // large page threshold
            if (length >= __offload_use_2mb_buffers) {
                buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
            }

            // create CPU buffer
            OFFLOAD_DEBUG_TRACE_1(3,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_create_buf_host,
                      "Creating buffer from source memory %p, "
                      "length %lld\n", base, length);

            // result is not checked because we can continue without cpu
            // buffer. In this case we will use COIBufferRead/Write instead
            // of COIBufferCopy.
            COI::BufferCreateFromMemory(length,
                                        COI_BUFFER_NORMAL,
                                        0,
                                        base,
                                        1,
                                        &m_device.get_process(),
                                        &ptr_data->cpu_buf);

            OFFLOAD_DEBUG_TRACE_1(3,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_create_buf_mic,
                      "Creating buffer for sink: size %lld, offset %d, "
                      "flags =0x%x\n", buffer_size - alloc_disp,
                      ptr_data->mic_offset, buffer_flags);

            // create MIC buffer
            res = COI::BufferCreate(buffer_size - alloc_disp,
                                    COI_BUFFER_NORMAL,
                                    buffer_flags,
                                    0,
                                    1,
                                    &m_device.get_process(),
                                    &ptr_data->mic_buf);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    // never returns
                    report_coi_error(c_buf_create, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // make buffer valid on the device.
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      m_device.get_process(),
                                      COI_BUFFER_VALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // mark the host-side copy invalid so the first transfer
            // populates the device
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      COI_PROCESS_SOURCE,
                                      COI_BUFFER_INVALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }
        }

        ptr_data->alloc_disp = alloc_disp;
        ptr_data->alloc_ptr_data_lock.unlock();
    }
    else {
        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);

        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
                      "is_static %d\n",
                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                      ptr_data->is_static);

        // This is not a new entry. Make sure that provided address range fits
        // into existing one.
        MemRange addr_range(base, length - ptr_data->alloc_disp);
        if (!ptr_data->cpu_addr.contains(addr_range)) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
            exit(1);
        }

        // if the entry is associated with static data it may not have buffers
        // created because they are created on demand.
        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
            return false;
        }
    }

    return true;
}

// Looks up an existing pointer-data association for [base, base+disp+size).
// Returns false only on a hard failure (static-buffer creation failed);
// a missing or non-fitting association with report_error == false is not an
// error: true is returned and ptr_data is set to 0.  With report_error ==
// true those cases terminate the process.
bool OffloadDescriptor::find_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    bool report_error
)
{
    // total length of base
    int64_t length = disp + size;

    OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
                  "length %lld\n", base, length);

    // find existing association in pointer table
    ptr_data = m_device.find_ptr_data(base);
    if (ptr_data == 0) {
        if (report_error) {
            LIBOFFLOAD_ERROR(c_no_ptr_data, base);
            exit(1);
        }
        OFFLOAD_TRACE(3, "Association does not exist\n");
        return true;
    }

    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                  ptr_data->is_static);

    // make sure that provided address range fits into existing one
    MemRange addr_range(base, length);
    if (!ptr_data->cpu_addr.contains(addr_range)) {
        if (report_error) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
            exit(1);
        }
        OFFLOAD_TRACE(3, "Existing association partially overlaps with "
                      "data address range\n");
        ptr_data = 0;
        return true;
    }

    // if the entry is associated with static data it may not have buffers
    // created because they are created on demand.
    if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
        return false;
    }

    return true;
}

// Lazily creates the host and target COI buffers for an association that
// refers to static data (such buffers are created on demand, not at
// association time).  Returns false on failure when m_status is set;
// otherwise a failure terminates via report_coi_error.
bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);

    if (ptr_data->cpu_buf == 0) {
        OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n",
                      ptr_data->cpu_addr.start());

        COIRESULT res = COI::BufferCreateFromMemory(
            ptr_data->cpu_addr.length(),
            COI_BUFFER_NORMAL,
            0,
            const_cast<void*>(ptr_data->cpu_addr.start()),
            1, &m_device.get_process(),
            &ptr_data->cpu_buf);

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_buf_create_from_mem, res);
        }
    }

    if (ptr_data->mic_buf == 0) {
        // wrap the already-known device-side address of the static data
        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
                      ptr_data->mic_addr);

        COIRESULT res = COI::BufferCreateFromMemory(
            ptr_data->cpu_addr.length(),
            COI_BUFFER_NORMAL,
            COI_SINK_MEMORY,
            reinterpret_cast<void*>(ptr_data->mic_addr),
            1, &m_device.get_process(),
            &ptr_data->mic_buf);

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_buf_create_from_mem, res);
        }
    }

    return true;
}

// Fills in ptr_data->mic_addr (the buffer's address on the device) if it is
// not yet known.  Idempotent: does nothing when mic_addr is already set or
// there is no device buffer.  Returns false on COI failure.
bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
{
    if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
        COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
                                                  &ptr_data->mic_addr);
        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
            }
            else if (m_is_mandatory) {
                report_coi_error(c_buf_get_address, res);
            }
            return false;
        }
    }
    return true;
}

// Zero-fills the target stack buffer by writing 'size' zero bytes from a
// temporary host allocation.  Returns false on COI failure (when m_status
// is set); otherwise failure terminates via report_coi_error.
bool OffloadDescriptor::nullify_target_stack(
    COIBUFFER targ_buf,
    uint64_t size
)
{
    char * ptr = (char*)malloc(size);
    // NOTE(review): if malloc fails, LIBOFFLOAD_ERROR(c_malloc) is issued
    // but control appears to fall through to memset(NULL, ...) unless the
    // macro terminates the process — confirm LIBOFFLOAD_ERROR semantics.
    if (ptr == NULL)
      LIBOFFLOAD_ERROR(c_malloc);
    COIRESULT res;

    memset(ptr, 0, size);
    res = COI::BufferWrite(
        targ_buf,
        0,
        ptr,
        size,
        COI_COPY_UNSPECIFIED,
        0, 0, 0);
    free(ptr);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
            return false;
        }
        report_coi_error(c_buf_write, res);
    }
    return true;
}

// Manages persistent device-side stack buffers for offloads that pass stack
// data.  Walks the per-device persistence list (kept sorted by host stack
// address): entries for deeper (already unwound) stack frames are queued in
// m_destroy_stack and erased; an exact match on (stack_begin, routine_id)
// reuses the existing buffer; otherwise a new PersistData with a freshly
// created, validated and zero-filled device buffer is pushed on the list.
// Sets *is_new accordingly; returns false on any buffer-creation failure.
// Guarded by stack_alloc_lock for the whole operation.
bool OffloadDescriptor::offload_stack_memory_manager(
    const void * stack_begin,
    int routine_id,
    int buf_size,
    int align,
    bool *is_new)
{
    mutex_locker_t locker(stack_alloc_lock);

    PersistData * new_el;
    PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
    PersistDataList::iterator it_end;
    int erase = 0;

    *is_new = false;

    for (PersistDataList::iterator it = m_device.m_persist_list.begin();
        it != m_device.m_persist_list.end(); it++) {
        PersistData cur_el = *it;

        if (stack_begin > it->stack_cpu_addr) {
            // this stack data must be destroyed
            m_destroy_stack.push_front(cur_el.stack_ptr_data);
            it_end = it;
            erase++;
        }
        else if (stack_begin == it->stack_cpu_addr) {
            if (routine_id != it-> routine_id) {
                // this stack data must be destroyed
                m_destroy_stack.push_front(cur_el.stack_ptr_data);
                it_end = it;
                erase++;
                break;
            }
            else {
                // stack data is reused
                m_stack_ptr_data = it->stack_ptr_data;
                if (erase > 0) {
                    // all obsolete stack sections must be erased from the list
                    m_device.m_persist_list.erase(it_begin, ++it_end);

                    // account for the sink addresses of the destroyed
                    // buffers that will be sent to the target.
                    // (sizeof() does not evaluate new_el, so using it
                    // unassigned here is well-defined.)
                    m_in_datalen +=
                        erase * sizeof(new_el->stack_ptr_data->mic_addr);
                }
                OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
                              m_stack_ptr_data->mic_addr);
                return true;
            }
        }
        else if (stack_begin < it->stack_cpu_addr) {
            // list is ordered; no match further on
            break;
        }
    }

    if (erase > 0) {
        // all obsolete stack sections must be erased from the list
        m_device.m_persist_list.erase(it_begin, ++it_end);
        m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
    }
    // new stack table is created
    // NOTE(review): new_el is never deleted (push_front below copies it),
    // including on the error returns that follow — verify PersistData
    // ownership semantics; this looks like a leak.
    new_el = new PersistData(stack_begin, routine_id, buf_size);
    // create MIC buffer
    COIRESULT res;
    uint32_t buffer_flags = 0;

    // create buffer with large pages if data length exceeds
    // large page threshold
    if (buf_size >= __offload_use_2mb_buffers) {
        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
    }
    res = COI::BufferCreate(buf_size,
                            COI_BUFFER_NORMAL,
                            buffer_flags,
                            0,
                            1,
                            &m_device.get_process(),
                            &new_el->stack_ptr_data->mic_buf);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_create, res);
        }
        return false;
    }
    // make buffer valid on the device.
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              m_device.get_process(),
                              COI_BUFFER_VALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        return false;
    }
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              COI_PROCESS_SOURCE,
                              COI_BUFFER_INVALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        return false;
    }
    // persistence algorithm requires target stack initially to be nullified
    if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
        return false;
    }

    m_stack_ptr_data = new_el->stack_ptr_data;
    init_mic_address(m_stack_ptr_data);
    OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
                  m_stack_ptr_data->mic_addr);
    m_device.m_persist_list.push_front(*new_el);
    // second call is redundant but harmless: init_mic_address only acts
    // while mic_addr == 0, and both pointers refer to the same PtrData
    init_mic_address(new_el->stack_ptr_data);
    *is_new = true;
    return true;
}

// First phase of offload setup: copies the caller's variable descriptors,
// allocates the dependency-event and extra-info arrays, and walks every
// descriptor to size the in/out data transfers and create or look up the
// backing buffers.  (The function continues beyond this chunk.)
bool OffloadDescriptor::setup_descriptors(
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total,
    int entry_id,
    const void *stack_addr
)
{
    COIRESULT res;

    OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);

    // make a copy of variable descriptors
    m_vars_total = vars_total;
    if (vars_total > 0) {
        m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
        if (m_vars == NULL)
          LIBOFFLOAD_ERROR(c_malloc);
        memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
        m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
        if (m_vars_extra == NULL)
          LIBOFFLOAD_ERROR(c_malloc);
    }

    // dependencies
    m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
    if (m_in_deps == NULL)
      LIBOFFLOAD_ERROR(c_malloc);
    if (m_vars_total > 0) {
        m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
        if (m_out_deps == NULL)
          LIBOFFLOAD_ERROR(c_malloc);
    }

    // copyin/copyout data length
    m_in_datalen = 0;
    m_out_datalen = 0;

    // First pass over variable descriptors
    // - Calculate size of the input and output non-pointer data
    // - Allocate buffers for input and output pointers
    for (int i = 0; i < m_vars_total; i++) {
        void* alloc_base = NULL;
        int64_t alloc_disp = 0;
        int64_t alloc_size;
        bool src_is_for_mic = (m_vars[i].direction.out ||
                               m_vars[i].into == NULL);

        const char *var_sname = "";
        if (vars2 != NULL && i < vars_total) {
            if (vars2[i].sname != NULL) {
                var_sname = vars2[i].sname;
            }
        }
        OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
            i, var_sname,
            vardesc_direction_as_string[m_vars[i].direction.bits],
            vardesc_type_as_string[m_vars[i].type.src]);
        if (vars2 != NULL && i <
vars_total && vars2[i].dname != NULL) { 770 OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname, 771 vardesc_type_as_string[m_vars[i].type.dst]); 772 } 773 OFFLOAD_TRACE(2, 774 " type_src=%d, type_dstn=%d, direction=%d, " 775 "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, " 776 "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n", 777 m_vars[i].type.src, 778 m_vars[i].type.dst, 779 m_vars[i].direction.bits, 780 m_vars[i].alloc_if, 781 m_vars[i].free_if, 782 m_vars[i].align, 783 m_vars[i].mic_offset, 784 m_vars[i].flags.bits, 785 m_vars[i].offset, 786 m_vars[i].size, 787 m_vars[i].count, 788 m_vars[i].ptr, 789 m_vars[i].into); 790 791 if (m_vars[i].alloc != NULL) { 792 // array descriptor 793 const arr_desc *ap = 794 static_cast<const arr_desc*>(m_vars[i].alloc); 795 796 // debug dump 797 __arr_desc_dump(" ", "ALLOC", ap, 0); 798 799 __arr_data_offset_and_length(ap, alloc_disp, alloc_size); 800 801 alloc_base = reinterpret_cast<void*>(ap->base); 802 } 803 804 m_vars_extra[i].cpu_disp = 0; 805 m_vars_extra[i].cpu_offset = 0; 806 m_vars_extra[i].src_data = 0; 807 m_vars_extra[i].read_rng_src = 0; 808 m_vars_extra[i].read_rng_dst = 0; 809 // flag is_arr_ptr_el is 1 only for var_descs generated 810 // for c_data_ptr_array type 811 if (i < vars_total) { 812 m_vars_extra[i].is_arr_ptr_el = 0; 813 } 814 815 switch (m_vars[i].type.src) { 816 case c_data_ptr_array: 817 { 818 const arr_desc *ap; 819 const VarDesc3 *vd3 = 820 static_cast<const VarDesc3*>(m_vars[i].ptr); 821 int flags = vd3->array_fields; 822 OFFLOAD_TRACE(2, 823 " pointer array flags = %04x\n", flags); 824 OFFLOAD_TRACE(2, 825 " pointer array type is %s\n", 826 vardesc_type_as_string[flags & 0x3f]); 827 ap = static_cast<const arr_desc*>(vd3->ptr_array); 828 __arr_desc_dump(" ", "ptr array", ap, 0); 829 if (m_vars[i].into) { 830 ap = static_cast<const arr_desc*>(m_vars[i].into); 831 __arr_desc_dump( 832 " ", "into array", ap, 0); 833 } 834 if ((flags & (1<<flag_align_is_array)) != 0) { 835 
ap = static_cast<const arr_desc*>(vd3->align_array); 836 __arr_desc_dump( 837 " ", "align array", ap, 0); 838 } 839 if ((flags & (1<<flag_alloc_if_is_array)) != 0) { 840 ap = static_cast<const arr_desc*>(vd3->alloc_if_array); 841 __arr_desc_dump( 842 " ", "alloc_if array", ap, 0); 843 } 844 if ((flags & (1<<flag_free_if_is_array)) != 0) { 845 ap = static_cast<const arr_desc*>(vd3->free_if_array); 846 __arr_desc_dump( 847 " ", "free_if array", ap, 0); 848 } 849 if ((flags & (1<<flag_extent_start_is_array)) != 0) { 850 ap = static_cast<const arr_desc*>(vd3->extent_start); 851 __arr_desc_dump( 852 " ", "extent_start array", ap, 0); 853 } else if ((flags & 854 (1<<flag_extent_start_is_scalar)) != 0) { 855 OFFLOAD_TRACE(2, 856 " extent_start scalar = %d\n", 857 (int64_t)vd3->extent_start); 858 } 859 if ((flags & (1<<flag_extent_elements_is_array)) != 0) { 860 ap = static_cast<const arr_desc*> 861 (vd3->extent_elements); 862 __arr_desc_dump( 863 " ", "extent_elements array", ap, 0); 864 } else if ((flags & 865 (1<<flag_extent_elements_is_scalar)) != 0) { 866 OFFLOAD_TRACE(2, 867 " extent_elements scalar = %d\n", 868 (int64_t)vd3->extent_elements); 869 } 870 if ((flags & (1<<flag_into_start_is_array)) != 0) { 871 ap = static_cast<const arr_desc*>(vd3->into_start); 872 __arr_desc_dump( 873 " ", "into_start array", ap, 0); 874 } else if ((flags & 875 (1<<flag_into_start_is_scalar)) != 0) { 876 OFFLOAD_TRACE(2, 877 " into_start scalar = %d\n", 878 (int64_t)vd3->into_start); 879 } 880 if ((flags & (1<<flag_into_elements_is_array)) != 0) { 881 ap = static_cast<const arr_desc*>(vd3->into_elements); 882 __arr_desc_dump( 883 " ", "into_elements array", ap, 0); 884 } else if ((flags & 885 (1<<flag_into_elements_is_scalar)) != 0) { 886 OFFLOAD_TRACE(2, 887 " into_elements scalar = %d\n", 888 (int64_t)vd3->into_elements); 889 } 890 if ((flags & (1<<flag_alloc_start_is_array)) != 0) { 891 ap = static_cast<const arr_desc*>(vd3->alloc_start); 892 __arr_desc_dump( 893 " ", "alloc_start 
array", ap, 0); 894 } else if ((flags & 895 (1<<flag_alloc_start_is_scalar)) != 0) { 896 OFFLOAD_TRACE(2, 897 " alloc_start scalar = %d\n", 898 (int64_t)vd3->alloc_start); 899 } 900 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) { 901 ap = static_cast<const arr_desc*>(vd3->alloc_elements); 902 __arr_desc_dump( 903 " ", "alloc_elements array", ap, 0); 904 } else if ((flags & 905 (1<<flag_alloc_elements_is_scalar)) != 0) { 906 OFFLOAD_TRACE(2, 907 " alloc_elements scalar = %d\n", 908 (int64_t)vd3->alloc_elements); 909 } 910 } 911 if (!gen_var_descs_for_pointer_array(i)) { 912 return false; 913 } 914 break; 915 916 case c_data: 917 case c_void_ptr: 918 case c_cean_var: 919 // In all uses later 920 // VarDesc.size will have the length of the data to be 921 // transferred 922 // VarDesc.disp will have an offset from base 923 if (m_vars[i].type.src == c_cean_var) { 924 // array descriptor 925 const arr_desc *ap = 926 static_cast<const arr_desc*>(m_vars[i].ptr); 927 928 // debug dump 929 __arr_desc_dump("", "IN/OUT", ap, 0); 930 931 // offset and length are derived from the array descriptor 932 __arr_data_offset_and_length(ap, m_vars[i].disp, 933 m_vars[i].size); 934 if (!is_arr_desc_contiguous(ap)) { 935 m_vars[i].flags.is_noncont_src = 1; 936 m_vars_extra[i].read_rng_src = 937 init_read_ranges_arr_desc(ap); 938 } 939 // all necessary information about length and offset is 940 // transferred in var descriptor. There is no need to send 941 // array descriptor to the target side. 
942 m_vars[i].ptr = reinterpret_cast<void*>(ap->base); 943 } 944 else { 945 m_vars[i].size *= m_vars[i].count; 946 m_vars[i].disp = 0; 947 } 948 949 if (m_vars[i].direction.bits) { 950 // make sure that transfer size > 0 951 if (m_vars[i].size <= 0) { 952 LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size); 953 exit(1); 954 } 955 956 if (m_vars[i].flags.is_static) { 957 PtrData *ptr_data; 958 959 // find data associated with variable 960 if (!find_ptr_data(ptr_data, 961 m_vars[i].ptr, 962 m_vars[i].disp, 963 m_vars[i].size, 964 false)) { 965 return false; 966 } 967 968 if (ptr_data != 0) { 969 // offset to base from the beginning of the buffer 970 // memory 971 m_vars[i].offset = 972 (char*) m_vars[i].ptr - 973 (char*) ptr_data->cpu_addr.start(); 974 } 975 else { 976 m_vars[i].flags.is_static = false; 977 if (m_vars[i].into == NULL) { 978 m_vars[i].flags.is_static_dstn = false; 979 } 980 } 981 m_vars_extra[i].src_data = ptr_data; 982 } 983 984 if (m_is_openmp) { 985 if (m_vars[i].flags.is_static) { 986 // Static data is transferred only by omp target 987 // update construct which passes zeros for 988 // alloc_if and free_if. 989 if (m_vars[i].alloc_if || m_vars[i].free_if) { 990 m_vars[i].direction.bits = c_parameter_nocopy; 991 } 992 } 993 else { 994 AutoData *auto_data; 995 if (m_vars[i].alloc_if) { 996 auto_data = m_device.insert_auto_data( 997 m_vars[i].ptr, m_vars[i].size); 998 auto_data->add_reference(); 999 } 1000 else { 1001 // TODO: what should be done if var is not in 1002 // the table? 
1003 auto_data = m_device.find_auto_data( 1004 m_vars[i].ptr); 1005 } 1006 1007 // For automatic variables data is transferred 1008 // only if alloc_if == 0 && free_if == 0 1009 // or reference count is 1 1010 if ((m_vars[i].alloc_if || m_vars[i].free_if) && 1011 auto_data != 0 && 1012 auto_data->get_reference() != 1) { 1013 m_vars[i].direction.bits = c_parameter_nocopy; 1014 } 1015 1016 // save data for later use 1017 m_vars_extra[i].auto_data = auto_data; 1018 } 1019 } 1020 1021 if (m_vars[i].direction.in && 1022 !m_vars[i].flags.is_static) { 1023 m_in_datalen += m_vars[i].size; 1024 1025 // for non-static target destination defined as CEAN 1026 // expression we pass to target its size and dist 1027 if (m_vars[i].into == NULL && 1028 m_vars[i].type.src == c_cean_var) { 1029 m_in_datalen += 2 * sizeof(uint64_t); 1030 } 1031 m_need_runfunction = true; 1032 } 1033 if (m_vars[i].direction.out && 1034 !m_vars[i].flags.is_static) { 1035 m_out_datalen += m_vars[i].size; 1036 m_need_runfunction = true; 1037 } 1038 } 1039 break; 1040 1041 case c_dv: 1042 if (m_vars[i].direction.bits || 1043 m_vars[i].alloc_if || 1044 m_vars[i].free_if) { 1045 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr); 1046 1047 // debug dump 1048 __dv_desc_dump("IN/OUT", dvp); 1049 1050 // send dope vector contents excluding base 1051 m_in_datalen += m_vars[i].size - sizeof(uint64_t); 1052 m_need_runfunction = true; 1053 } 1054 break; 1055 1056 case c_string_ptr: 1057 if ((m_vars[i].direction.bits || 1058 m_vars[i].alloc_if || 1059 m_vars[i].free_if) && 1060 m_vars[i].size == 0) { 1061 m_vars[i].size = 1; 1062 m_vars[i].count = 1063 strlen(*static_cast<char**>(m_vars[i].ptr)) + 1; 1064 } 1065 /* fallthru */ 1066 1067 case c_data_ptr: 1068 if (m_vars[i].flags.is_stack_buf && 1069 !m_vars[i].direction.bits && 1070 m_vars[i].alloc_if) { 1071 // this var_desc is for stack buffer 1072 bool is_new; 1073 1074 if (!offload_stack_memory_manager( 1075 stack_addr, entry_id, 1076 m_vars[i].count, 
m_vars[i].align, &is_new)) { 1077 return false; 1078 } 1079 if (is_new) { 1080 m_compute_buffers.push_back( 1081 m_stack_ptr_data->mic_buf); 1082 m_device.m_persist_list.front().cpu_stack_addr = 1083 static_cast<char*>(m_vars[i].ptr); 1084 } 1085 else { 1086 m_vars[i].flags.sink_addr = 1; 1087 m_in_datalen += sizeof(m_stack_ptr_data->mic_addr); 1088 } 1089 m_vars[i].size = m_destroy_stack.size(); 1090 m_vars_extra[i].src_data = m_stack_ptr_data; 1091 // need to add reference for buffer 1092 m_need_runfunction = true; 1093 break; 1094 } 1095 /* fallthru */ 1096 1097 case c_cean_var_ptr: 1098 case c_dv_ptr: 1099 if (m_vars[i].type.src == c_cean_var_ptr) { 1100 // array descriptor 1101 const arr_desc *ap = 1102 static_cast<const arr_desc*>(m_vars[i].ptr); 1103 1104 // debug dump 1105 __arr_desc_dump("", "IN/OUT", ap, 1); 1106 1107 // offset and length are derived from the array descriptor 1108 __arr_data_offset_and_length(ap, m_vars[i].disp, 1109 m_vars[i].size); 1110 1111 if (!is_arr_desc_contiguous(ap)) { 1112 m_vars[i].flags.is_noncont_src = 1; 1113 m_vars_extra[i].read_rng_src = 1114 init_read_ranges_arr_desc(ap); 1115 } 1116 // all necessary information about length and offset is 1117 // transferred in var descriptor. There is no need to send 1118 // array descriptor to the target side. 
1119 m_vars[i].ptr = reinterpret_cast<void*>(ap->base); 1120 } 1121 else if (m_vars[i].type.src == c_dv_ptr) { 1122 // need to send DV to the device unless it is 'nocopy' 1123 if (m_vars[i].direction.bits || 1124 m_vars[i].alloc_if || 1125 m_vars[i].free_if) { 1126 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr); 1127 1128 // debug dump 1129 __dv_desc_dump("IN/OUT", dvp); 1130 1131 m_vars[i].direction.bits = c_parameter_in; 1132 } 1133 1134 // no displacement 1135 m_vars[i].disp = 0; 1136 } 1137 else { 1138 // c_data_ptr or c_string_ptr 1139 m_vars[i].size *= m_vars[i].count; 1140 m_vars[i].disp = 0; 1141 } 1142 1143 if (m_vars[i].direction.bits || 1144 m_vars[i].alloc_if || 1145 m_vars[i].free_if) { 1146 PtrData *ptr_data; 1147 1148 // check that buffer length >= 0 1149 if (m_vars[i].alloc_if && 1150 m_vars[i].disp + m_vars[i].size < 0) { 1151 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); 1152 exit(1); 1153 } 1154 1155 // base address 1156 void *base = *static_cast<void**>(m_vars[i].ptr); 1157 1158 // allocate buffer if we have no INTO and don't need 1159 // allocation for the ptr at target 1160 if (src_is_for_mic) { 1161 if (m_vars[i].flags.is_stack_buf) { 1162 // for stack persistent objects ptr data is created 1163 // by var_desc with number 0. 1164 // Its ptr_data is stored at m_stack_ptr_data 1165 ptr_data = m_stack_ptr_data; 1166 m_vars[i].flags.sink_addr = 1; 1167 } 1168 else if (m_vars[i].alloc_if) { 1169 // add new entry 1170 if (!alloc_ptr_data( 1171 ptr_data, 1172 base, 1173 (alloc_base != NULL) ? 1174 alloc_disp : m_vars[i].disp, 1175 (alloc_base != NULL) ? 1176 alloc_size : m_vars[i].size, 1177 alloc_disp, 1178 (alloc_base != NULL) ? 
1179 0 : m_vars[i].align)) { 1180 return false; 1181 } 1182 1183 if (ptr_data->add_reference() == 0 && 1184 ptr_data->mic_buf != 0) { 1185 // add buffer to the list of buffers that 1186 // are passed to dispatch call 1187 m_compute_buffers.push_back( 1188 ptr_data->mic_buf); 1189 } 1190 else { 1191 // will send buffer address to device 1192 m_vars[i].flags.sink_addr = 1; 1193 } 1194 1195 if (!ptr_data->is_static) { 1196 // need to add reference for buffer 1197 m_need_runfunction = true; 1198 } 1199 } 1200 else { 1201 bool error_if_not_found = true; 1202 if (m_is_openmp) { 1203 // For omp target update variable is ignored 1204 // if it does not exist. 1205 if (!m_vars[i].alloc_if && 1206 !m_vars[i].free_if) { 1207 error_if_not_found = false; 1208 } 1209 } 1210 1211 // use existing association from pointer table 1212 if (!find_ptr_data(ptr_data, 1213 base, 1214 m_vars[i].disp, 1215 m_vars[i].size, 1216 error_if_not_found)) { 1217 return false; 1218 } 1219 1220 if (m_is_openmp) { 1221 // make var nocopy if it does not exist 1222 if (ptr_data == 0) { 1223 m_vars[i].direction.bits = 1224 c_parameter_nocopy; 1225 } 1226 } 1227 1228 if (ptr_data != 0) { 1229 m_vars[i].flags.sink_addr = 1; 1230 } 1231 } 1232 1233 if (ptr_data != 0) { 1234 if (m_is_openmp) { 1235 // data is transferred only if 1236 // alloc_if == 0 && free_if == 0 1237 // or reference count is 1 1238 if ((m_vars[i].alloc_if || 1239 m_vars[i].free_if) && 1240 ptr_data->get_reference() != 1) { 1241 m_vars[i].direction.bits = 1242 c_parameter_nocopy; 1243 } 1244 } 1245 1246 if (ptr_data->alloc_disp != 0) { 1247 m_vars[i].flags.alloc_disp = 1; 1248 m_in_datalen += sizeof(alloc_disp); 1249 } 1250 1251 if (m_vars[i].flags.sink_addr) { 1252 // get buffers's address on the sink 1253 if (!init_mic_address(ptr_data)) { 1254 return false; 1255 } 1256 1257 m_in_datalen += sizeof(ptr_data->mic_addr); 1258 } 1259 1260 if (!ptr_data->is_static && m_vars[i].free_if) { 1261 // need to decrement buffer reference on target 
1262 m_need_runfunction = true; 1263 } 1264 1265 // offset to base from the beginning of the buffer 1266 // memory 1267 m_vars[i].offset = (char*) base - 1268 (char*) ptr_data->cpu_addr.start(); 1269 1270 // copy other pointer properties to var descriptor 1271 m_vars[i].mic_offset = ptr_data->mic_offset; 1272 m_vars[i].flags.is_static = ptr_data->is_static; 1273 } 1274 } 1275 else { 1276 if (!find_ptr_data(ptr_data, 1277 base, 1278 m_vars[i].disp, 1279 m_vars[i].size, 1280 false)) { 1281 return false; 1282 } 1283 if (ptr_data) { 1284 m_vars[i].offset = 1285 (char*) base - 1286 (char*) ptr_data->cpu_addr.start(); 1287 } 1288 } 1289 1290 // save pointer data 1291 m_vars_extra[i].src_data = ptr_data; 1292 } 1293 break; 1294 1295 case c_func_ptr: 1296 if (m_vars[i].direction.in) { 1297 m_in_datalen += __offload_funcs.max_name_length(); 1298 } 1299 if (m_vars[i].direction.out) { 1300 m_out_datalen += __offload_funcs.max_name_length(); 1301 } 1302 m_need_runfunction = true; 1303 break; 1304 1305 case c_dv_data: 1306 case c_dv_ptr_data: 1307 case c_dv_data_slice: 1308 case c_dv_ptr_data_slice: 1309 ArrDesc *dvp; 1310 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { 1311 const arr_desc *ap; 1312 ap = static_cast<const arr_desc*>(m_vars[i].ptr); 1313 1314 dvp = (m_vars[i].type.src == c_dv_data_slice) ? 1315 reinterpret_cast<ArrDesc*>(ap->base) : 1316 *reinterpret_cast<ArrDesc**>(ap->base); 1317 } 1318 else { 1319 dvp = (m_vars[i].type.src == c_dv_data) ? 
1320 static_cast<ArrDesc*>(m_vars[i].ptr) : 1321 *static_cast<ArrDesc**>(m_vars[i].ptr); 1322 } 1323 1324 // if allocatable dope vector isn't allocated don't 1325 // transfer its data 1326 if (!__dv_is_allocated(dvp)) { 1327 m_vars[i].direction.bits = c_parameter_nocopy; 1328 m_vars[i].alloc_if = 0; 1329 m_vars[i].free_if = 0; 1330 } 1331 if (m_vars[i].direction.bits || 1332 m_vars[i].alloc_if || 1333 m_vars[i].free_if) { 1334 const arr_desc *ap; 1335 1336 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { 1337 ap = static_cast<const arr_desc*>(m_vars[i].ptr); 1338 1339 // debug dump 1340 __arr_desc_dump("", "IN/OUT", ap, 0); 1341 } 1342 if (!__dv_is_contiguous(dvp)) { 1343 m_vars[i].flags.is_noncont_src = 1; 1344 m_vars_extra[i].read_rng_src = 1345 init_read_ranges_dv(dvp); 1346 } 1347 1348 // size and displacement 1349 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) { 1350 // offset and length are derived from the 1351 // array descriptor 1352 __arr_data_offset_and_length(ap, 1353 m_vars[i].disp, 1354 m_vars[i].size); 1355 if (m_vars[i].direction.bits) { 1356 if (!is_arr_desc_contiguous(ap)) { 1357 if (m_vars[i].flags.is_noncont_src) { 1358 LIBOFFLOAD_ERROR(c_slice_of_noncont_array); 1359 return false; 1360 } 1361 m_vars[i].flags.is_noncont_src = 1; 1362 m_vars_extra[i].read_rng_src = 1363 init_read_ranges_arr_desc(ap); 1364 } 1365 } 1366 } 1367 else { 1368 if (m_vars[i].flags.has_length) { 1369 m_vars[i].size = 1370 __dv_data_length(dvp, m_vars[i].count); 1371 } 1372 else { 1373 m_vars[i].size = __dv_data_length(dvp); 1374 } 1375 m_vars[i].disp = 0; 1376 } 1377 1378 // check that length >= 0 1379 if (m_vars[i].alloc_if && 1380 (m_vars[i].disp + m_vars[i].size < 0)) { 1381 LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len); 1382 exit(1); 1383 } 1384 1385 // base address 1386 void *base = reinterpret_cast<void*>(dvp->Base); 1387 PtrData *ptr_data; 1388 1389 // allocate buffer if we have no INTO and don't need 1390 // allocation for the ptr at target 1391 if 
(src_is_for_mic) { 1392 if (m_vars[i].alloc_if) { 1393 // add new entry 1394 if (!alloc_ptr_data( 1395 ptr_data, 1396 base, 1397 (alloc_base != NULL) ? 1398 alloc_disp : m_vars[i].disp, 1399 (alloc_base != NULL) ? 1400 alloc_size : m_vars[i].size, 1401 alloc_disp, 1402 (alloc_base != NULL) ? 1403 0 : m_vars[i].align)) { 1404 return false; 1405 } 1406 1407 if (ptr_data->add_reference() == 0 && 1408 ptr_data->mic_buf != 0) { 1409 // add buffer to the list of buffers 1410 // that are passed to dispatch call 1411 m_compute_buffers.push_back( 1412 ptr_data->mic_buf); 1413 } 1414 else { 1415 // will send buffer address to device 1416 m_vars[i].flags.sink_addr = 1; 1417 } 1418 1419 if (!ptr_data->is_static) { 1420 // need to add reference for buffer 1421 m_need_runfunction = true; 1422 } 1423 } 1424 else { 1425 bool error_if_not_found = true; 1426 if (m_is_openmp) { 1427 // For omp target update variable is ignored 1428 // if it does not exist. 1429 if (!m_vars[i].alloc_if && 1430 !m_vars[i].free_if) { 1431 error_if_not_found = false; 1432 } 1433 } 1434 1435 // use existing association from pointer table 1436 if (!find_ptr_data(ptr_data, 1437 base, 1438 m_vars[i].disp, 1439 m_vars[i].size, 1440 error_if_not_found)) { 1441 return false; 1442 } 1443 1444 if (m_is_openmp) { 1445 // make var nocopy if it does not exist 1446 if (ptr_data == 0) { 1447 m_vars[i].direction.bits = 1448 c_parameter_nocopy; 1449 } 1450 } 1451 1452 if (ptr_data != 0) { 1453 // need to update base in dope vector on device 1454 m_vars[i].flags.sink_addr = 1; 1455 } 1456 } 1457 1458 if (ptr_data != 0) { 1459 if (m_is_openmp) { 1460 // data is transferred only if 1461 // alloc_if == 0 && free_if == 0 1462 // or reference count is 1 1463 if ((m_vars[i].alloc_if || 1464 m_vars[i].free_if) && 1465 ptr_data->get_reference() != 1) { 1466 m_vars[i].direction.bits = 1467 c_parameter_nocopy; 1468 } 1469 } 1470 1471 if (ptr_data->alloc_disp != 0) { 1472 m_vars[i].flags.alloc_disp = 1; 1473 m_in_datalen += 
sizeof(alloc_disp); 1474 } 1475 1476 if (m_vars[i].flags.sink_addr) { 1477 // get buffers's address on the sink 1478 if (!init_mic_address(ptr_data)) { 1479 return false; 1480 } 1481 1482 m_in_datalen += sizeof(ptr_data->mic_addr); 1483 } 1484 1485 if (!ptr_data->is_static && m_vars[i].free_if) { 1486 // need to decrement buffer reference on target 1487 m_need_runfunction = true; 1488 } 1489 1490 // offset to base from the beginning of the buffer 1491 // memory 1492 m_vars[i].offset = 1493 (char*) base - 1494 (char*) ptr_data->cpu_addr.start(); 1495 1496 // copy other pointer properties to var descriptor 1497 m_vars[i].mic_offset = ptr_data->mic_offset; 1498 m_vars[i].flags.is_static = ptr_data->is_static; 1499 } 1500 } 1501 else { // !src_is_for_mic 1502 if (!find_ptr_data(ptr_data, 1503 base, 1504 m_vars[i].disp, 1505 m_vars[i].size, 1506 false)) { 1507 return false; 1508 } 1509 m_vars[i].offset = !ptr_data ? 0 : 1510 (char*) base - 1511 (char*) ptr_data->cpu_addr.start(); 1512 } 1513 1514 // save pointer data 1515 m_vars_extra[i].src_data = ptr_data; 1516 } 1517 break; 1518 1519 default: 1520 LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src); 1521 LIBOFFLOAD_ABORT; 1522 } 1523 if (m_vars[i].type.src == c_data_ptr_array) { 1524 continue; 1525 } 1526 1527 if (src_is_for_mic && m_vars[i].flags.is_stack_buf) { 1528 m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) - 1529 m_device.m_persist_list.front().cpu_stack_addr; 1530 } 1531 // if source is used at CPU save its offset and disp 1532 if (m_vars[i].into == NULL || m_vars[i].direction.in) { 1533 m_vars_extra[i].cpu_offset = m_vars[i].offset; 1534 m_vars_extra[i].cpu_disp = m_vars[i].disp; 1535 } 1536 1537 // If "into" is define we need to do the similar work for it 1538 if (!m_vars[i].into) { 1539 continue; 1540 } 1541 1542 int64_t into_disp =0, into_offset = 0; 1543 1544 switch (m_vars[i].type.dst) { 1545 case c_data_ptr_array: 1546 break; 1547 case c_data: 1548 case c_void_ptr: 1549 case c_cean_var: { 
1550 int64_t size = m_vars[i].size; 1551 1552 if (m_vars[i].type.dst == c_cean_var) { 1553 // array descriptor 1554 const arr_desc *ap = 1555 static_cast<const arr_desc*>(m_vars[i].into); 1556 1557 // debug dump 1558 __arr_desc_dump(" ", "INTO", ap, 0); 1559 1560 // offset and length are derived from the array descriptor 1561 __arr_data_offset_and_length(ap, into_disp, size); 1562 1563 if (!is_arr_desc_contiguous(ap)) { 1564 m_vars[i].flags.is_noncont_dst = 1; 1565 m_vars_extra[i].read_rng_dst = 1566 init_read_ranges_arr_desc(ap); 1567 if (!cean_ranges_match( 1568 m_vars_extra[i].read_rng_src, 1569 m_vars_extra[i].read_rng_dst)) { 1570 LIBOFFLOAD_ERROR(c_ranges_dont_match); 1571 exit(1); 1572 } 1573 } 1574 m_vars[i].into = reinterpret_cast<void*>(ap->base); 1575 } 1576 1577 int64_t size_src = m_vars_extra[i].read_rng_src ? 1578 cean_get_transf_size(m_vars_extra[i].read_rng_src) : 1579 m_vars[i].size; 1580 int64_t size_dst = m_vars_extra[i].read_rng_dst ? 1581 cean_get_transf_size(m_vars_extra[i].read_rng_dst) : 1582 size; 1583 // It's supposed that "into" size must be not less 1584 // than src size 1585 if (size_src > size_dst) { 1586 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, 1587 size_src, size_dst); 1588 exit(1); 1589 } 1590 1591 if (m_vars[i].direction.bits) { 1592 if (m_vars[i].flags.is_static_dstn) { 1593 PtrData *ptr_data; 1594 1595 // find data associated with variable 1596 if (!find_ptr_data(ptr_data, m_vars[i].into, 1597 into_disp, size, false)) { 1598 return false; 1599 } 1600 if (ptr_data != 0) { 1601 // offset to base from the beginning of the buffer 1602 // memory 1603 into_offset = 1604 (char*) m_vars[i].into - 1605 (char*) ptr_data->cpu_addr.start(); 1606 } 1607 else { 1608 m_vars[i].flags.is_static_dstn = false; 1609 } 1610 m_vars_extra[i].dst_data = ptr_data; 1611 } 1612 } 1613 1614 if (m_vars[i].direction.in && 1615 !m_vars[i].flags.is_static_dstn) { 1616 m_in_datalen += m_vars[i].size; 1617 1618 // for non-static target destination defined 
as CEAN 1619 // expression we pass to target its size and dist 1620 if (m_vars[i].type.dst == c_cean_var) { 1621 m_in_datalen += 2 * sizeof(uint64_t); 1622 } 1623 m_need_runfunction = true; 1624 } 1625 break; 1626 } 1627 1628 case c_dv: 1629 if (m_vars[i].direction.bits || 1630 m_vars[i].alloc_if || 1631 m_vars[i].free_if) { 1632 ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into); 1633 1634 // debug dump 1635 __dv_desc_dump("INTO", dvp); 1636 1637 // send dope vector contents excluding base 1638 m_in_datalen += m_vars[i].size - sizeof(uint64_t); 1639 m_need_runfunction = true; 1640 } 1641 break; 1642 1643 case c_string_ptr: 1644 case c_data_ptr: 1645 case c_cean_var_ptr: 1646 case c_dv_ptr: { 1647 int64_t size = m_vars[i].size; 1648 1649 if (m_vars[i].type.dst == c_cean_var_ptr) { 1650 // array descriptor 1651 const arr_desc *ap = 1652 static_cast<const arr_desc*>(m_vars[i].into); 1653 1654 // debug dump 1655 __arr_desc_dump(" ", "INTO", ap, 1); 1656 1657 // offset and length are derived from the array descriptor 1658 __arr_data_offset_and_length(ap, into_disp, size); 1659 1660 if (!is_arr_desc_contiguous(ap)) { 1661 m_vars[i].flags.is_noncont_src = 1; 1662 m_vars_extra[i].read_rng_dst = 1663 init_read_ranges_arr_desc(ap); 1664 if (!cean_ranges_match( 1665 m_vars_extra[i].read_rng_src, 1666 m_vars_extra[i].read_rng_dst)) { 1667 LIBOFFLOAD_ERROR(c_ranges_dont_match); 1668 } 1669 } 1670 m_vars[i].into = reinterpret_cast<char**>(ap->base); 1671 } 1672 else if (m_vars[i].type.dst == c_dv_ptr) { 1673 // need to send DV to the device unless it is 'nocopy' 1674 if (m_vars[i].direction.bits || 1675 m_vars[i].alloc_if || 1676 m_vars[i].free_if) { 1677 ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into); 1678 1679 // debug dump 1680 __dv_desc_dump("INTO", dvp); 1681 1682 m_vars[i].direction.bits = c_parameter_in; 1683 } 1684 } 1685 1686 int64_t size_src = m_vars_extra[i].read_rng_src ? 
1687 cean_get_transf_size(m_vars_extra[i].read_rng_src) : 1688 m_vars[i].size; 1689 int64_t size_dst = m_vars_extra[i].read_rng_dst ? 1690 cean_get_transf_size(m_vars_extra[i].read_rng_dst) : 1691 size; 1692 // It's supposed that "into" size must be not less than 1693 // src size 1694 if (size_src > size_dst) { 1695 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, 1696 size_src, size_dst); 1697 exit(1); 1698 } 1699 1700 if (m_vars[i].direction.bits) { 1701 PtrData *ptr_data; 1702 1703 // base address 1704 void *base = *static_cast<void**>(m_vars[i].into); 1705 1706 if (m_vars[i].direction.in) { 1707 // allocate buffer 1708 if (m_vars[i].flags.is_stack_buf) { 1709 // for stack persistent objects ptr data is created 1710 // by var_desc with number 0. 1711 // Its ptr_data is stored at m_stack_ptr_data 1712 ptr_data = m_stack_ptr_data; 1713 m_vars[i].flags.sink_addr = 1; 1714 } 1715 else if (m_vars[i].alloc_if) { 1716 // add new entry 1717 if (!alloc_ptr_data( 1718 ptr_data, 1719 base, 1720 (alloc_base != NULL) ? 1721 alloc_disp : into_disp, 1722 (alloc_base != NULL) ? 1723 alloc_size : size, 1724 alloc_disp, 1725 (alloc_base != NULL) ? 
1726 0 : m_vars[i].align)) { 1727 return false; 1728 } 1729 1730 if (ptr_data->add_reference() == 0 && 1731 ptr_data->mic_buf != 0) { 1732 // add buffer to the list of buffers that 1733 // are passed to dispatch call 1734 m_compute_buffers.push_back( 1735 ptr_data->mic_buf); 1736 } 1737 else { 1738 // will send buffer address to device 1739 m_vars[i].flags.sink_addr = 1; 1740 } 1741 1742 if (!ptr_data->is_static) { 1743 // need to add reference for buffer 1744 m_need_runfunction = true; 1745 } 1746 } 1747 else { 1748 // use existing association from pointer table 1749 if (!find_ptr_data(ptr_data, base, into_disp, size)) { 1750 return false; 1751 } 1752 m_vars[i].flags.sink_addr = 1; 1753 } 1754 1755 if (ptr_data->alloc_disp != 0) { 1756 m_vars[i].flags.alloc_disp = 1; 1757 m_in_datalen += sizeof(alloc_disp); 1758 } 1759 1760 if (m_vars[i].flags.sink_addr) { 1761 // get buffers's address on the sink 1762 if (!init_mic_address(ptr_data)) { 1763 return false; 1764 } 1765 1766 m_in_datalen += sizeof(ptr_data->mic_addr); 1767 } 1768 1769 if (!ptr_data->is_static && m_vars[i].free_if) { 1770 // need to decrement buffer reference on target 1771 m_need_runfunction = true; 1772 } 1773 1774 // copy other pointer properties to var descriptor 1775 m_vars[i].mic_offset = ptr_data->mic_offset; 1776 m_vars[i].flags.is_static_dstn = ptr_data->is_static; 1777 } 1778 else { 1779 if (!find_ptr_data(ptr_data, 1780 base, 1781 into_disp, 1782 m_vars[i].size, 1783 false)) { 1784 return false; 1785 } 1786 } 1787 if (ptr_data) { 1788 into_offset = ptr_data ? 
1789 (char*) base - 1790 (char*) ptr_data->cpu_addr.start() : 1791 0; 1792 } 1793 // save pointer data 1794 m_vars_extra[i].dst_data = ptr_data; 1795 } 1796 break; 1797 } 1798 1799 case c_func_ptr: 1800 break; 1801 1802 case c_dv_data: 1803 case c_dv_ptr_data: 1804 case c_dv_data_slice: 1805 case c_dv_ptr_data_slice: 1806 if (m_vars[i].direction.bits || 1807 m_vars[i].alloc_if || 1808 m_vars[i].free_if) { 1809 const arr_desc *ap; 1810 ArrDesc *dvp; 1811 PtrData *ptr_data; 1812 int64_t disp; 1813 int64_t size; 1814 1815 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { 1816 ap = static_cast<const arr_desc*>(m_vars[i].into); 1817 1818 // debug dump 1819 __arr_desc_dump(" ", "INTO", ap, 0); 1820 1821 dvp = (m_vars[i].type.dst == c_dv_data_slice) ? 1822 reinterpret_cast<ArrDesc*>(ap->base) : 1823 *reinterpret_cast<ArrDesc**>(ap->base); 1824 } 1825 else { 1826 dvp = (m_vars[i].type.dst == c_dv_data) ? 1827 static_cast<ArrDesc*>(m_vars[i].into) : 1828 *static_cast<ArrDesc**>(m_vars[i].into); 1829 } 1830 if (!__dv_is_contiguous(dvp)) { 1831 m_vars[i].flags.is_noncont_dst = 1; 1832 m_vars_extra[i].read_rng_dst = 1833 init_read_ranges_dv(dvp); 1834 } 1835 // size and displacement 1836 if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { 1837 // offset and length are derived from the array 1838 // descriptor 1839 __arr_data_offset_and_length(ap, into_disp, size); 1840 if (m_vars[i].direction.bits) { 1841 if (!is_arr_desc_contiguous(ap)) { 1842 if (m_vars[i].flags.is_noncont_dst) { 1843 LIBOFFLOAD_ERROR(c_slice_of_noncont_array); 1844 return false; 1845 } 1846 m_vars[i].flags.is_noncont_dst = 1; 1847 m_vars_extra[i].read_rng_dst = 1848 init_read_ranges_arr_desc(ap); 1849 if (!cean_ranges_match( 1850 m_vars_extra[i].read_rng_src, 1851 m_vars_extra[i].read_rng_dst)) { 1852 LIBOFFLOAD_ERROR(c_ranges_dont_match); 1853 } 1854 } 1855 } 1856 } 1857 else { 1858 if (m_vars[i].flags.has_length) { 1859 size = __dv_data_length(dvp, m_vars[i].count); 1860 } 1861 else { 1862 size = 
__dv_data_length(dvp); 1863 } 1864 disp = 0; 1865 } 1866 1867 int64_t size_src = 1868 m_vars_extra[i].read_rng_src ? 1869 cean_get_transf_size(m_vars_extra[i].read_rng_src) : 1870 m_vars[i].size; 1871 int64_t size_dst = 1872 m_vars_extra[i].read_rng_dst ? 1873 cean_get_transf_size(m_vars_extra[i].read_rng_dst) : 1874 size; 1875 // It's supposed that "into" size must be not less 1876 // than src size 1877 if (size_src > size_dst) { 1878 LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes, 1879 size_src, size_dst); 1880 exit(1); 1881 } 1882 1883 // base address 1884 void *base = reinterpret_cast<void*>(dvp->Base); 1885 1886 // allocate buffer 1887 if (m_vars[i].direction.in) { 1888 if (m_vars[i].alloc_if) { 1889 // add new entry 1890 if (!alloc_ptr_data( 1891 ptr_data, 1892 base, 1893 (alloc_base != NULL) ? 1894 alloc_disp : into_disp, 1895 (alloc_base != NULL) ? 1896 alloc_size : size, 1897 alloc_disp, 1898 (alloc_base != NULL) ? 1899 0 : m_vars[i].align)) { 1900 return false; 1901 } 1902 if (ptr_data->add_reference() == 0 && 1903 ptr_data->mic_buf !=0) { 1904 // add buffer to the list of buffers 1905 // that are passed to dispatch call 1906 m_compute_buffers.push_back( 1907 ptr_data->mic_buf); 1908 } 1909 else { 1910 // will send buffer address to device 1911 m_vars[i].flags.sink_addr = 1; 1912 } 1913 1914 if (!ptr_data->is_static) { 1915 // need to add reference for buffer 1916 m_need_runfunction = true; 1917 } 1918 } 1919 else { 1920 // use existing association from pointer table 1921 if (!find_ptr_data(ptr_data, base, into_disp, size)) { 1922 return false; 1923 } 1924 1925 // need to update base in dope vector on device 1926 m_vars[i].flags.sink_addr = 1; 1927 } 1928 1929 if (ptr_data->alloc_disp != 0) { 1930 m_vars[i].flags.alloc_disp = 1; 1931 m_in_datalen += sizeof(alloc_disp); 1932 } 1933 1934 if (m_vars[i].flags.sink_addr) { 1935 // get buffers's address on the sink 1936 if (!init_mic_address(ptr_data)) { 1937 return false; 1938 } 1939 m_in_datalen += 
sizeof(ptr_data->mic_addr);
                    }

                    if (!ptr_data->is_static && m_vars[i].free_if) {
                        // need to decrement buffer reference on target
                        m_need_runfunction = true;
                    }

                    // offset to base from the beginning of the buffer
                    // memory
                    into_offset =
                        (char*) base - (char*) ptr_data->cpu_addr.start();

                    // copy other pointer properties to var descriptor
                    m_vars[i].mic_offset = ptr_data->mic_offset;
                    m_vars[i].flags.is_static_dstn = ptr_data->is_static;
                }
                else { // src_is_for_mic
                    // OUT-only direction: reuse an existing association if
                    // one is present; absence is not an error here
                    if (!find_ptr_data(ptr_data,
                                       base,
                                       into_disp,
                                       size,
                                       false)) {
                        return false;
                    }
                    into_offset = !ptr_data ?
                        0 :
                        (char*) base - (char*) ptr_data->cpu_addr.start();
                }

                // save pointer data
                m_vars_extra[i].dst_data = ptr_data;
            }
            break;

        default:
            LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
            LIBOFFLOAD_ABORT;
    }
    // if into is used at CPU save its offset and disp
    if (m_vars[i].direction.out) {
        m_vars_extra[i].cpu_offset = into_offset;
        m_vars_extra[i].cpu_disp = into_disp;
    }
    else {
        if (m_vars[i].flags.is_stack_buf) {
            into_offset = static_cast<char*>(m_vars[i].into) -
                m_device.m_persist_list.front().cpu_stack_addr;
        }
        m_vars[i].offset = into_offset;
        m_vars[i].disp = into_disp;
    }
}

return true;
}

// Prepare the "misc" data block that accompanies the run-function call.
// Computes final in/out data sizes, decides whether copyin/copyout data
// fits into the COI misc/return data area or needs a dedicated COI buffer,
// allocates and initializes the function descriptor, and appends the entry
// name to it.
// Returns false if a COI buffer could not be created (m_status->result is
// set in that case); aborts via report_coi_error when m_status is NULL.
bool OffloadDescriptor::setup_misc_data(const char *name)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);

    // we can skip run function call together with wait if offloaded
    // region is empty and there is no user defined non-pointer IN/OUT data
    if (m_need_runfunction) {
        // variable descriptors are sent as input data
        m_in_datalen += m_vars_total * sizeof(VarDesc);

        // timer data is sent as a part of the output data
        m_out_datalen += OFFLOAD_TIMER_DATALEN();

        // max from input data and output data length
        uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
                            m_out_datalen;

        // Misc data has the following layout
        //     <Function Descriptor>
        //     <Function Name>
        //     <In/Out Data>            (optional)
        //
        // We can transfer copyin/copyout data in misc/return data which can
        // be passed to run function call if its size does not exceed
        // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
        // buffer for it.

        m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
        // round the descriptor size up to an 8-byte boundary
        m_func_desc_size = (m_func_desc_size + 7) & ~7;

        int misc_data_offset = 0;
        int misc_data_size = 0;
        if (data_len > 0) {
            if (m_func_desc_size +
                m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
                m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
                // use misc/return data for copyin/copyout
                misc_data_offset = m_func_desc_size;
                misc_data_size = data_len;
            }
            else {
                OffloadTimer timer_buf(get_timer_data(),
                                       c_offload_host_alloc_data_buffer);

                // send/receive data using buffer
                COIRESULT res = COI::BufferCreate(data_len,
                                                  COI_BUFFER_NORMAL,
                                                  0, 0,
                                                  1, &m_device.get_process(),
                                                  &m_inout_buf);
                if (res != COI_SUCCESS) {
                    if (m_status != 0) {
                        m_status->result = translate_coi_error(res);
                        return false;
                    }
                    report_coi_error(c_buf_create, res);
                }

                m_compute_buffers.push_back(m_inout_buf);
                m_destroy_buffers.push_back(m_inout_buf);
            }
        }

        // initialize function descriptor
        m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
                                                   misc_data_size);
        // NOTE(review): LIBOFFLOAD_ERROR(c_malloc) is reported on
        // allocation failure but control appears to fall through to the
        // dereferences below — confirm that this macro terminates the
        // process.
        if (m_func_desc == NULL)
            LIBOFFLOAD_ERROR(c_malloc);
        m_func_desc->console_enabled = console_enabled;
        m_func_desc->timer_enabled =
            timer_enabled || (offload_report_level && offload_report_enabled);
        m_func_desc->offload_report_level = offload_report_level;
        m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
        m_func_desc->in_datalen = m_in_datalen;
        m_func_desc->out_datalen = m_out_datalen;
        m_func_desc->vars_num = m_vars_total;
        m_func_desc->data_offset = misc_data_offset;

        // append entry name
        strcpy(m_func_desc->data, name);
    }

    return true;
}

// Block until every offload this one depends on has finished.
// Each element of 'waits' must match a signal previously registered with
// the device; an unknown handle is a fatal user error (LIBOFFLOAD_ABORT).
// Finished tasks are cleaned up and deleted here.
// Returns false if any dependent offload reported failure.
bool OffloadDescriptor::wait_dependencies(
    const void **waits,
    int num_waits
)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
    bool ret = true;

    for (int i = 0; i < num_waits; i++) {

        OffloadDescriptor *task = m_device.find_signal(waits[i], true);
        if (task == 0) {
            LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
                             waits[i]);
            LIBOFFLOAD_ABORT;
        }

        if (!task->offload_finish()) {
            ret = false;
        }

        task->cleanup();
        delete task;
    }

    return ret;
}

// Top-level driver for executing one offload region on the target device:
// waits for dependencies, builds variable descriptors and buffers,
// initiates pointer sends, prepares misc data, gathers copyin data, starts
// the computation and initiates pointer receives. If 'signal' is non-NULL
// the descriptor is registered for a later asynchronous wait; otherwise it
// waits for completion synchronously.
// Returns false (after cleanup()) on any failure.
bool OffloadDescriptor::offload(
    const char *name,
    bool is_empty,
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total,
    const void **waits,
    int num_waits,
    const void **signal,
    int entry_id,
    const void *stack_addr
)
{
    if (signal == 0) {
        OFFLOAD_DEBUG_TRACE_1(1,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_init_func,
                      "Offload function %s, is_empty=%d, #varDescs=%d, "
                      "#waits=%d, signal=none\n",
                      name, is_empty, vars_total, num_waits);
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_sent_pointer_data,
                      "#Wait : %d \n", num_waits);
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_signal,
                      "none %d\n", 0);
    }
    else {
        OFFLOAD_DEBUG_TRACE_1(1,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_init_func,
                      "Offload function %s, is_empty=%d, #varDescs=%d, "
                      "#waits=%d, signal=%p\n",
                      name, is_empty, vars_total, num_waits,
                      *signal);

        // NOTE(review): 'signal' is a pointer formatted with "%d" — this
        // looks like it should be "%p" (or *signal); confirm against the
        // expected report format.
        OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_signal,
                      "%d\n", signal);
    }
    OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_wait,
                      "#Wait : %d %p\n", num_waits, waits);

    // assume success until a stage below fails
    if (m_status != 0) {
        m_status->result = OFFLOAD_SUCCESS;
        m_status->device_number = m_device.get_logical_index();
    }

    m_need_runfunction = !is_empty;

    // wait for dependencies to finish
    if (!wait_dependencies(waits, num_waits)) {
        cleanup();
        return false;
    }

    // setup buffers
    if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
        cleanup();
        return false;
    }

    // initiate send for pointers. Want to do it as early as possible.
    if (!send_pointer_data(signal != 0)) {
        cleanup();
        return false;
    }

    // setup misc data for run function
    if (!setup_misc_data(name)) {
        cleanup();
        return false;
    }

    // gather copyin data into buffer
    if (!gather_copyin_data()) {
        cleanup();
        return false;
    }

    // Start the computation
    if (!compute()) {
        cleanup();
        return false;
    }

    // initiate receive for pointers
    if (!receive_pointer_data(signal != 0)) {
        cleanup();
        return false;
    }

    // if there is a signal save descriptor for the later use.
    // Note: cleanup() is deliberately NOT called here; it happens when the
    // signal is waited on (see wait_dependencies).
    if (signal != 0) {
        m_device.add_signal(*signal, this);
        return true;
    }

    // wait for the offload to finish.
    if (!offload_finish()) {
        cleanup();
        return false;
    }

    cleanup();
    return true;
}

// Complete an in-flight offload: wait for compute events, scatter copyout
// data back to user variables, wait for receive events, and destroy the
// buffers queued for destruction.
// When __offload_active_wait is set, waiting spins with zero-timeout
// EventWait calls to keep the CPU busy; otherwise it blocks indefinitely.
// Returns false on failure with m_status->result set (when m_status is
// non-NULL); otherwise COI errors abort via report_coi_error.
bool OffloadDescriptor::offload_finish()
{
    COIRESULT res;

    // wait for compute dependencies to become signaled
    if (m_in_deps_total > 0) {
        OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);

        if (__offload_active_wait) {
            // keep CPU busy
            do {
                res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
            }
            while (res == COI_TIME_OUT_REACHED);
        }
        else {
            res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
        }

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_event_wait, res);
        }
    }

    // scatter copyout data received from target
    if (!scatter_copyout_data()) {
        return false;
    }
    // wait for receive dependencies to become signaled
    if (m_out_deps_total > 0) {
        OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);

        if (__offload_active_wait) {
            // keep CPU busy
            do {
                res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
            }
            while (res == COI_TIME_OUT_REACHED);
        }
        else {
            res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
        }

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_event_wait, res);
        }
    }

    // destroy buffers
    {
        OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);

        for (BufferList::const_iterator it = m_destroy_buffers.begin();
             it != m_destroy_buffers.end(); it++) {
            res = COI::BufferDestroy(*it);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
report_coi_error(c_buf_destroy, res);
            }
        }
    }

    return true;
}

// Release the ORSL device reservation and finalize timing/reporting for
// this offload. Called once per offload, on both success and failure paths.
void OffloadDescriptor::cleanup()
{
    // release device in orsl
    ORSL::release(m_device.get_logical_index());

    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);

    // report stuff
    Offload_Report_Epilog(get_timer_data());
}

// Non-blocking check whether this offload has completed: polls both the
// compute and receive dependency events with a zero timeout and reports
// true only if every registered event has been signaled.
bool OffloadDescriptor::is_signaled()
{
    bool signaled = true;
    COIRESULT res;

    // check compute and receive dependencies
    if (m_in_deps_total > 0) {
        res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
        signaled = signaled && (res == COI_SUCCESS);
    }
    if (m_out_deps_total > 0) {
        res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
        signaled = signaled && (res == COI_SUCCESS);
    }

    return signaled;
}

// Send pointer data when the source, the destination, or both are
// noncontiguous. There is a guarantee that the destination length is
// sufficient for the transferred data. The data is sent as a sequence of
// contiguous chunks of size 'send_size' (the smaller of the two
// contiguous range sizes).
bool OffloadDescriptor::send_noncontiguous_pointer_data(
    int i,
    PtrData* src_data,
    PtrData* dst_data,
    COIEVENT *event
    )
{
    int64_t offset_src, offset_dst;
    int64_t length_src, length_dst;
    int64_t length_src_cur, length_dst_cur;
    int64_t send_size, data_sent = 0;
    COIRESULT res;
    bool dst_is_empty = true;
    bool src_is_empty = true;

    // Set length_src and length_dst: the size of one contiguous range on
    // each side, or the whole variable size if that side is contiguous
    length_src = (m_vars_extra[i].read_rng_src) ?
        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
    length_dst = !m_vars[i].into ? length_src :
        (m_vars_extra[i].read_rng_dst) ?
        m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
    send_size = (length_src < length_dst) ?
length_src : length_dst;

    // consequently get contiguous ranges,
    // define corresponded destination offset and send data
    do {
        // pick the next contiguous source chunk
        if (src_is_empty) {
            if (m_vars_extra[i].read_rng_src) {
                if (!get_next_range(m_vars_extra[i].read_rng_src,
                                    &offset_src)) {
                    // source ranges are over - nothing to send
                    break;
                }
            }
            else if (data_sent == 0) {
                // contiguous source: single range starting at cpu_disp
                offset_src = m_vars_extra[i].cpu_disp;
            }
            else {
                // contiguous source already sent in full
                break;
            }
            length_src_cur = length_src;
        }
        else {
            // if source is contiguous or its contiguous range is greater
            // than destination one
            offset_src += send_size;
        }
        length_src_cur -= send_size;
        src_is_empty = length_src_cur == 0;

        // pick the next contiguous destination chunk
        if (dst_is_empty) {
            if (m_vars[i].into) {
                if (m_vars_extra[i].read_rng_dst) {
                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
                                        &offset_dst)) {
                        // destination ranges are over
                        LIBOFFLOAD_ERROR(c_destination_is_over);
                        return false;
                    }
                }
                // into is contiguous.
                else {
                    offset_dst = m_vars[i].disp;
                }
                length_dst_cur = length_dst;
            }
            // same as source
            else {
                offset_dst = offset_src;
                length_dst_cur = length_src;
            }
        }
        else {
            // if destination is contiguous or its contiguous range is greater
            // than source one
            offset_dst += send_size;
        }
        length_dst_cur -= send_size;
        dst_is_empty = length_dst_cur == 0;

        // source already lives in a COI buffer: device-to-device copy;
        // otherwise write directly from host memory
        if (src_data != 0 && src_data->cpu_buf != 0) {
            res = COI::BufferCopy(
                dst_data->mic_buf,
                src_data->cpu_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                m_vars_extra[i].cpu_offset + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_copy, res);
            }
        }
        else {
            char *base = offload_get_src_base(m_vars[i].ptr,
                                              m_vars[i].type.src);

            res = COI::BufferWrite(
                dst_data->mic_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                base + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_write, res);
            }
        }
        // NOTE(review): accumulates length_src per chunk although only
        // send_size bytes were transferred this iteration; data_sent is
        // only tested against 0 above (first-chunk detection), so this
        // appears intentional — confirm.
        data_sent += length_src;
    }
    while (true);
    return true;
}

// Initiate sends of pointer-based variable data to the target device.
// Walks all variable descriptors and issues the appropriate COI transfer
// for each destination type.
bool OffloadDescriptor::send_pointer_data(bool is_async)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);

    uint64_t ptr_sent = 0;
    COIRESULT res;

    // Initiate send for pointer data
    for (int i = 0; i < m_vars_total; i++) {
        switch (m_vars[i].type.dst) {
        case c_data_ptr_array:
            break;
        case c_data:
        case c_void_ptr:
        case c_cean_var:
            if (m_vars[i].direction.in
&& 2468 m_vars[i].flags.is_static_dstn) { 2469 COIEVENT *event = 2470 (is_async || 2471 m_vars[i].size >= __offload_use_async_buffer_write) ? 2472 &m_in_deps[m_in_deps_total++] : 0; 2473 PtrData* dst_data = m_vars[i].into ? 2474 m_vars_extra[i].dst_data : 2475 m_vars_extra[i].src_data; 2476 PtrData* src_data = 2477 VAR_TYPE_IS_PTR(m_vars[i].type.src) || 2478 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && 2479 m_vars[i].flags.is_static ? 2480 m_vars_extra[i].src_data : 0; 2481 2482 if (m_vars[i].flags.is_noncont_src || 2483 m_vars[i].flags.is_noncont_dst) { 2484 if (!send_noncontiguous_pointer_data( 2485 i, src_data, dst_data, event)) { 2486 return false; 2487 } 2488 } 2489 else if (src_data != 0 && src_data->cpu_buf != 0) { 2490 res = COI::BufferCopy( 2491 dst_data->mic_buf, 2492 src_data->cpu_buf, 2493 m_vars[i].mic_offset - dst_data->alloc_disp + 2494 m_vars[i].offset + m_vars[i].disp, 2495 m_vars_extra[i].cpu_offset + 2496 m_vars_extra[i].cpu_disp, 2497 m_vars[i].size, 2498 COI_COPY_UNSPECIFIED, 2499 0, 0, 2500 event); 2501 if (res != COI_SUCCESS) { 2502 if (m_status != 0) { 2503 m_status->result = translate_coi_error(res); 2504 return false; 2505 } 2506 report_coi_error(c_buf_copy, res); 2507 } 2508 } 2509 else { 2510 char *base = offload_get_src_base(m_vars[i].ptr, 2511 m_vars[i].type.src); 2512 res = COI::BufferWrite( 2513 dst_data->mic_buf, 2514 m_vars[i].mic_offset - dst_data->alloc_disp + 2515 m_vars[i].offset + m_vars[i].disp, 2516 base + m_vars_extra[i].cpu_disp, 2517 m_vars[i].size, 2518 COI_COPY_UNSPECIFIED, 2519 0, 0, 2520 event); 2521 if (res != COI_SUCCESS) { 2522 if (m_status != 0) { 2523 m_status->result = translate_coi_error(res); 2524 return false; 2525 } 2526 report_coi_error(c_buf_write, res); 2527 } 2528 } 2529 ptr_sent += m_vars[i].size; 2530 } 2531 break; 2532 2533 case c_string_ptr: 2534 case c_data_ptr: 2535 case c_cean_var_ptr: 2536 case c_dv_ptr: 2537 if (m_vars[i].direction.in && m_vars[i].size > 0) { 2538 COIEVENT *event = 2539 (is_async 
|| 2540 m_vars[i].size >= __offload_use_async_buffer_write) ? 2541 &m_in_deps[m_in_deps_total++] : 0; 2542 PtrData* dst_data = m_vars[i].into ? 2543 m_vars_extra[i].dst_data : 2544 m_vars_extra[i].src_data; 2545 PtrData* src_data = 2546 VAR_TYPE_IS_PTR(m_vars[i].type.src) || 2547 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && 2548 m_vars[i].flags.is_static ? 2549 m_vars_extra[i].src_data : 0; 2550 2551 if (m_vars[i].flags.is_noncont_src || 2552 m_vars[i].flags.is_noncont_dst) { 2553 send_noncontiguous_pointer_data( 2554 i, src_data, dst_data, event); 2555 } 2556 else if (src_data != 0 && src_data->cpu_buf != 0) { 2557 res = COI::BufferCopy( 2558 dst_data->mic_buf, 2559 src_data->cpu_buf, 2560 m_vars[i].mic_offset - dst_data->alloc_disp + 2561 m_vars[i].offset + m_vars[i].disp, 2562 m_vars_extra[i].cpu_offset + 2563 m_vars_extra[i].cpu_disp, 2564 m_vars[i].size, 2565 COI_COPY_UNSPECIFIED, 2566 0, 0, 2567 event); 2568 if (res != COI_SUCCESS) { 2569 if (m_status != 0) { 2570 m_status->result = translate_coi_error(res); 2571 return false; 2572 } 2573 report_coi_error(c_buf_copy, res); 2574 } 2575 } 2576 else { 2577 char *base = offload_get_src_base(m_vars[i].ptr, 2578 m_vars[i].type.src); 2579 res = COI::BufferWrite( 2580 dst_data->mic_buf, 2581 m_vars[i].mic_offset - dst_data->alloc_disp + 2582 m_vars[i].offset + m_vars[i].disp, 2583 base + m_vars_extra[i].cpu_disp, 2584 m_vars[i].size, 2585 COI_COPY_UNSPECIFIED, 2586 0, 0, 2587 event); 2588 if (res != COI_SUCCESS) { 2589 if (m_status != 0) { 2590 m_status->result = translate_coi_error(res); 2591 return false; 2592 } 2593 report_coi_error(c_buf_write, res); 2594 } 2595 } 2596 2597 ptr_sent += m_vars[i].size; 2598 } 2599 break; 2600 2601 case c_dv_data: 2602 case c_dv_ptr_data: 2603 if (m_vars[i].direction.in && 2604 m_vars[i].size > 0) { 2605 PtrData *ptr_data = m_vars[i].into ? 
2606 m_vars_extra[i].dst_data : 2607 m_vars_extra[i].src_data; 2608 PtrData* src_data = m_vars_extra[i].src_data; 2609 2610 COIEVENT *event = 2611 (is_async || 2612 m_vars[i].size >= __offload_use_async_buffer_write) ? 2613 &m_in_deps[m_in_deps_total++] : 0; 2614 2615 if (m_vars[i].flags.is_noncont_src || 2616 m_vars[i].flags.is_noncont_dst) { 2617 send_noncontiguous_pointer_data( 2618 i, src_data, ptr_data, event); 2619 } 2620 else if (src_data && src_data->cpu_buf != 0) { 2621 res = COI::BufferCopy( 2622 ptr_data->mic_buf, 2623 src_data->cpu_buf, 2624 m_vars[i].offset + ptr_data->mic_offset - 2625 ptr_data->alloc_disp + 2626 m_vars[i].disp, 2627 m_vars_extra[i].cpu_offset + 2628 m_vars_extra[i].cpu_disp, 2629 m_vars[i].size, 2630 COI_COPY_UNSPECIFIED, 2631 0, 0, 2632 event); 2633 if (res != COI_SUCCESS) { 2634 if (m_status != 0) { 2635 m_status->result = translate_coi_error(res); 2636 return false; 2637 } 2638 report_coi_error(c_buf_copy, res); 2639 } 2640 } 2641 else { 2642 char *base = offload_get_src_base(m_vars[i].ptr, 2643 m_vars[i].type.src); 2644 res = COI::BufferWrite( 2645 ptr_data->mic_buf, 2646 ptr_data->mic_offset - ptr_data->alloc_disp + 2647 m_vars[i].offset + m_vars[i].disp, 2648 base + m_vars_extra[i].cpu_disp, 2649 m_vars[i].size, 2650 COI_COPY_UNSPECIFIED, 2651 0, 0, 2652 event); 2653 if (res != COI_SUCCESS) { 2654 if (m_status != 0) { 2655 m_status->result = translate_coi_error(res); 2656 return false; 2657 } 2658 report_coi_error(c_buf_write, res); 2659 } 2660 } 2661 ptr_sent += m_vars[i].size; 2662 } 2663 break; 2664 2665 case c_dv_data_slice: 2666 case c_dv_ptr_data_slice: 2667 if (m_vars[i].direction.in && 2668 m_vars[i].size > 0) { 2669 PtrData *dst_data = m_vars[i].into ? 
2670 m_vars_extra[i].dst_data : 2671 m_vars_extra[i].src_data; 2672 PtrData* src_data = 2673 (VAR_TYPE_IS_PTR(m_vars[i].type.src) || 2674 VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) || 2675 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) || 2676 VAR_TYPE_IS_SCALAR(m_vars[i].type.src) && 2677 m_vars[i].flags.is_static) ? 2678 m_vars_extra[i].src_data : 0; 2679 COIEVENT *event = 2680 (is_async || 2681 m_vars[i].size >= __offload_use_async_buffer_write) ? 2682 &m_in_deps[m_in_deps_total++] : 0; 2683 if (m_vars[i].flags.is_noncont_src || 2684 m_vars[i].flags.is_noncont_dst) { 2685 send_noncontiguous_pointer_data( 2686 i, src_data, dst_data, event); 2687 } 2688 else if (src_data && src_data->cpu_buf != 0) { 2689 res = COI::BufferCopy( 2690 dst_data->mic_buf, 2691 src_data->cpu_buf, 2692 m_vars[i].offset - dst_data->alloc_disp + 2693 dst_data->mic_offset + 2694 m_vars[i].disp, 2695 m_vars_extra[i].cpu_offset + 2696 m_vars_extra[i].cpu_disp, 2697 m_vars[i].size, 2698 COI_COPY_UNSPECIFIED, 2699 0, 0, 2700 event); 2701 if (res != COI_SUCCESS) { 2702 if (m_status != 0) { 2703 m_status->result = translate_coi_error(res); 2704 return false; 2705 } 2706 report_coi_error(c_buf_copy, res); 2707 } 2708 } 2709 else { 2710 char *base = offload_get_src_base(m_vars[i].ptr, 2711 m_vars[i].type.src); 2712 res = COI::BufferWrite( 2713 dst_data->mic_buf, 2714 dst_data->mic_offset - dst_data->alloc_disp + 2715 m_vars[i].offset + m_vars[i].disp, 2716 base + m_vars_extra[i].cpu_disp, 2717 m_vars[i].size, 2718 COI_COPY_UNSPECIFIED, 2719 0, 0, 2720 event); 2721 if (res != COI_SUCCESS) { 2722 if (m_status != 0) { 2723 m_status->result = translate_coi_error(res); 2724 return false; 2725 } 2726 report_coi_error(c_buf_write, res); 2727 } 2728 } 2729 2730 ptr_sent += m_vars[i].size; 2731 } 2732 break; 2733 2734 default: 2735 break; 2736 } 2737 2738 // alloc field isn't used at target. 2739 // We can reuse it for offset of array pointers. 
2740 if (m_vars_extra[i].is_arr_ptr_el) { 2741 m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset; 2742 } 2743 } 2744 2745 if (m_status) { 2746 m_status->data_sent += ptr_sent; 2747 } 2748 2749 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent); 2750 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()), 2751 c_offload_sent_pointer_data, 2752 "Total pointer data sent to target: [%lld] bytes\n", 2753 ptr_sent); 2754 2755 return true; 2756} 2757 2758bool OffloadDescriptor::gather_copyin_data() 2759{ 2760 OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs); 2761 2762 if (m_need_runfunction && m_in_datalen > 0) { 2763 COIMAPINSTANCE map_inst; 2764 char *data; 2765 2766 // init marshaller 2767 if (m_inout_buf != 0) { 2768 OffloadTimer timer_map(get_timer_data(), 2769 c_offload_host_map_in_data_buffer); 2770 2771 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen, 2772 COI_MAP_WRITE_ENTIRE_BUFFER, 2773 0, 0, 0, &map_inst, 2774 reinterpret_cast<void**>(&data)); 2775 if (res != COI_SUCCESS) { 2776 if (m_status != 0) { 2777 m_status->result = translate_coi_error(res); 2778 return false; 2779 } 2780 report_coi_error(c_buf_map, res); 2781 } 2782 } 2783 else { 2784 data = (char*) m_func_desc + m_func_desc->data_offset; 2785 } 2786 2787 // send variable descriptors 2788 memcpy(data, m_vars, m_vars_total * sizeof(VarDesc)); 2789 data += m_vars_total * sizeof(VarDesc); 2790 2791 // init marshaller 2792 m_in.init_buffer(data, m_in_datalen); 2793 2794 // Gather copy data into buffer 2795 for (int i = 0; i < m_vars_total; i++) { 2796 bool src_is_for_mic = (m_vars[i].direction.out || 2797 m_vars[i].into == NULL); 2798 PtrData* ptr_data = src_is_for_mic ? 
2799 m_vars_extra[i].src_data : 2800 m_vars_extra[i].dst_data; 2801 if (m_vars[i].flags.alloc_disp) { 2802 m_in.send_data(&ptr_data->alloc_disp, 2803 sizeof(ptr_data->alloc_disp)); 2804 } 2805 2806 // send sink address to the target 2807 if (m_vars[i].flags.sink_addr) { 2808 m_in.send_data(&ptr_data->mic_addr, 2809 sizeof(ptr_data->mic_addr)); 2810 } 2811 2812 switch (m_vars[i].type.dst) { 2813 case c_data_ptr_array: 2814 break; 2815 case c_data: 2816 case c_void_ptr: 2817 case c_cean_var: 2818 if (m_vars[i].direction.in && 2819 !m_vars[i].flags.is_static_dstn) { 2820 2821 char *ptr = offload_get_src_base(m_vars[i].ptr, 2822 m_vars[i].type.src); 2823 if (m_vars[i].type.dst == c_cean_var) { 2824 // offset and length are derived from the array 2825 // descriptor 2826 int64_t size = m_vars[i].size; 2827 int64_t disp = m_vars[i].disp; 2828 m_in.send_data(reinterpret_cast<char*>(&size), 2829 sizeof(int64_t)); 2830 m_in.send_data(reinterpret_cast<char*>(&disp), 2831 sizeof(int64_t)); 2832 } 2833 2834 m_in.send_data(ptr + m_vars_extra[i].cpu_disp, 2835 m_vars[i].size); 2836 } 2837 break; 2838 2839 case c_dv: 2840 if (m_vars[i].direction.bits || 2841 m_vars[i].alloc_if || 2842 m_vars[i].free_if) { 2843 // send dope vector excluding base 2844 char *ptr = static_cast<char*>(m_vars[i].ptr); 2845 m_in.send_data(ptr + sizeof(uint64_t), 2846 m_vars[i].size - sizeof(uint64_t)); 2847 } 2848 break; 2849 2850 case c_data_ptr: 2851 // send to target addresses of obsolete 2852 // stacks to be released 2853 if (m_vars[i].flags.is_stack_buf && 2854 !m_vars[i].direction.bits && 2855 m_vars[i].alloc_if && 2856 m_vars[i].size != 0) { 2857 for (PtrDataList::iterator it = 2858 m_destroy_stack.begin(); 2859 it != m_destroy_stack.end(); it++) { 2860 PtrData * ptr_data = *it; 2861 m_in.send_data(&(ptr_data->mic_addr), 2862 sizeof(ptr_data->mic_addr)); 2863 } 2864 } 2865 break; 2866 case c_func_ptr: 2867 if (m_vars[i].direction.in) { 2868 m_in.send_func_ptr(*((const void**) m_vars[i].ptr)); 2869 
} 2870 break; 2871 2872 default: 2873 break; 2874 } 2875 } 2876 2877 if (m_status) { 2878 m_status->data_sent += m_in.get_tfr_size(); 2879 } 2880 2881 if (m_func_desc->data_offset == 0) { 2882 OffloadTimer timer_unmap(get_timer_data(), 2883 c_offload_host_unmap_in_data_buffer); 2884 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0); 2885 if (res != COI_SUCCESS) { 2886 if (m_status != 0) { 2887 m_status->result = translate_coi_error(res); 2888 return false; 2889 } 2890 report_coi_error(c_buf_unmap, res); 2891 } 2892 } 2893 } 2894 2895 OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size()); 2896 OFFLOAD_DEBUG_TRACE_1(1, 2897 GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data, 2898 "Total copyin data sent to target: [%lld] bytes\n", 2899 m_in.get_tfr_size()); 2900 2901 return true; 2902} 2903 2904bool OffloadDescriptor::compute() 2905{ 2906 OffloadTimer timer(get_timer_data(), c_offload_host_start_compute); 2907 2908 if (m_need_runfunction) { 2909 OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()), 2910 c_offload_compute, "Compute task on MIC\n"); 2911 2912 void* misc = m_func_desc; 2913 int misc_len = m_func_desc_size; 2914 void* ret = 0; 2915 int ret_len = 0; 2916 2917 if (m_func_desc->data_offset != 0) { 2918 misc_len += m_in_datalen; 2919 2920 if (m_out_datalen > 0) { 2921 ret = (char*) m_func_desc + m_func_desc->data_offset; 2922 ret_len = m_out_datalen; 2923 } 2924 } 2925 2926 // dispatch task 2927 COIRESULT res; 2928 COIEVENT event; 2929 res = m_device.compute(m_compute_buffers, 2930 misc, misc_len, 2931 ret, ret_len, 2932 m_in_deps_total, 2933 m_in_deps_total > 0 ? 
m_in_deps : 0, 2934 &event); 2935 if (res != COI_SUCCESS) { 2936 if (m_status != 0) { 2937 m_status->result = translate_coi_error(res); 2938 return false; 2939 } 2940 report_coi_error(c_pipeline_run_func, res); 2941 } 2942 2943 m_in_deps_total = 1; 2944 m_in_deps[0] = event; 2945 } 2946 2947 return true; 2948} 2949 2950// recieve pointer data if source or destination or both of them are 2951// noncontiguous. There is guarantee that length of destination enough for 2952// transfered data. 2953bool OffloadDescriptor::recieve_noncontiguous_pointer_data( 2954 int i, 2955 char* base, 2956 COIBUFFER dst_buf, 2957 COIEVENT *event 2958) 2959{ 2960 int64_t offset_src, offset_dst; 2961 int64_t length_src, length_dst; 2962 int64_t length_src_cur, length_dst_cur; 2963 int64_t recieve_size, data_recieved = 0; 2964 COIRESULT res; 2965 bool dst_is_empty = true; 2966 bool src_is_empty = true; 2967 2968 // Set length_src and length_dst 2969 length_src = (m_vars_extra[i].read_rng_src) ? 2970 m_vars_extra[i].read_rng_src->range_size : m_vars[i].size; 2971 length_dst = !m_vars[i].into ? length_src : 2972 (m_vars_extra[i].read_rng_dst) ? 2973 m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size; 2974 recieve_size = (length_src < length_dst) ? 
length_src : length_dst; 2975 2976 // consequently get contiguous ranges, 2977 // define corresponded destination offset and recieve data 2978 do { 2979 // get sorce offset 2980 if (src_is_empty) { 2981 if (m_vars_extra[i].read_rng_src) { 2982 if (!get_next_range(m_vars_extra[i].read_rng_src, 2983 &offset_src)) { 2984 // source ranges are over - nothing to send 2985 break; 2986 } 2987 } 2988 else if (data_recieved == 0) { 2989 offset_src = 0; 2990 } 2991 else { 2992 break; 2993 } 2994 length_src_cur = length_src; 2995 } 2996 else { 2997 // if source is contiguous or its contiguous range is greater 2998 // than destination one 2999 offset_src += recieve_size; 3000 } 3001 length_src_cur -= recieve_size; 3002 src_is_empty = length_src_cur == 0; 3003 3004 // get destination offset 3005 if (dst_is_empty) { 3006 if (m_vars[i].into) { 3007 if (m_vars_extra[i].read_rng_dst) { 3008 if (!get_next_range(m_vars_extra[i].read_rng_dst, 3009 &offset_dst)) { 3010 // destination ranges are over 3011 LIBOFFLOAD_ERROR(c_destination_is_over); 3012 return false; 3013 } 3014 } 3015 // destination is contiguous. 3016 else { 3017 offset_dst = m_vars_extra[i].cpu_disp; 3018 } 3019 length_dst_cur = length_dst; 3020 } 3021 // same as source 3022 else { 3023 offset_dst = offset_src; 3024 length_dst_cur = length_src; 3025 } 3026 } 3027 else { 3028 // if destination is contiguous or its contiguous range is greater 3029 // than source one 3030 offset_dst += recieve_size; 3031 } 3032 length_dst_cur -= recieve_size; 3033 dst_is_empty = length_dst_cur == 0; 3034 3035 if (dst_buf != 0) { 3036 res = COI::BufferCopy( 3037 dst_buf, 3038 m_vars_extra[i].src_data->mic_buf, 3039 m_vars_extra[i].cpu_offset + offset_dst, 3040 m_vars[i].offset + offset_src + 3041 m_vars[i].mic_offset - 3042 m_vars_extra[i].src_data->alloc_disp, 3043 recieve_size, 3044 COI_COPY_UNSPECIFIED, 3045 m_in_deps_total, 3046 m_in_deps_total > 0 ? 
m_in_deps : 0, 3047 event); 3048 if (res != COI_SUCCESS) { 3049 if (m_status != 0) { 3050 m_status->result = translate_coi_error(res); 3051 return false; 3052 } 3053 report_coi_error(c_buf_copy, res); 3054 } 3055 } 3056 else { 3057 res = COI::BufferRead( 3058 m_vars_extra[i].src_data->mic_buf, 3059 m_vars[i].offset + offset_src + 3060 m_vars[i].mic_offset - 3061 m_vars_extra[i].src_data->alloc_disp, 3062 base + offset_dst, 3063 recieve_size, 3064 COI_COPY_UNSPECIFIED, 3065 m_in_deps_total, 3066 m_in_deps_total > 0 ? m_in_deps : 0, 3067 event); 3068 if (res != COI_SUCCESS) { 3069 if (m_status != 0) { 3070 m_status->result = translate_coi_error(res); 3071 return false; 3072 } 3073 report_coi_error(c_buf_read, res); 3074 } 3075 } 3076 data_recieved += recieve_size; 3077 } 3078 while (true); 3079 return true; 3080} 3081 3082bool OffloadDescriptor::receive_pointer_data(bool is_async) 3083{ 3084 OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads); 3085 3086 uint64_t ptr_received = 0; 3087 COIRESULT res; 3088 3089 for (int i = 0; i < m_vars_total; i++) { 3090 switch (m_vars[i].type.src) { 3091 case c_data_ptr_array: 3092 break; 3093 case c_data: 3094 case c_void_ptr: 3095 case c_cean_var: 3096 if (m_vars[i].direction.out && 3097 m_vars[i].flags.is_static) { 3098 COIEVENT *event = 3099 (is_async || 3100 m_in_deps_total > 0 || 3101 m_vars[i].size >= __offload_use_async_buffer_read) ? 3102 &m_out_deps[m_out_deps_total++] : 0; 3103 PtrData *ptr_data = NULL; 3104 COIBUFFER dst_buf = NULL; // buffer at host 3105 char *base; 3106 3107 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) { 3108 ptr_data = m_vars[i].into ? 3109 m_vars_extra[i].dst_data : 3110 m_vars_extra[i].src_data; 3111 } 3112 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) { 3113 if (m_vars[i].flags.is_static_dstn) { 3114 ptr_data = m_vars[i].into ? 3115 m_vars_extra[i].dst_data : 3116 m_vars_extra[i].src_data; 3117 } 3118 } 3119 dst_buf = ptr_data ? 
ptr_data->cpu_buf : NULL; 3120 if (dst_buf == NULL) { 3121 base = offload_get_src_base( 3122 m_vars[i].into ? 3123 static_cast<char*>(m_vars[i].into) : 3124 static_cast<char*>(m_vars[i].ptr), 3125 m_vars[i].type.dst); 3126 } 3127 3128 if (m_vars[i].flags.is_noncont_src || 3129 m_vars[i].flags.is_noncont_dst) { 3130 recieve_noncontiguous_pointer_data( 3131 i, base, dst_buf, event); 3132 } 3133 else if (dst_buf != 0) { 3134 res = COI::BufferCopy( 3135 dst_buf, 3136 m_vars_extra[i].src_data->mic_buf, 3137 m_vars_extra[i].cpu_offset + 3138 m_vars_extra[i].cpu_disp, 3139 m_vars[i].offset + m_vars[i].disp, 3140 m_vars[i].size, 3141 COI_COPY_UNSPECIFIED, 3142 m_in_deps_total, 3143 m_in_deps_total > 0 ? m_in_deps : 0, 3144 event); 3145 if (res != COI_SUCCESS) { 3146 if (m_status != 0) { 3147 m_status->result = translate_coi_error(res); 3148 return false; 3149 } 3150 report_coi_error(c_buf_copy, res); 3151 } 3152 } 3153 else { 3154 res = COI::BufferRead( 3155 m_vars_extra[i].src_data->mic_buf, 3156 m_vars[i].offset + m_vars[i].disp, 3157 base + m_vars_extra[i].cpu_offset + 3158 m_vars_extra[i].cpu_disp, 3159 m_vars[i].size, 3160 COI_COPY_UNSPECIFIED, 3161 m_in_deps_total, 3162 m_in_deps_total > 0 ? m_in_deps : 0, 3163 event); 3164 if (res != COI_SUCCESS) { 3165 if (m_status != 0) { 3166 m_status->result = translate_coi_error(res); 3167 return false; 3168 } 3169 report_coi_error(c_buf_read, res); 3170 } 3171 } 3172 ptr_received += m_vars[i].size; 3173 } 3174 break; 3175 3176 case c_string_ptr: 3177 case c_data_ptr: 3178 case c_cean_var_ptr: 3179 case c_dv_data: 3180 case c_dv_ptr_data: 3181 case c_dv_data_slice: 3182 case c_dv_ptr_data_slice: 3183 case c_dv_ptr: { 3184 COIBUFFER dst_buf = NULL; // buffer on host 3185 if (m_vars[i].direction.out && m_vars[i].size > 0) { 3186 COIEVENT *event = 3187 (is_async || 3188 m_in_deps_total > 0 || 3189 m_vars[i].size >= __offload_use_async_buffer_read) ? 
3190 &m_out_deps[m_out_deps_total++] : 0; 3191 3192 uint64_t dst_offset = 0; 3193 char *base = static_cast<char*>(m_vars[i].ptr); 3194 3195 if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) { 3196 PtrData *ptr_data = m_vars[i].into ? 3197 m_vars_extra[i].dst_data : 3198 m_vars_extra[i].src_data; 3199 dst_buf = ptr_data ? ptr_data->cpu_buf : NULL; 3200 if (dst_buf == NULL) { 3201 base = m_vars[i].into ? 3202 *static_cast<char**>(m_vars[i].into) : 3203 *static_cast<char**>(m_vars[i].ptr); 3204 } 3205 dst_offset = m_vars_extra[i].cpu_offset + 3206 m_vars_extra[i].cpu_disp; 3207 } 3208 else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) { 3209 if (m_vars[i].flags.is_static_dstn) { 3210 dst_buf = m_vars[i].into ? 3211 m_vars_extra[i].dst_data->cpu_buf : 3212 m_vars_extra[i].src_data->cpu_buf; 3213 } 3214 if (dst_buf == NULL) { 3215 base = offload_get_src_base( 3216 m_vars[i].into ? 3217 static_cast<char*>(m_vars[i].into) : 3218 static_cast<char*>(m_vars[i].ptr), 3219 m_vars[i].type.dst); 3220 } 3221 dst_offset = m_vars_extra[i].cpu_offset + 3222 m_vars_extra[i].cpu_disp; 3223 } 3224 else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) || 3225 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) { 3226 PtrData *ptr_data = m_vars[i].into != 0 ? 3227 m_vars_extra[i].dst_data : 3228 m_vars_extra[i].src_data; 3229 dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0; 3230 if (dst_buf == NULL) { 3231 base = offload_get_src_base( 3232 m_vars[i].into ? 
3233 static_cast<char*>(m_vars[i].into) : 3234 static_cast<char*>(m_vars[i].ptr), 3235 m_vars[i].type.dst); 3236 3237 } 3238 dst_offset = m_vars_extra[i].cpu_offset + 3239 m_vars_extra[i].cpu_disp; 3240 } 3241 3242 if (m_vars[i].flags.is_noncont_src || 3243 m_vars[i].flags.is_noncont_dst) { 3244 recieve_noncontiguous_pointer_data( 3245 i, base, dst_buf, event); 3246 } 3247 else if (dst_buf != 0) { 3248 res = COI::BufferCopy( 3249 dst_buf, 3250 m_vars_extra[i].src_data->mic_buf, 3251 dst_offset, 3252 m_vars[i].offset + m_vars[i].disp + 3253 m_vars[i].mic_offset - 3254 m_vars_extra[i].src_data->alloc_disp, 3255 m_vars[i].size, 3256 COI_COPY_UNSPECIFIED, 3257 m_in_deps_total, 3258 m_in_deps_total > 0 ? m_in_deps : 0, 3259 event); 3260 if (res != COI_SUCCESS) { 3261 if (m_status != 0) { 3262 m_status->result = translate_coi_error(res); 3263 return false; 3264 } 3265 report_coi_error(c_buf_copy, res); 3266 } 3267 } 3268 else { 3269 res = COI::BufferRead( 3270 m_vars_extra[i].src_data->mic_buf, 3271 m_vars[i].offset + m_vars[i].disp + 3272 m_vars[i].mic_offset - 3273 m_vars_extra[i].src_data->alloc_disp, 3274 base + dst_offset, 3275 m_vars[i].size, 3276 COI_COPY_UNSPECIFIED, 3277 m_in_deps_total, 3278 m_in_deps_total > 0 ? 
m_in_deps : 0, 3279 event); 3280 if (res != COI_SUCCESS) { 3281 if (m_status != 0) { 3282 m_status->result = translate_coi_error(res); 3283 return false; 3284 } 3285 report_coi_error(c_buf_read, res); 3286 } 3287 } 3288 ptr_received += m_vars[i].size; 3289 } 3290 break; 3291 } 3292 3293 default: 3294 break; 3295 } 3296 3297 // destroy buffers for obsolete stacks 3298 if (m_destroy_stack.size() != 0) { 3299 for (PtrDataList::iterator it = m_destroy_stack.begin(); 3300 it != m_destroy_stack.end(); it++) { 3301 PtrData *ptr_data = *it; 3302 m_destroy_buffers.push_back(ptr_data->mic_buf); 3303 OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n", 3304 ptr_data->mic_addr); 3305 } 3306 m_destroy_stack.clear(); 3307 } 3308 if (m_vars[i].free_if) { 3309 // remove association for automatic variables 3310 if (m_is_openmp && !m_vars[i].flags.is_static && 3311 (m_vars[i].type.src == c_data || 3312 m_vars[i].type.src == c_void_ptr || 3313 m_vars[i].type.src == c_cean_var)) { 3314 AutoData *auto_data = m_vars_extra[i].auto_data; 3315 if (auto_data != 0 && auto_data->remove_reference() == 0) { 3316 m_device.remove_auto_data(auto_data->cpu_addr.start()); 3317 } 3318 } 3319 3320 // destroy buffers 3321 if (m_vars[i].direction.out || m_vars[i].into == NULL) { 3322 if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) && 3323 !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) && 3324 !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) { 3325 continue; 3326 } 3327 3328 PtrData *ptr_data = m_vars_extra[i].src_data; 3329 if (ptr_data->remove_reference() == 0) { 3330 // destroy buffers 3331 if (ptr_data->cpu_buf != 0) { 3332 m_destroy_buffers.push_back(ptr_data->cpu_buf); 3333 } 3334 if (ptr_data->mic_buf != 0) { 3335 m_destroy_buffers.push_back(ptr_data->mic_buf); 3336 } 3337 OFFLOAD_TRACE(3, "Removing association for addr %p\n", 3338 ptr_data->cpu_addr.start()); 3339 3340 // remove association from map 3341 m_device.remove_ptr_data(ptr_data->cpu_addr.start()); 3342 } 3343 } 3344 else if 
(VAR_TYPE_IS_PTR(m_vars[i].type.dst) || 3345 VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) || 3346 VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) { 3347 PtrData *ptr_data = m_vars_extra[i].dst_data; 3348 if (ptr_data->remove_reference() == 0) { 3349 // destroy buffers 3350 if (ptr_data->cpu_buf != 0) { 3351 m_destroy_buffers.push_back(ptr_data->cpu_buf); 3352 } 3353 if (ptr_data->mic_buf != 0) { 3354 m_destroy_buffers.push_back(ptr_data->mic_buf); 3355 } 3356 OFFLOAD_TRACE(3, "Removing association for addr %p\n", 3357 ptr_data->cpu_addr.start()); 3358 3359 // remove association from map 3360 m_device.remove_ptr_data(ptr_data->cpu_addr.start()); 3361 } 3362 } 3363 } 3364 } 3365 3366 if (m_status) { 3367 m_status->data_received += ptr_received; 3368 } 3369 3370 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received); 3371 OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()), 3372 c_offload_received_pointer_data, 3373 "Total pointer data received from target: [%lld] bytes\n", 3374 ptr_received); 3375 3376 return true; 3377} 3378 3379bool OffloadDescriptor::scatter_copyout_data() 3380{ 3381 OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs); 3382 3383 if (m_need_runfunction && m_out_datalen > 0) { 3384 3385 // total size that need to be transferred from target to host 3386 COIMAPINSTANCE map_inst; 3387 COIRESULT res; 3388 char *data; 3389 3390 // output data buffer 3391 if (m_func_desc->data_offset == 0) { 3392 OffloadTimer timer_map(get_timer_data(), 3393 c_offload_host_map_out_data_buffer); 3394 3395 COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen, 3396 COI_MAP_READ_ONLY, 0, 0, 0, 3397 &map_inst, 3398 reinterpret_cast<void**>(&data)); 3399 if (res != COI_SUCCESS) { 3400 if (m_status != 0) { 3401 m_status->result = translate_coi_error(res); 3402 return false; 3403 } 3404 report_coi_error(c_buf_map, res); 3405 } 3406 } 3407 else { 3408 data = (char*) m_func_desc + m_func_desc->data_offset; 3409 } 3410 3411 // get timing data 3412 
OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data); 3413 data += OFFLOAD_TIMER_DATALEN(); 3414 3415 // initialize output marshaller 3416 m_out.init_buffer(data, m_out_datalen); 3417 3418 for (int i = 0; i < m_vars_total; i++) { 3419 switch (m_vars[i].type.src) { 3420 case c_data_ptr_array: 3421 break; 3422 case c_data: 3423 case c_void_ptr: 3424 case c_cean_var: 3425 if (m_vars[i].direction.out && 3426 !m_vars[i].flags.is_static) { 3427 3428 if (m_vars[i].into) { 3429 char *ptr = offload_get_src_base( 3430 static_cast<char*>(m_vars[i].into), 3431 m_vars[i].type.dst); 3432 m_out.receive_data(ptr + m_vars_extra[i].cpu_disp, 3433 m_vars[i].size); 3434 } 3435 else { 3436 m_out.receive_data( 3437 static_cast<char*>(m_vars[i].ptr) + 3438 m_vars_extra[i].cpu_disp, 3439 m_vars[i].size); 3440 } 3441 } 3442 break; 3443 3444 case c_func_ptr: 3445 if (m_vars[i].direction.out) { 3446 m_out.receive_func_ptr((const void**) m_vars[i].ptr); 3447 } 3448 break; 3449 3450 default: 3451 break; 3452 } 3453 } 3454 3455 if (m_status) { 3456 m_status->data_received += m_out.get_tfr_size(); 3457 } 3458 3459 if (m_func_desc->data_offset == 0) { 3460 OffloadTimer timer_unmap(get_timer_data(), 3461 c_offload_host_unmap_out_data_buffer); 3462 3463 COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0); 3464 if (res != COI_SUCCESS) { 3465 if (m_status != 0) { 3466 m_status->result = translate_coi_error(res); 3467 return false; 3468 } 3469 report_coi_error(c_buf_unmap, res); 3470 } 3471 } 3472 } 3473 3474 OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size()); 3475 OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n", 3476 m_out.get_tfr_size()); 3477 3478 return true; 3479} 3480 3481void get_arr_desc_numbers( 3482 const arr_desc *ap, 3483 int64_t el_size, 3484 int64_t &offset, 3485 int64_t &size, 3486 int &el_number, 3487 CeanReadRanges* &ptr_ranges 3488) 3489{ 3490 if (is_arr_desc_contiguous(ap)) { 3491 ptr_ranges = NULL; 3492 __arr_data_offset_and_length(ap, offset, 
size); 3493 el_number = size / el_size; 3494 } 3495 else { 3496 ptr_ranges = init_read_ranges_arr_desc(ap); 3497 el_number = (ptr_ranges->range_size / el_size) * 3498 ptr_ranges->range_max_number; 3499 size = ptr_ranges->range_size; 3500 } 3501} 3502 3503arr_desc * make_arr_desc( 3504 void* ptr_val, 3505 int64_t extent_start_val, 3506 int64_t extent_elements_val, 3507 int64_t size 3508) 3509{ 3510 arr_desc *res; 3511 res = (arr_desc *)malloc(sizeof(arr_desc)); 3512 if (res == NULL) 3513 LIBOFFLOAD_ERROR(c_malloc); 3514 res->base = reinterpret_cast<int64_t>(ptr_val); 3515 res->rank = 1; 3516 res->dim[0].size = size; 3517 res->dim[0].lindex = 0; 3518 res->dim[0].lower = extent_start_val; 3519 res->dim[0].upper = extent_elements_val + extent_start_val - 1; 3520 res->dim[0].stride = 1; 3521 return res; 3522} 3523 3524bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i) 3525{ 3526 int pointers_number; 3527 int tmp_val; 3528 int new_index = m_vars_total; 3529 const arr_desc *ap; 3530 const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr); 3531 int flags = vd3->array_fields; 3532 bool src_is_for_mic = (m_vars[i].direction.out || 3533 m_vars[i].into == NULL); 3534 3535 ReadArrElements<void *> ptr; 3536 ReadArrElements<void *> into; 3537 ReadArrElements<int64_t> ext_start; 3538 ReadArrElements<int64_t> ext_elements; 3539 ReadArrElements<int64_t> align; 3540 ReadArrElements<int64_t> alloc_if; 3541 ReadArrElements<int64_t> free_if; 3542 ReadArrElements<int64_t> into_start; 3543 ReadArrElements<int64_t> into_elem; 3544 ReadArrElements<int64_t> alloc_start; 3545 ReadArrElements<int64_t> alloc_elem; 3546 3547 3548 ap = static_cast<const arr_desc*>(vd3->ptr_array); 3549 3550 // "pointers_number" for total number of transfered pointers. 
3551 // For each of them we create new var_desc and put it at the bottom 3552 // of the var_desc's array 3553 get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size, 3554 pointers_number, ptr.ranges); 3555 ptr.base = reinterpret_cast<char*>(ap->base); 3556 3557 // 2. prepare memory for new var_descs 3558 m_vars_total += pointers_number; 3559 m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc)); 3560 if (m_vars == NULL) 3561 LIBOFFLOAD_ERROR(c_malloc); 3562 m_vars_extra = 3563 (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra)); 3564 if (m_vars_extra == NULL) 3565 LIBOFFLOAD_ERROR(c_malloc); 3566 m_in_deps = 3567 (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1)); 3568 if (m_in_deps == NULL) 3569 LIBOFFLOAD_ERROR(c_malloc); 3570 m_out_deps = 3571 (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total); 3572 if (m_out_deps == NULL) 3573 LIBOFFLOAD_ERROR(c_malloc); 3574 3575 // 3. Prepare for reading new var_desc's fields 3576 // EXTENT START 3577 if ((flags & (1<<flag_extent_start_is_array)) != 0) { 3578 ap = static_cast<const arr_desc*>(vd3->extent_start); 3579 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset, 3580 ext_start.size, tmp_val, ext_start.ranges); 3581 ext_start.base = reinterpret_cast<char*>(ap->base); 3582 ext_start.el_size = ap->dim[ap->rank - 1].size; 3583 3584 if (tmp_val < pointers_number) { 3585 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start"); 3586 return false; 3587 } 3588 } 3589 else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) { 3590 ext_start.val = (int64_t)vd3->extent_start; 3591 } 3592 else { 3593 ext_start.val = 0; 3594 } 3595 3596 // EXTENT ELEMENTS NUMBER 3597 if ((flags & (1<<flag_extent_elements_is_array)) != 0) { 3598 ap = static_cast<const arr_desc*>(vd3->extent_elements); 3599 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, 3600 ext_elements.offset, ext_elements.size, 3601 tmp_val, ext_elements.ranges); 3602 ext_elements.base = 
reinterpret_cast<char*>(ap->base); 3603 ext_elements.el_size = ap->dim[ap->rank - 1].size; 3604 3605 if (tmp_val < pointers_number) { 3606 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements"); 3607 return false; 3608 } 3609 } 3610 else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) { 3611 ext_elements.val = (int64_t)vd3->extent_elements; 3612 } 3613 else { 3614 ext_elements.val = m_vars[i].count; 3615 } 3616 3617 // ALLOC_IF 3618 if ((flags & (1<<flag_alloc_if_is_array)) != 0) { 3619 ap = static_cast<const arr_desc*>(vd3->alloc_if_array); 3620 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset, 3621 alloc_if.size, tmp_val, alloc_if.ranges); 3622 alloc_if.base = reinterpret_cast<char*>(ap->base); 3623 alloc_if.el_size = ap->dim[ap->rank - 1].size; 3624 3625 if (tmp_val < pointers_number) { 3626 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if"); 3627 return false; 3628 } 3629 } 3630 else { 3631 alloc_if.val = m_vars[i].count; 3632 } 3633 3634 // FREE_IF 3635 if ((flags & (1<<flag_free_if_is_array)) != 0) { 3636 ap = static_cast<const arr_desc*>(vd3->free_if_array); 3637 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset, 3638 free_if.size, tmp_val, free_if.ranges); 3639 free_if.base = reinterpret_cast<char*>(ap->base); 3640 free_if.el_size = ap->dim[ap->rank - 1].size; 3641 3642 if (tmp_val < pointers_number) { 3643 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if"); 3644 return false; 3645 } 3646 } 3647 else { 3648 free_if.val = m_vars[i].count; 3649 } 3650 3651 // ALIGN 3652 3653 if ((flags & (1<<flag_align_is_array)) != 0) { 3654 ap = static_cast<const arr_desc*>(vd3->align_array); 3655 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset, 3656 align.size, tmp_val, align.ranges); 3657 align.base = reinterpret_cast<char*>(ap->base); 3658 align.el_size = ap->dim[ap->rank - 1].size; 3659 3660 if (tmp_val < pointers_number) { 3661 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align"); 3662 return 
false; 3663 } 3664 } 3665 else { 3666 align.val = m_vars[i].align; 3667 } 3668 3669 // 3.1 INTO 3670 3671 if (m_vars[i].into) { 3672 ap = static_cast<const arr_desc*>(m_vars[i].into); 3673 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset, 3674 into.size, tmp_val, into.ranges); 3675 into.base = reinterpret_cast<char*>(ap->base); 3676 3677 if (tmp_val < pointers_number) { 3678 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into"); 3679 return false; 3680 } 3681 } 3682 3683 // 3.2 INTO_START 3684 3685 if ((flags & (1<<flag_into_start_is_array)) != 0) { 3686 ap = static_cast<const arr_desc*>(vd3->into_start); 3687 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset, 3688 into_start.size, tmp_val, into_start.ranges); 3689 into_start.base = reinterpret_cast<char*>(ap->base); 3690 into_start.el_size = ap->dim[ap->rank - 1].size; 3691 3692 if (tmp_val < pointers_number) { 3693 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start"); 3694 return false; 3695 } 3696 } 3697 else if ((flags & (1<<flag_into_start_is_scalar)) != 0) { 3698 into_start.val = (int64_t)vd3->into_start; 3699 } 3700 else { 3701 into_start.val = 0; 3702 } 3703 3704 // 3.3 INTO_ELEMENTS 3705 3706 if ((flags & (1<<flag_into_elements_is_array)) != 0) { 3707 ap = static_cast<const arr_desc*>(vd3->into_elements); 3708 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset, 3709 into_elem.size, tmp_val, into_elem.ranges); 3710 into_elem.base = reinterpret_cast<char*>(ap->base); 3711 into_elem.el_size = ap->dim[ap->rank - 1].size; 3712 3713 if (tmp_val < pointers_number) { 3714 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements"); 3715 return false; 3716 } 3717 } 3718 else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) { 3719 into_elem.val = (int64_t)vd3->into_elements; 3720 } 3721 else { 3722 into_elem.val = m_vars[i].count; 3723 } 3724 3725 // alloc_start 3726 3727 if ((flags & (1<<flag_alloc_start_is_array)) != 0) { 3728 ap = 
static_cast<const arr_desc*>(vd3->alloc_start); 3729 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, 3730 alloc_start.offset, alloc_start.size, tmp_val, 3731 alloc_start.ranges); 3732 alloc_start.base = reinterpret_cast<char*>(ap->base); 3733 alloc_start.el_size = ap->dim[ap->rank - 1].size; 3734 3735 if (tmp_val < pointers_number) { 3736 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start"); 3737 return false; 3738 } 3739 } 3740 else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) { 3741 alloc_start.val = (int64_t)vd3->alloc_start; 3742 } 3743 else { 3744 alloc_start.val = 0; 3745 } 3746 3747 // alloc_elem 3748 3749 if ((flags & (1<<flag_alloc_elements_is_array)) != 0) { 3750 ap = static_cast<const arr_desc*>(vd3->alloc_elements); 3751 get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset, 3752 alloc_elem.size, tmp_val, alloc_elem.ranges); 3753 alloc_elem.base = reinterpret_cast<char*>(ap->base); 3754 alloc_elem.el_size = ap->dim[ap->rank - 1].size; 3755 if (tmp_val < pointers_number) { 3756 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, 3757 "alloc_extent elements"); 3758 return false; 3759 } 3760 } 3761 else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) { 3762 alloc_elem.val = (int64_t)vd3->alloc_elements; 3763 } 3764 else { 3765 alloc_elem.val = 0; 3766 } 3767 3768 for (int k = 0; k < pointers_number; k++) { 3769 int type = flags & 0x3f; 3770 int type_src, type_dst; 3771 // Get new values 3772 // type_src, type_dst 3773 type_src = type_dst = (type == c_data_ptr_array) ? 3774 c_data_ptr : (type == c_func_ptr_array) ? 3775 c_func_ptr : (type == c_void_ptr_array) ? 3776 c_void_ptr : (type == c_string_ptr_array) ? 3777 c_string_ptr : 0; 3778 3779 // Get ptr val 3780 if (!ptr.read_next(true)) { 3781 break; 3782 } 3783 else { 3784 ptr.val = (void*)(ptr.base + ptr.offset); 3785 } 3786 3787 // !!! If we got error at phase of reading - it's an internal 3788 // !!! 
error, as we must detect mismatch before 3789 3790 // Get into val 3791 if (m_vars[i].into) { 3792 if (!into.read_next(true)) { 3793 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into"); 3794 LIBOFFLOAD_ABORT; 3795 } 3796 else { 3797 into.val = (void*)(into.base + into.offset); 3798 } 3799 } 3800 3801 // Get other components of the clause 3802 if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) { 3803 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start"); 3804 LIBOFFLOAD_ABORT; 3805 } 3806 if (!ext_elements.read_next( 3807 flags & (1<<flag_extent_elements_is_array))) { 3808 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements"); 3809 LIBOFFLOAD_ABORT; 3810 } 3811 if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) { 3812 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if"); 3813 LIBOFFLOAD_ABORT; 3814 } 3815 if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) { 3816 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if"); 3817 LIBOFFLOAD_ABORT; 3818 } 3819 if (!align.read_next(flags & (1<<flag_align_is_array))) { 3820 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align"); 3821 LIBOFFLOAD_ABORT; 3822 } 3823 if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) { 3824 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start"); 3825 LIBOFFLOAD_ABORT; 3826 } 3827 if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) { 3828 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements"); 3829 LIBOFFLOAD_ABORT; 3830 } 3831 if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) { 3832 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start"); 3833 LIBOFFLOAD_ABORT; 3834 } 3835 if (!alloc_elem.read_next( 3836 flags & (1<<flag_alloc_elements_is_array))) { 3837 LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements"); 3838 LIBOFFLOAD_ABORT; 3839 } 3840 3841 m_vars[new_index + k].direction.bits = m_vars[i].direction.bits; 3842 m_vars[new_index + k].alloc_if = 
alloc_if.val; 3843 m_vars[new_index + k].free_if = free_if.val; 3844 m_vars[new_index + k].align = align.val; 3845 m_vars[new_index + k].mic_offset = 0; 3846 m_vars[new_index + k].flags.bits = m_vars[i].flags.bits; 3847 m_vars[new_index + k].offset = 0; 3848 m_vars[new_index + k].size = m_vars[i].size; 3849 3850 if (ext_start.val == 0) { 3851 m_vars[new_index + k].count = ext_elements.val; 3852 m_vars[new_index + k].ptr = ptr.val; 3853 if (type_src == c_string_ptr) { 3854 m_vars[new_index + k].size = 0; 3855 } 3856 } 3857 else { 3858 m_vars[new_index + k].count = 0; 3859 m_vars[new_index + k].ptr = 3860 static_cast<void*>(make_arr_desc( 3861 ptr.val, 3862 ext_start.val, 3863 ext_elements.val, 3864 m_vars[i].size)); 3865 3866 type_src = type_src == c_data_ptr ? c_cean_var_ptr : 3867 c_string_ptr ? c_cean_var_ptr : 3868 type_src; 3869 if (!m_vars[i].into) { 3870 type_dst = type_src; 3871 } 3872 } 3873 3874 if (m_vars[i].into && into_elem.val != 0) { 3875 m_vars[new_index + k].into = 3876 static_cast<void*>(make_arr_desc( 3877 into.val, 3878 into_start.val, 3879 into_elem.val, 3880 m_vars[i].size)); 3881 type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr : 3882 (type == c_string_ptr_array) ? c_cean_var_ptr : 3883 type_src; 3884 } 3885 else { 3886 m_vars[new_index + k].into = NULL; 3887 } 3888 3889 if (alloc_elem.val != 0) { 3890 m_vars[new_index + k].alloc = 3891 static_cast<void*>(make_arr_desc( 3892 ptr.val, 3893 alloc_start.val, 3894 alloc_elem.val, 3895 m_vars[i].size)); 3896 } 3897 else { 3898 m_vars[new_index + k].alloc = NULL; 3899 } 3900 3901 m_vars[new_index + k].type.src = type_src; 3902 m_vars[new_index + k].type.dst = type_dst; 3903 3904 m_vars_extra[new_index + k].is_arr_ptr_el = 1; 3905 m_vars_extra[new_index + k].ptr_arr_offset = 3906 src_is_for_mic ? ptr.offset : into.offset; 3907 } 3908 // count and alloc fields are useless at target. They can be reused 3909 // for pointer arrays. 
3910 m_vars[i].count = pointers_number; 3911 m_vars[i].ptr_arr_offset = new_index; 3912 return true; 3913} 3914 3915static void __offload_fini_library(void) 3916{ 3917 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n"); 3918 if (mic_engines_total > 0) { 3919 delete[] mic_engines; 3920 3921 if (mic_proxy_fs_root != 0) { 3922 free(mic_proxy_fs_root); 3923 mic_proxy_fs_root = 0; 3924 } 3925 3926 if (mic_library_path != 0) { 3927 free(mic_library_path); 3928 mic_library_path = 0; 3929 } 3930 3931 // destroy thread key 3932 thread_key_delete(mic_thread_key); 3933 } 3934 3935 // unload COI library 3936 if (COI::is_available) { 3937 COI::fini(); 3938 } 3939 3940 OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n"); 3941} 3942 3943static void __offload_init_library_once(void) 3944{ 3945 COIRESULT res; 3946 uint32_t num_devices; 3947 std::bitset<MIC_ENGINES_MAX> devices; 3948 3949 prefix = report_get_message_str(c_report_host); 3950 3951 // initialize trace 3952 const char *env_var = getenv(htrace_envname); 3953 if (env_var != 0 && *env_var != '\0') { 3954 int64_t new_val; 3955 if (__offload_parse_int_string(env_var, new_val)) { 3956 console_enabled = new_val & 0x0f; 3957 } 3958 } 3959 3960 env_var = getenv(offload_report_envname); 3961 if (env_var != 0 && *env_var != '\0') { 3962 int64_t env_val; 3963 if (__offload_parse_int_string(env_var, env_val)) { 3964 if (env_val == OFFLOAD_REPORT_1 || 3965 env_val == OFFLOAD_REPORT_2 || 3966 env_val == OFFLOAD_REPORT_3) { 3967 offload_report_level = env_val; 3968 } 3969 else { 3970 LIBOFFLOAD_ERROR(c_invalid_env_report_value, 3971 offload_report_envname); 3972 } 3973 } 3974 else { 3975 LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, 3976 offload_report_envname); 3977 } 3978 } 3979 else if (!offload_report_level) { 3980 env_var = getenv(timer_envname); 3981 if (env_var != 0 && *env_var != '\0') { 3982 timer_enabled = atoi(env_var); 3983 } 3984 } 3985 3986 // initialize COI 3987 if (!COI::init()) { 3988 return; 3989 } 3990 
    // get number of devices installed in the system
    res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
    if (res != COI_SUCCESS) {
        return;
    }

    if (num_devices > MIC_ENGINES_MAX) {
        num_devices = MIC_ENGINES_MAX;
    }

    // fill in the list of devices that can be used for offloading
    env_var = getenv("OFFLOAD_DEVICES");
    if (env_var != 0) {
        if (strcasecmp(env_var, "none") != 0) {
            // value is composed of comma separated physical device indexes
            char *buf = strdup(env_var);
            char *str, *ptr;
            for (str = strtok_r(buf, ",", &ptr); str != 0;
                 str = strtok_r(0, ",", &ptr)) {
                // convert string to an int
                int64_t num;
                if (!__offload_parse_int_string(str, num)) {
                    LIBOFFLOAD_ERROR(c_mic_init5);

                    // fallback to using all installed devices
                    devices.reset();
                    for (int i = 0; i < num_devices; i++) {
                        devices.set(i);
                    }
                    break;
                }
                if (num < 0 || num >= num_devices) {
                    // out-of-range index: report it and skip, keep the rest
                    LIBOFFLOAD_ERROR(c_mic_init6, num);
                    continue;
                }
                devices.set(num);
            }
            free(buf);
        }
        // OFFLOAD_DEVICES=none leaves the device set empty on purpose
    }
    else {
        // use all available devices
        for (int i = 0; i < num_devices; i++) {
            COIENGINE engine;
            res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
            if (res == COI_SUCCESS) {
                devices.set(i);
            }
        }
    }

    mic_engines_total = devices.count();

    // no need to continue if there are no devices to offload to
    if (mic_engines_total <= 0) {
        return;
    }

    // initialize indexes for available devices: logical index l_idx maps
    // to physical device p_idx
    mic_engines = new Engine[mic_engines_total];
    for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
        if (devices[p_idx]) {
            mic_engines[l_idx].set_indexes(l_idx, p_idx);
            l_idx++;
        }
    }

    // library search path for device binaries
    env_var = getenv("MIC_LD_LIBRARY_PATH");
    if (env_var != 0) {
        mic_library_path = strdup(env_var);
    }

    // memory size reserved for COI buffers
    env_var = getenv("MIC_BUFFERSIZE");
    if (env_var != 0) {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            mic_buffer_size = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
        }
    }

    // determine stacksize for the pipeline on the device;
    // must be at least 16KB and a multiple of 4KB
    env_var = getenv("MIC_STACKSIZE");
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size) &&
            (new_size >= 16384) && ((new_size & 4095) == 0)) {
            mic_stack_size = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_mic_init3);
        }
    }

    // proxy I/O
    env_var = getenv("MIC_PROXY_IO");
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val)) {
            mic_proxy_io = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
        }
    }
    env_var = getenv("MIC_PROXY_FS_ROOT");
    if (env_var != 0 && *env_var != '\0') {
        mic_proxy_fs_root = strdup(env_var);
    }

    // Prepare environment for the target process using the following
    // rules
    // - If MIC_ENV_PREFIX is set then any environment variable on the
    //   host which has that prefix are copied to the device without
    //   the prefix.
    //   All other host environment variables are ignored.
    // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
    //   environment is duplicated.
    env_var = getenv("MIC_ENV_PREFIX");
    if (env_var != 0 && *env_var != '\0') {
        mic_env_vars.set_prefix(env_var);

        int len = strlen(env_var);
        for (int i = 0; environ[i] != 0; i++) {
            // pick variables that start with the prefix, excluding
            // MIC_LD_LIBRARY_PATH (handled above) and the degenerate
            // "<prefix>=value" case where stripping would leave no name
            if (strncmp(environ[i], env_var, len) == 0 &&
                strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
                environ[i][len] != '=') {
                mic_env_vars.analyze_env_var(environ[i]);
            }
        }
    }

    // create key for thread data
    if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
        LIBOFFLOAD_ERROR(c_mic_init4, errno);
        return;
    }

    // cpu frequency
    cpu_frequency = COI::PerfGetCycleFrequency();

    // threshold above which COI buffers use 2MB pages
    env_var = getenv(mic_use_2mb_buffers_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_2mb_buffers = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value,
                             mic_use_2mb_buffers_envname);
        }
    }

    // thresholds for asynchronous buffer write/read
    env_var = getenv(mic_use_async_buffer_write_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_async_buffer_write = new_size;
        }
    }

    env_var = getenv(mic_use_async_buffer_read_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_async_buffer_read = new_size;
        }
    }

    // mic initialization type
    env_var = getenv(offload_init_envname);
    if (env_var != 0 && *env_var != '\0') {
        if (strcmp(env_var, "on_offload") == 0) {
            __offload_init_type = c_init_on_offload;
        }
        else if (strcmp(env_var, "on_offload_all") == 0) {
            __offload_init_type = c_init_on_offload_all;
        }
#ifndef TARGET_WINNT
        else if (strcmp(env_var, "on_start") == 0) {
            __offload_init_type = c_init_on_start;
        }
#endif // TARGET_WINNT
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
        }
    }

    // active wait
    env_var = getenv(offload_active_wait_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val)) {
            __offload_active_wait = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
                             offload_active_wait_envname);
        }
    }

    // omp device num
    env_var = getenv(omp_device_num_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
            __omp_device_num = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
                             omp_device_num_envname);
        }
    }

    // init ORSL
    ORSL::init();
}

// Lazy one-time initialization entry point.  Returns non-zero when offload
// is usable (COI loaded and at least one engine found) and registers any
// target libraries that queued up before initialization completed.
extern int __offload_init_library(void)
{
    // do one time initialization
    static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
    __offload_run_once(&ctrl, __offload_init_library_once);

    // offload is available if COI is available and the number of devices > 0
    bool is_available = COI::is_available && (mic_engines_total > 0);

    // register pending libraries if there are any
    if (is_available && __target_libs) {
        mutex_locker_t locker(__target_libs_lock);

        for (TargetImageList::iterator it = __target_libs_list.begin();
             it != __target_libs_list.end(); it++) {
            // Register library in COI
            COI::ProcessRegisterLibraries(1, &it->data, &it->size,
                                          &it->origin, &it->offset);

            // add lib to all engines
            for (int i = 0; i < mic_engines_total; i++) {
                mic_engines[i].add_lib(*it);
            }
        }

        __target_libs = false;
        __target_libs_list.clear();
    }

    return is_available;
}

// Registers a target image packed as: 8-byte size, null-terminated binary
// name, then <size> bytes of contents (see the Image struct layout in
// __offload_unregister_image).  Executables trigger library
// initialization; shared libraries are queued for later registration.
extern "C" void __offload_register_image(const void *target_image)
{
    const struct Image *image = static_cast<const struct Image*>(target_image);

    // decode image
    const char *name = image->data;
    const void *data = image->data + strlen(image->data) + 1;
    uint64_t size = image->size;
    const char *origin = 0;
    uint64_t offset = 0;

    // our actions depend on the image type
    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
    switch (hdr->e_type) {
        case ET_EXEC:
            // Each offload application is supposed to have only one target
            // image representing target executable.
            // No thread synchronization is required here as the initialization
            // code is always executed in a single thread.
            if (__target_exe != 0) {
                LIBOFFLOAD_ERROR(c_multiple_target_exes);
                exit(1);
            }
            __target_exe = new TargetImage(name, data, size, origin, offset);

            // Registration code for execs is always called from the context
            // of main and thus we can safely call any function here,
            // including LoadLibrary API on windows. This is the place where
            // we do the offload library initialization.
            if (__offload_init_library()) {
                // initialize engine if init_type is on_start
                if (__offload_init_type == c_init_on_start) {
                    for (int i = 0; i < mic_engines_total; i++) {
                        mic_engines[i].init();
                    }
                }
            }
            break;

        case ET_DYN:
            // Registration code for libraries is called from the DllMain
            // context (on windows) and thus we cannot do anything useful
            // here. So we just add it to the list of pending libraries for
            // the later use.
            __target_libs_lock.lock();
            __target_libs = true;
            __target_libs_list.push_back(TargetImage(name, data, size,
                                                     origin, offset));
            __target_libs_lock.unlock();
            break;

        default:
            // something is definitely wrong, issue an error and exit
            LIBOFFLOAD_ERROR(c_unknown_binary_type);
            exit(1);
    }
}

// Called from image destructors.  For the executable image this performs
// the final runtime cleanup (timers, MYO, engines, COI).
extern "C" void __offload_unregister_image(const void *target_image)
{
    // Target image is packed as follows:
    //      8 bytes - size of the target binary
    //      null-terminated string - binary name
    //      <size> bytes - binary contents
    const struct Image {
        int64_t size;
        char data[];
    } *image = static_cast<const struct Image*>(target_image);

    // decode image
    const char *name = image->data;
    const void *data = image->data + strlen(image->data) + 1;

    // our actions depend on the image type
    const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
    if (hdr->e_type == ET_EXEC) {
        // We are executing exec's destructors.
        // It is time to do a library cleanup.
        if (timer_enabled) {
            Offload_Timer_Print();
        }

#ifdef MYO_SUPPORT
        __offload_myoFini();
#endif // MYO_SUPPORT

        __offload_fini_library();
    }
}

// Runtime trace interface for user programs

// Sets the console trace level at runtime (same effect as H_TRACE).
void __offload_console_trace(int level)
{
    console_enabled = level;
}

// User-visible offload API

// Number of MIC devices usable for offloading; initializes the library on
// first call.
int _Offload_number_of_devices(void)
{
    __offload_init_library();
    return mic_engines_total;
}

// Host-side stubs: on the host there is no current device, so return -1.
int _Offload_get_device_number(void)
{
    return -1;
}

int _Offload_get_physical_device_number(void)
{
    return -1;
}

// Returns non-zero when the async offload associated with "signal" on
// logical device "index" has completed; aborts on unknown signals.
int _Offload_signaled(int index, void *signal)
{
    __offload_init_library();

    // check index value
    // NOTE(review): indexes >= mic_engines_total are not rejected; they
    // wrap via the modulo below — confirm this is intended behavior.
    if (index < 0 || mic_engines_total <= 0) {
        LIBOFFLOAD_ERROR(c_offload_signaled1, index);
        LIBOFFLOAD_ABORT;
    }

    // find associated async task
    OffloadDescriptor *task =
        mic_engines[index % mic_engines_total].find_signal(signal, false);
    if (task == 0) {
        LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
        LIBOFFLOAD_ABORT;
    }

    return task->is_signaled();
}

// Toggles offload reporting at runtime; only ON/OFF values are accepted.
void _Offload_report(int val)
{
    if (val == OFFLOAD_REPORT_ON ||
        val == OFFLOAD_REPORT_OFF) {
        offload_report_enabled = val;
    }
}

// IDB support
// Debugger-visible state; names and layout are part of the debugger
// interface and must not be changed.
int __dbg_is_attached = 0;
int __dbg_target_id = -1;
pid_t __dbg_target_so_pid = -1;
char __dbg_target_exe_name[MAX_TARGET_NAME] = {0};
const int __dbg_api_major_version = 1;
const int __dbg_api_minor_version = 0;

// Empty hooks for the debugger to set breakpoints on.
void __dbg_target_so_loaded()
{
}
void __dbg_target_so_unloaded()
{
}