1/* Plugin for HSAIL execution. 2 3 Copyright (C) 2013-2020 Free Software Foundation, Inc. 4 5 Contributed by Martin Jambor <mjambor@suse.cz> and 6 Martin Liska <mliska@suse.cz>. 7 8 This file is part of the GNU Offloading and Multi Processing Library 9 (libgomp). 10 11 Libgomp is free software; you can redistribute it and/or modify it 12 under the terms of the GNU General Public License as published by 13 the Free Software Foundation; either version 3, or (at your option) 14 any later version. 15 16 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 17 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 19 more details. 20 21 Under Section 7 of GPL version 3, you are granted additional 22 permissions described in the GCC Runtime Library Exception, version 23 3.1, as published by the Free Software Foundation. 24 25 You should have received a copy of the GNU General Public License and 26 a copy of the GCC Runtime Library Exception along with this program; 27 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 28 <http://www.gnu.org/licenses/>. */ 29 30#include "config.h" 31#include <stdint.h> 32#include <stdio.h> 33#include <stdlib.h> 34#include <string.h> 35#include <pthread.h> 36#ifdef HAVE_INTTYPES_H 37#include <inttypes.h> 38#endif 39#include <stdbool.h> 40#include <hsa.h> 41#include <plugin/hsa_ext_finalize.h> 42#include <dlfcn.h> 43#include "libgomp-plugin.h" 44#include "gomp-constants.h" 45#include "secure_getenv.h" 46 47#ifdef HAVE_INTTYPES_H 48typedef uint64_t print_uint64_t; 49#else 50#define PRIu64 "lu" 51typedef unsigned long print_uint64_t; 52#endif 53 54/* As an HSA runtime is dlopened, following structure defines function 55 pointers utilized by the HSA plug-in. */ 56 57struct hsa_runtime_fn_info 58{ 59 /* HSA runtime. */ 60 hsa_status_t (*hsa_status_string_fn) (hsa_status_t status, 61 const char **status_string); 62 hsa_status_t (*hsa_agent_get_info_fn) (hsa_agent_t agent, 63 hsa_agent_info_t attribute, 64 void *value); 65 hsa_status_t (*hsa_init_fn) (void); 66 hsa_status_t (*hsa_iterate_agents_fn) 67 (hsa_status_t (*callback)(hsa_agent_t agent, void *data), void *data); 68 hsa_status_t (*hsa_region_get_info_fn) (hsa_region_t region, 69 hsa_region_info_t attribute, 70 void *value); 71 hsa_status_t (*hsa_queue_create_fn) 72 (hsa_agent_t agent, uint32_t size, hsa_queue_type_t type, 73 void (*callback)(hsa_status_t status, hsa_queue_t *source, void *data), 74 void *data, uint32_t private_segment_size, 75 uint32_t group_segment_size, hsa_queue_t **queue); 76 hsa_status_t (*hsa_agent_iterate_regions_fn) 77 (hsa_agent_t agent, 78 hsa_status_t (*callback)(hsa_region_t region, void *data), void *data); 79 hsa_status_t (*hsa_executable_destroy_fn) (hsa_executable_t executable); 80 hsa_status_t (*hsa_executable_create_fn) 81 (hsa_profile_t profile, hsa_executable_state_t executable_state, 82 const char *options, hsa_executable_t *executable); 83 hsa_status_t (*hsa_executable_global_variable_define_fn) 84 (hsa_executable_t executable, const char *variable_name, void *address); 85 hsa_status_t (*hsa_executable_load_code_object_fn) 86 (hsa_executable_t executable, hsa_agent_t agent, 87 hsa_code_object_t code_object, const char *options); 88 hsa_status_t (*hsa_executable_freeze_fn)(hsa_executable_t executable, 89 const char *options); 90 hsa_status_t (*hsa_signal_create_fn) (hsa_signal_value_t initial_value, 91 uint32_t num_consumers, 92 const hsa_agent_t *consumers, 93 hsa_signal_t *signal); 94 hsa_status_t (*hsa_memory_allocate_fn) (hsa_region_t region, size_t size, 95 void **ptr); 96 hsa_status_t (*hsa_memory_free_fn) (void *ptr); 97 hsa_status_t (*hsa_signal_destroy_fn) (hsa_signal_t signal); 98 hsa_status_t (*hsa_executable_get_symbol_fn) 99 (hsa_executable_t executable, const char *module_name, 100 const char *symbol_name, hsa_agent_t agent, int32_t call_convention, 101 hsa_executable_symbol_t *symbol); 102 hsa_status_t (*hsa_executable_symbol_get_info_fn) 103 (hsa_executable_symbol_t executable_symbol, 104 hsa_executable_symbol_info_t attribute, void *value); 105 uint64_t (*hsa_queue_add_write_index_release_fn) (const hsa_queue_t *queue, 106 uint64_t value); 107 uint64_t (*hsa_queue_load_read_index_acquire_fn) (const hsa_queue_t *queue); 108 void (*hsa_signal_store_relaxed_fn) (hsa_signal_t signal, 109 hsa_signal_value_t value); 110 void (*hsa_signal_store_release_fn) (hsa_signal_t signal, 111 hsa_signal_value_t value); 112 hsa_signal_value_t (*hsa_signal_wait_acquire_fn) 113 (hsa_signal_t signal, hsa_signal_condition_t condition, 114 hsa_signal_value_t compare_value, uint64_t timeout_hint, 115 hsa_wait_state_t wait_state_hint); 116 hsa_signal_value_t (*hsa_signal_load_acquire_fn) (hsa_signal_t signal); 117 hsa_status_t (*hsa_queue_destroy_fn) (hsa_queue_t *queue); 118 119 /* HSA finalizer. */ 120 hsa_status_t (*hsa_ext_program_add_module_fn) (hsa_ext_program_t program, 121 hsa_ext_module_t module); 122 hsa_status_t (*hsa_ext_program_create_fn) 123 (hsa_machine_model_t machine_model, hsa_profile_t profile, 124 hsa_default_float_rounding_mode_t default_float_rounding_mode, 125 const char *options, hsa_ext_program_t *program); 126 hsa_status_t (*hsa_ext_program_destroy_fn) (hsa_ext_program_t program); 127 hsa_status_t (*hsa_ext_program_finalize_fn) 128 (hsa_ext_program_t program,hsa_isa_t isa, 129 int32_t call_convention, hsa_ext_control_directives_t control_directives, 130 const char *options, hsa_code_object_type_t code_object_type, 131 hsa_code_object_t *code_object); 132}; 133 134/* HSA runtime functions that are initialized in init_hsa_context. */ 135 136static struct hsa_runtime_fn_info hsa_fns; 137 138/* Keep the following GOMP prefixed structures in sync with respective parts of 139 the compiler. */ 140 141/* Structure describing the run-time and grid properties of an HSA kernel 142 lauch. */ 143 144struct GOMP_kernel_launch_attributes 145{ 146 /* Number of dimensions the workload has. Maximum number is 3. */ 147 uint32_t ndim; 148 /* Size of the grid in the three respective dimensions. */ 149 uint32_t gdims[3]; 150 /* Size of work-groups in the respective dimensions. */ 151 uint32_t wdims[3]; 152}; 153 154/* Collection of information needed for a dispatch of a kernel from a 155 kernel. */ 156 157struct GOMP_hsa_kernel_dispatch 158{ 159 /* Pointer to a command queue associated with a kernel dispatch agent. */ 160 void *queue; 161 /* Pointer to reserved memory for OMP data struct copying. */ 162 void *omp_data_memory; 163 /* Pointer to a memory space used for kernel arguments passing. */ 164 void *kernarg_address; 165 /* Kernel object. */ 166 uint64_t object; 167 /* Synchronization signal used for dispatch synchronization. */ 168 uint64_t signal; 169 /* Private segment size. */ 170 uint32_t private_segment_size; 171 /* Group segment size. */ 172 uint32_t group_segment_size; 173 /* Number of children kernel dispatches. */ 174 uint64_t kernel_dispatch_count; 175 /* Debug purpose argument. */ 176 uint64_t debug; 177 /* Levels-var ICV. */ 178 uint64_t omp_level; 179 /* Kernel dispatch structures created for children kernel dispatches. */ 180 struct GOMP_hsa_kernel_dispatch **children_dispatches; 181 /* Number of threads. */ 182 uint32_t omp_num_threads; 183}; 184 185/* Part of the libgomp plugin interface. Return the name of the accelerator, 186 which is "hsa". */ 187 188const char * 189GOMP_OFFLOAD_get_name (void) 190{ 191 return "hsa"; 192} 193 194/* Part of the libgomp plugin interface. Return the specific capabilities the 195 HSA accelerator have. */ 196 197unsigned int 198GOMP_OFFLOAD_get_caps (void) 199{ 200 return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400; 201} 202 203/* Part of the libgomp plugin interface. Identify as HSA accelerator. */ 204 205int 206GOMP_OFFLOAD_get_type (void) 207{ 208 return OFFLOAD_TARGET_TYPE_HSA; 209} 210 211/* Return the libgomp version number we're compatible with. There is 212 no requirement for cross-version compatibility. */ 213 214unsigned 215GOMP_OFFLOAD_version (void) 216{ 217 return GOMP_VERSION; 218} 219 220/* Flag to decide whether print to stderr information about what is going on. 221 Set in init_debug depending on environment variables. */ 222 223static bool debug; 224 225/* Flag to decide if the runtime should suppress a possible fallback to host 226 execution. */ 227 228static bool suppress_host_fallback; 229 230/* Flag to locate HSA runtime shared library that is dlopened 231 by this plug-in. */ 232 233static const char *hsa_runtime_lib; 234 235/* Flag to decide if the runtime should support also CPU devices (can be 236 a simulator). */ 237 238static bool support_cpu_devices; 239 240/* Initialize debug and suppress_host_fallback according to the environment. */ 241 242static void 243init_enviroment_variables (void) 244{ 245 if (secure_getenv ("HSA_DEBUG")) 246 debug = true; 247 else 248 debug = false; 249 250 if (secure_getenv ("HSA_SUPPRESS_HOST_FALLBACK")) 251 suppress_host_fallback = true; 252 else 253 suppress_host_fallback = false; 254 255 hsa_runtime_lib = secure_getenv ("HSA_RUNTIME_LIB"); 256 if (hsa_runtime_lib == NULL) 257 hsa_runtime_lib = "libhsa-runtime64.so"; 258 259 support_cpu_devices = secure_getenv ("HSA_SUPPORT_CPU_DEVICES"); 260} 261 262/* Print a logging message with PREFIX to stderr if HSA_DEBUG value 263 is set to true. */ 264 265#define HSA_LOG(prefix, ...) \ 266 do \ 267 { \ 268 if (debug) \ 269 { \ 270 fprintf (stderr, prefix); \ 271 fprintf (stderr, __VA_ARGS__); \ 272 } \ 273 } \ 274 while (false) 275 276/* Print a debugging message to stderr. */ 277 278#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__) 279 280/* Print a warning message to stderr. */ 281 282#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__) 283 284/* Print HSA warning STR with an HSA STATUS code. */ 285 286static void 287hsa_warn (const char *str, hsa_status_t status) 288{ 289 if (!debug) 290 return; 291 292 const char *hsa_error_msg = "[unknown]"; 293 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); 294 295 fprintf (stderr, "HSA warning: %s\nRuntime message: %s", str, hsa_error_msg); 296} 297 298/* Report a fatal error STR together with the HSA error corresponding to STATUS 299 and terminate execution of the current process. */ 300 301static void 302hsa_fatal (const char *str, hsa_status_t status) 303{ 304 const char *hsa_error_msg = "[unknown]"; 305 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); 306 GOMP_PLUGIN_fatal ("HSA fatal error: %s\nRuntime message: %s", str, 307 hsa_error_msg); 308} 309 310/* Like hsa_fatal, except only report error message, and return FALSE 311 for propagating error processing to outside of plugin. */ 312 313static bool 314hsa_error (const char *str, hsa_status_t status) 315{ 316 const char *hsa_error_msg = "[unknown]"; 317 hsa_fns.hsa_status_string_fn (status, &hsa_error_msg); 318 GOMP_PLUGIN_error ("HSA fatal error: %s\nRuntime message: %s", str, 319 hsa_error_msg); 320 return false; 321} 322 323struct hsa_kernel_description 324{ 325 const char *name; 326 unsigned omp_data_size; 327 bool gridified_kernel_p; 328 unsigned kernel_dependencies_count; 329 const char **kernel_dependencies; 330}; 331 332struct global_var_info 333{ 334 const char *name; 335 void *address; 336}; 337 338/* Data passed by the static initializer of a compilation unit containing BRIG 339 to GOMP_offload_register. */ 340 341struct brig_image_desc 342{ 343 hsa_ext_module_t brig_module; 344 const unsigned kernel_count; 345 struct hsa_kernel_description *kernel_infos; 346 const unsigned global_variable_count; 347 struct global_var_info *global_variables; 348}; 349 350struct agent_info; 351 352/* Information required to identify, finalize and run any given kernel. */ 353 354struct kernel_info 355{ 356 /* Name of the kernel, required to locate it within the brig module. */ 357 const char *name; 358 /* Size of memory space for OMP data. */ 359 unsigned omp_data_size; 360 /* The specific agent the kernel has been or will be finalized for and run 361 on. */ 362 struct agent_info *agent; 363 /* The specific module where the kernel takes place. */ 364 struct module_info *module; 365 /* Mutex enforcing that at most once thread ever initializes a kernel for 366 use. A thread should have locked agent->modules_rwlock for reading before 367 acquiring it. */ 368 pthread_mutex_t init_mutex; 369 /* Flag indicating whether the kernel has been initialized and all fields 370 below it contain valid data. */ 371 bool initialized; 372 /* Flag indicating that the kernel has a problem that blocks an execution. */ 373 bool initialization_failed; 374 /* The object to be put into the dispatch queue. */ 375 uint64_t object; 376 /* Required size of kernel arguments. */ 377 uint32_t kernarg_segment_size; 378 /* Required size of group segment. */ 379 uint32_t group_segment_size; 380 /* Required size of private segment. */ 381 uint32_t private_segment_size; 382 /* List of all kernel dependencies. */ 383 const char **dependencies; 384 /* Number of dependencies. */ 385 unsigned dependencies_count; 386 /* Maximum OMP data size necessary for kernel from kernel dispatches. */ 387 unsigned max_omp_data_size; 388 /* True if the kernel is gridified. */ 389 bool gridified_kernel_p; 390}; 391 392/* Information about a particular brig module, its image and kernels. */ 393 394struct module_info 395{ 396 /* The next and previous module in the linked list of modules of an agent. */ 397 struct module_info *next, *prev; 398 /* The description with which the program has registered the image. */ 399 struct brig_image_desc *image_desc; 400 401 /* Number of kernels in this module. */ 402 int kernel_count; 403 /* An array of kernel_info structures describing each kernel in this 404 module. */ 405 struct kernel_info kernels[]; 406}; 407 408/* Information about shared brig library. */ 409 410struct brig_library_info 411{ 412 char *file_name; 413 hsa_ext_module_t image; 414}; 415 416/* Description of an HSA GPU agent and the program associated with it. */ 417 418struct agent_info 419{ 420 /* The HSA ID of the agent. Assigned when hsa_context is initialized. */ 421 hsa_agent_t id; 422 /* Whether the agent has been initialized. The fields below are usable only 423 if it has been. */ 424 bool initialized; 425 /* The HSA ISA of this agent. */ 426 hsa_isa_t isa; 427 /* Command queue of the agent. */ 428 hsa_queue_t *command_q; 429 /* Kernel from kernel dispatch command queue. */ 430 hsa_queue_t *kernel_dispatch_command_q; 431 /* The HSA memory region from which to allocate kernel arguments. */ 432 hsa_region_t kernarg_region; 433 434 /* Read-write lock that protects kernels which are running or about to be run 435 from interference with loading and unloading of images. Needs to be 436 locked for reading while a kernel is being run, and for writing if the 437 list of modules is manipulated (and thus the HSA program invalidated). */ 438 pthread_rwlock_t modules_rwlock; 439 /* The first module in a linked list of modules associated with this 440 kernel. */ 441 struct module_info *first_module; 442 443 /* Mutex enforcing that only one thread will finalize the HSA program. A 444 thread should have locked agent->modules_rwlock for reading before 445 acquiring it. */ 446 pthread_mutex_t prog_mutex; 447 /* Flag whether the HSA program that consists of all the modules has been 448 finalized. */ 449 bool prog_finalized; 450 /* Flag whether the program was finalized but with a failure. */ 451 bool prog_finalized_error; 452 /* HSA executable - the finalized program that is used to locate kernels. */ 453 hsa_executable_t executable; 454 /* List of BRIG libraries. */ 455 struct brig_library_info **brig_libraries; 456 /* Number of loaded shared BRIG libraries. */ 457 unsigned brig_libraries_count; 458}; 459 460/* Information about the whole HSA environment and all of its agents. */ 461 462struct hsa_context_info 463{ 464 /* Whether the structure has been initialized. */ 465 bool initialized; 466 /* Number of usable GPU HSA agents in the system. */ 467 int agent_count; 468 /* Array of agent_info structures describing the individual HSA agents. */ 469 struct agent_info *agents; 470}; 471 472/* Information about the whole HSA environment and all of its agents. */ 473 474static struct hsa_context_info hsa_context; 475 476#define DLSYM_FN(function) \ 477 hsa_fns.function##_fn = dlsym (handle, #function); \ 478 if (hsa_fns.function##_fn == NULL) \ 479 goto dl_fail; 480 481static bool 482init_hsa_runtime_functions (void) 483{ 484 void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY); 485 if (handle == NULL) 486 goto dl_fail; 487 488 DLSYM_FN (hsa_status_string) 489 DLSYM_FN (hsa_agent_get_info) 490 DLSYM_FN (hsa_init) 491 DLSYM_FN (hsa_iterate_agents) 492 DLSYM_FN (hsa_region_get_info) 493 DLSYM_FN (hsa_queue_create) 494 DLSYM_FN (hsa_agent_iterate_regions) 495 DLSYM_FN (hsa_executable_destroy) 496 DLSYM_FN (hsa_executable_create) 497 DLSYM_FN (hsa_executable_global_variable_define) 498 DLSYM_FN (hsa_executable_load_code_object) 499 DLSYM_FN (hsa_executable_freeze) 500 DLSYM_FN (hsa_signal_create) 501 DLSYM_FN (hsa_memory_allocate) 502 DLSYM_FN (hsa_memory_free) 503 DLSYM_FN (hsa_signal_destroy) 504 DLSYM_FN (hsa_executable_get_symbol) 505 DLSYM_FN (hsa_executable_symbol_get_info) 506 DLSYM_FN (hsa_queue_add_write_index_release) 507 DLSYM_FN (hsa_queue_load_read_index_acquire) 508 DLSYM_FN (hsa_signal_wait_acquire) 509 DLSYM_FN (hsa_signal_store_relaxed) 510 DLSYM_FN (hsa_signal_store_release) 511 DLSYM_FN (hsa_signal_load_acquire) 512 DLSYM_FN (hsa_queue_destroy) 513 DLSYM_FN (hsa_ext_program_add_module) 514 DLSYM_FN (hsa_ext_program_create) 515 DLSYM_FN (hsa_ext_program_destroy) 516 DLSYM_FN (hsa_ext_program_finalize) 517 return true; 518 519 dl_fail: 520 HSA_DEBUG ("while loading %s: %s\n", hsa_runtime_lib, dlerror ()); 521 return false; 522} 523 524/* Find kernel for an AGENT by name provided in KERNEL_NAME. */ 525 526static struct kernel_info * 527get_kernel_for_agent (struct agent_info *agent, const char *kernel_name) 528{ 529 struct module_info *module = agent->first_module; 530 531 while (module) 532 { 533 for (unsigned i = 0; i < module->kernel_count; i++) 534 if (strcmp (module->kernels[i].name, kernel_name) == 0) 535 return &module->kernels[i]; 536 537 module = module->next; 538 } 539 540 return NULL; 541} 542 543/* Return true if the agent is a GPU and acceptable of concurrent submissions 544 from different threads. */ 545 546static bool 547suitable_hsa_agent_p (hsa_agent_t agent) 548{ 549 hsa_device_type_t device_type; 550 hsa_status_t status 551 = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_DEVICE, 552 &device_type); 553 if (status != HSA_STATUS_SUCCESS) 554 return false; 555 556 switch (device_type) 557 { 558 case HSA_DEVICE_TYPE_GPU: 559 break; 560 case HSA_DEVICE_TYPE_CPU: 561 if (!support_cpu_devices) 562 return false; 563 break; 564 default: 565 return false; 566 } 567 568 uint32_t features = 0; 569 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_FEATURE, 570 &features); 571 if (status != HSA_STATUS_SUCCESS 572 || !(features & HSA_AGENT_FEATURE_KERNEL_DISPATCH)) 573 return false; 574 hsa_queue_type_t queue_type; 575 status = hsa_fns.hsa_agent_get_info_fn (agent, HSA_AGENT_INFO_QUEUE_TYPE, 576 &queue_type); 577 if (status != HSA_STATUS_SUCCESS 578 || (queue_type != HSA_QUEUE_TYPE_MULTI)) 579 return false; 580 581 return true; 582} 583 584/* Callback of hsa_iterate_agents, if AGENT is a GPU device, increment 585 agent_count in hsa_context. */ 586 587static hsa_status_t 588count_gpu_agents (hsa_agent_t agent, void *data __attribute__ ((unused))) 589{ 590 if (suitable_hsa_agent_p (agent)) 591 hsa_context.agent_count++; 592 return HSA_STATUS_SUCCESS; 593} 594 595/* Callback of hsa_iterate_agents, if AGENT is a GPU device, assign the agent 596 id to the describing structure in the hsa context. The index of the 597 structure is pointed to by DATA, increment it afterwards. */ 598 599static hsa_status_t 600assign_agent_ids (hsa_agent_t agent, void *data) 601{ 602 if (suitable_hsa_agent_p (agent)) 603 { 604 int *agent_index = (int *) data; 605 hsa_context.agents[*agent_index].id = agent; 606 ++*agent_index; 607 } 608 return HSA_STATUS_SUCCESS; 609} 610 611/* Initialize hsa_context if it has not already been done. 612 Return TRUE on success. */ 613 614static bool 615init_hsa_context (void) 616{ 617 hsa_status_t status; 618 int agent_index = 0; 619 620 if (hsa_context.initialized) 621 return true; 622 init_enviroment_variables (); 623 if (!init_hsa_runtime_functions ()) 624 { 625 HSA_DEBUG ("Run-time could not be dynamically opened\n"); 626 return false; 627 } 628 status = hsa_fns.hsa_init_fn (); 629 if (status != HSA_STATUS_SUCCESS) 630 return hsa_error ("Run-time could not be initialized", status); 631 HSA_DEBUG ("HSA run-time initialized\n"); 632 status = hsa_fns.hsa_iterate_agents_fn (count_gpu_agents, NULL); 633 if (status != HSA_STATUS_SUCCESS) 634 return hsa_error ("HSA GPU devices could not be enumerated", status); 635 HSA_DEBUG ("There are %i HSA GPU devices.\n", hsa_context.agent_count); 636 637 hsa_context.agents 638 = GOMP_PLUGIN_malloc_cleared (hsa_context.agent_count 639 * sizeof (struct agent_info)); 640 status = hsa_fns.hsa_iterate_agents_fn (assign_agent_ids, &agent_index); 641 if (agent_index != hsa_context.agent_count) 642 { 643 GOMP_PLUGIN_error ("Failed to assign IDs to all HSA agents"); 644 return false; 645 } 646 hsa_context.initialized = true; 647 return true; 648} 649 650/* Callback of dispatch queues to report errors. */ 651 652static void 653queue_callback (hsa_status_t status, 654 hsa_queue_t *queue __attribute__ ((unused)), 655 void *data __attribute__ ((unused))) 656{ 657 hsa_fatal ("Asynchronous queue error", status); 658} 659 660/* Callback of hsa_agent_iterate_regions. Determine if a memory REGION can be 661 used for kernarg allocations and if so write it to the memory pointed to by 662 DATA and break the query. */ 663 664static hsa_status_t 665get_kernarg_memory_region (hsa_region_t region, void *data) 666{ 667 hsa_status_t status; 668 hsa_region_segment_t segment; 669 670 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_SEGMENT, 671 &segment); 672 if (status != HSA_STATUS_SUCCESS) 673 return status; 674 if (segment != HSA_REGION_SEGMENT_GLOBAL) 675 return HSA_STATUS_SUCCESS; 676 677 uint32_t flags; 678 status = hsa_fns.hsa_region_get_info_fn (region, HSA_REGION_INFO_GLOBAL_FLAGS, 679 &flags); 680 if (status != HSA_STATUS_SUCCESS) 681 return status; 682 if (flags & HSA_REGION_GLOBAL_FLAG_KERNARG) 683 { 684 hsa_region_t *ret = (hsa_region_t *) data; 685 *ret = region; 686 return HSA_STATUS_INFO_BREAK; 687 } 688 return HSA_STATUS_SUCCESS; 689} 690 691/* Part of the libgomp plugin interface. Return the number of HSA devices on 692 the system. */ 693 694int 695GOMP_OFFLOAD_get_num_devices (void) 696{ 697 if (!init_hsa_context ()) 698 return 0; 699 return hsa_context.agent_count; 700} 701 702/* Part of the libgomp plugin interface. Initialize agent number N so that it 703 can be used for computation. Return TRUE on success. */ 704 705bool 706GOMP_OFFLOAD_init_device (int n) 707{ 708 if (!init_hsa_context ()) 709 return false; 710 if (n >= hsa_context.agent_count) 711 { 712 GOMP_PLUGIN_error ("Request to initialize non-existing HSA device %i", n); 713 return false; 714 } 715 struct agent_info *agent = &hsa_context.agents[n]; 716 717 if (agent->initialized) 718 return true; 719 720 if (pthread_rwlock_init (&agent->modules_rwlock, NULL)) 721 { 722 GOMP_PLUGIN_error ("Failed to initialize an HSA agent rwlock"); 723 return false; 724 } 725 if (pthread_mutex_init (&agent->prog_mutex, NULL)) 726 { 727 GOMP_PLUGIN_error ("Failed to initialize an HSA agent program mutex"); 728 return false; 729 } 730 731 uint32_t queue_size; 732 hsa_status_t status; 733 status = hsa_fns.hsa_agent_get_info_fn (agent->id, 734 HSA_AGENT_INFO_QUEUE_MAX_SIZE, 735 &queue_size); 736 if (status != HSA_STATUS_SUCCESS) 737 return hsa_error ("Error requesting maximum queue size of the HSA agent", 738 status); 739 status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AGENT_INFO_ISA, 740 &agent->isa); 741 if (status != HSA_STATUS_SUCCESS) 742 return hsa_error ("Error querying the ISA of the agent", status); 743 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, 744 HSA_QUEUE_TYPE_MULTI, 745 queue_callback, NULL, UINT32_MAX, 746 UINT32_MAX, 747 &agent->command_q); 748 if (status != HSA_STATUS_SUCCESS) 749 return hsa_error ("Error creating command queue", status); 750 751 status = hsa_fns.hsa_queue_create_fn (agent->id, queue_size, 752 HSA_QUEUE_TYPE_MULTI, 753 queue_callback, NULL, UINT32_MAX, 754 UINT32_MAX, 755 &agent->kernel_dispatch_command_q); 756 if (status != HSA_STATUS_SUCCESS) 757 return hsa_error ("Error creating kernel dispatch command queue", status); 758 759 agent->kernarg_region.handle = (uint64_t) -1; 760 status = hsa_fns.hsa_agent_iterate_regions_fn (agent->id, 761 get_kernarg_memory_region, 762 &agent->kernarg_region); 763 if (agent->kernarg_region.handle == (uint64_t) -1) 764 { 765 GOMP_PLUGIN_error ("Could not find suitable memory region for kernel " 766 "arguments"); 767 return false; 768 } 769 HSA_DEBUG ("HSA agent initialized, queue has id %llu\n", 770 (long long unsigned) agent->command_q->id); 771 HSA_DEBUG ("HSA agent initialized, kernel dispatch queue has id %llu\n", 772 (long long unsigned) agent->kernel_dispatch_command_q->id); 773 agent->initialized = true; 774 return true; 775} 776 777/* Verify that hsa_context has already been initialized and return the 778 agent_info structure describing device number N. Return NULL on error. */ 779 780static struct agent_info * 781get_agent_info (int n) 782{ 783 if (!hsa_context.initialized) 784 { 785 GOMP_PLUGIN_error ("Attempt to use uninitialized HSA context."); 786 return NULL; 787 } 788 if (n >= hsa_context.agent_count) 789 { 790 GOMP_PLUGIN_error ("Request to operate on anon-existing HSA device %i", n); 791 return NULL; 792 } 793 if (!hsa_context.agents[n].initialized) 794 { 795 GOMP_PLUGIN_error ("Attempt to use an uninitialized HSA agent."); 796 return NULL; 797 } 798 return &hsa_context.agents[n]; 799} 800 801/* Insert MODULE to the linked list of modules of AGENT. */ 802 803static void 804add_module_to_agent (struct agent_info *agent, struct module_info *module) 805{ 806 if (agent->first_module) 807 agent->first_module->prev = module; 808 module->next = agent->first_module; 809 module->prev = NULL; 810 agent->first_module = module; 811} 812 813/* Remove MODULE from the linked list of modules of AGENT. */ 814 815static void 816remove_module_from_agent (struct agent_info *agent, struct module_info *module) 817{ 818 if (agent->first_module == module) 819 agent->first_module = module->next; 820 if (module->prev) 821 module->prev->next = module->next; 822 if (module->next) 823 module->next->prev = module->prev; 824} 825 826/* Free the HSA program in agent and everything associated with it and set 827 agent->prog_finalized and the initialized flags of all kernels to false. 828 Return TRUE on success. */ 829 830static bool 831destroy_hsa_program (struct agent_info *agent) 832{ 833 if (!agent->prog_finalized || agent->prog_finalized_error) 834 return true; 835 836 hsa_status_t status; 837 838 HSA_DEBUG ("Destroying the current HSA program.\n"); 839 840 status = hsa_fns.hsa_executable_destroy_fn (agent->executable); 841 if (status != HSA_STATUS_SUCCESS) 842 return hsa_error ("Could not destroy HSA executable", status); 843 844 struct module_info *module; 845 for (module = agent->first_module; module; module = module->next) 846 { 847 int i; 848 for (i = 0; i < module->kernel_count; i++) 849 module->kernels[i].initialized = false; 850 } 851 agent->prog_finalized = false; 852 return true; 853} 854 855/* Initialize KERNEL from D and other parameters. Return true on success. */ 856 857static bool 858init_basic_kernel_info (struct kernel_info *kernel, 859 struct hsa_kernel_description *d, 860 struct agent_info *agent, 861 struct module_info *module) 862{ 863 kernel->agent = agent; 864 kernel->module = module; 865 kernel->name = d->name; 866 kernel->omp_data_size = d->omp_data_size; 867 kernel->gridified_kernel_p = d->gridified_kernel_p; 868 kernel->dependencies_count = d->kernel_dependencies_count; 869 kernel->dependencies = d->kernel_dependencies; 870 if (pthread_mutex_init (&kernel->init_mutex, NULL)) 871 { 872 GOMP_PLUGIN_error ("Failed to initialize an HSA kernel mutex"); 873 return false; 874 } 875 return true; 876} 877 878/* Part of the libgomp plugin interface. Load BRIG module described by struct 879 brig_image_desc in TARGET_DATA and return references to kernel descriptors 880 in TARGET_TABLE. */ 881 882int 883GOMP_OFFLOAD_load_image (int ord, unsigned version, const void *target_data, 884 struct addr_pair **target_table) 885{ 886 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA) 887 { 888 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin" 889 " (expected %u, received %u)", 890 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version)); 891 return -1; 892 } 893 894 struct brig_image_desc *image_desc = (struct brig_image_desc *) target_data; 895 struct agent_info *agent; 896 struct addr_pair *pair; 897 struct module_info *module; 898 struct kernel_info *kernel; 899 int kernel_count = image_desc->kernel_count; 900 901 agent = get_agent_info (ord); 902 if (!agent) 903 return -1; 904 905 if (pthread_rwlock_wrlock (&agent->modules_rwlock)) 906 { 907 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock"); 908 return -1; 909 } 910 if (agent->prog_finalized 911 && !destroy_hsa_program (agent)) 912 return -1; 913 914 HSA_DEBUG ("Encountered %d kernels in an image\n", kernel_count); 915 pair = GOMP_PLUGIN_malloc (kernel_count * sizeof (struct addr_pair)); 916 *target_table = pair; 917 module = (struct module_info *) 918 GOMP_PLUGIN_malloc_cleared (sizeof (struct module_info) 919 + kernel_count * sizeof (struct kernel_info)); 920 module->image_desc = image_desc; 921 module->kernel_count = kernel_count; 922 923 kernel = &module->kernels[0]; 924 925 /* Allocate memory for kernel dependencies. */ 926 for (unsigned i = 0; i < kernel_count; i++) 927 { 928 pair->start = (uintptr_t) kernel; 929 pair->end = (uintptr_t) (kernel + 1); 930 931 struct hsa_kernel_description *d = &image_desc->kernel_infos[i]; 932 if (!init_basic_kernel_info (kernel, d, agent, module)) 933 return -1; 934 kernel++; 935 pair++; 936 } 937 938 add_module_to_agent (agent, module); 939 if (pthread_rwlock_unlock (&agent->modules_rwlock)) 940 { 941 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock"); 942 return -1; 943 } 944 return kernel_count; 945} 946 947/* Add a shared BRIG library from a FILE_NAME to an AGENT. */ 948 949static struct brig_library_info * 950add_shared_library (const char *file_name, struct agent_info *agent) 951{ 952 struct brig_library_info *library = NULL; 953 954 void *f = dlopen (file_name, RTLD_NOW); 955 void *start = dlsym (f, "__brig_start"); 956 void *end = dlsym (f, "__brig_end"); 957 958 if (start == NULL || end == NULL) 959 return NULL; 960 961 unsigned size = end - start; 962 char *buf = (char *) GOMP_PLUGIN_malloc (size); 963 memcpy (buf, start, size); 964 965 library = GOMP_PLUGIN_malloc (sizeof (struct agent_info)); 966 library->file_name = (char *) GOMP_PLUGIN_malloc 967 ((strlen (file_name) + 1)); 968 strcpy (library->file_name, file_name); 969 library->image = (hsa_ext_module_t) buf; 970 971 return library; 972} 973 974/* Release memory used for BRIG shared libraries that correspond 975 to an AGENT. */ 976 977static void 978release_agent_shared_libraries (struct agent_info *agent) 979{ 980 for (unsigned i = 0; i < agent->brig_libraries_count; i++) 981 if (agent->brig_libraries[i]) 982 { 983 free (agent->brig_libraries[i]->file_name); 984 free (agent->brig_libraries[i]->image); 985 free (agent->brig_libraries[i]); 986 } 987 988 free (agent->brig_libraries); 989} 990 991/* Create and finalize the program consisting of all loaded modules. */ 992 993static void 994create_and_finalize_hsa_program (struct agent_info *agent) 995{ 996 hsa_status_t status; 997 hsa_ext_program_t prog_handle; 998 int mi = 0; 999 1000 if (pthread_mutex_lock (&agent->prog_mutex)) 1001 GOMP_PLUGIN_fatal ("Could not lock an HSA agent program mutex"); 1002 if (agent->prog_finalized) 1003 goto final; 1004 1005 status = hsa_fns.hsa_ext_program_create_fn 1006 (HSA_MACHINE_MODEL_LARGE, HSA_PROFILE_FULL, 1007 HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT, 1008 NULL, &prog_handle); 1009 if (status != HSA_STATUS_SUCCESS) 1010 hsa_fatal ("Could not create an HSA program", status); 1011 1012 HSA_DEBUG ("Created a finalized program\n"); 1013 1014 struct module_info *module = agent->first_module; 1015 while (module) 1016 { 1017 status = hsa_fns.hsa_ext_program_add_module_fn 1018 (prog_handle, module->image_desc->brig_module); 1019 if (status != HSA_STATUS_SUCCESS) 1020 hsa_fatal ("Could not add a module to the HSA program", status); 1021 module = module->next; 1022 mi++; 1023 } 1024 1025 /* Load all shared libraries. */ 1026 const char *libraries[] = { "libhsamath.so", "libhsastd.so" }; 1027 const unsigned libraries_count = sizeof (libraries) / sizeof (const char *); 1028 1029 agent->brig_libraries_count = libraries_count; 1030 agent->brig_libraries = GOMP_PLUGIN_malloc_cleared 1031 (sizeof (struct brig_library_info) * libraries_count); 1032 1033 for (unsigned i = 0; i < libraries_count; i++) 1034 { 1035 struct brig_library_info *library = add_shared_library (libraries[i], 1036 agent); 1037 if (library == NULL) 1038 { 1039 HSA_WARNING ("Could not open a shared BRIG library: %s\n", 1040 libraries[i]); 1041 continue; 1042 } 1043 1044 status = hsa_fns.hsa_ext_program_add_module_fn (prog_handle, 1045 library->image); 1046 if (status != HSA_STATUS_SUCCESS) 1047 hsa_warn ("Could not add a shared BRIG library the HSA program", 1048 status); 1049 else 1050 HSA_DEBUG ("a shared BRIG library has been added to a program: %s\n", 1051 libraries[i]); 1052 } 1053 1054 hsa_ext_control_directives_t control_directives; 1055 memset (&control_directives, 0, sizeof (control_directives)); 1056 hsa_code_object_t code_object; 1057 status = hsa_fns.hsa_ext_program_finalize_fn 1058 (prog_handle, agent->isa,HSA_EXT_FINALIZER_CALL_CONVENTION_AUTO, 1059 control_directives, "", HSA_CODE_OBJECT_TYPE_PROGRAM, &code_object); 1060 if (status != HSA_STATUS_SUCCESS) 1061 { 1062 hsa_warn ("Finalization of the HSA program failed", status); 1063 goto failure; 1064 } 1065 1066 HSA_DEBUG ("Finalization done\n"); 1067 hsa_fns.hsa_ext_program_destroy_fn (prog_handle); 1068 1069 status 1070 = hsa_fns.hsa_executable_create_fn (HSA_PROFILE_FULL, 1071 HSA_EXECUTABLE_STATE_UNFROZEN, 1072 "", &agent->executable); 1073 if (status != HSA_STATUS_SUCCESS) 1074 hsa_fatal ("Could not create HSA executable", status); 1075 1076 module = agent->first_module; 1077 while (module) 1078 { 1079 /* Initialize all global variables declared in the module. */ 1080 for (unsigned i = 0; i < module->image_desc->global_variable_count; i++) 1081 { 1082 struct global_var_info *var; 1083 var = &module->image_desc->global_variables[i]; 1084 status = hsa_fns.hsa_executable_global_variable_define_fn 1085 (agent->executable, var->name, var->address); 1086 1087 HSA_DEBUG ("Defining global variable: %s, address: %p\n", var->name, 1088 var->address); 1089 1090 if (status != HSA_STATUS_SUCCESS) 1091 hsa_fatal ("Could not define a global variable in the HSA program", 1092 status); 1093 } 1094 1095 module = module->next; 1096 } 1097 1098 status = hsa_fns.hsa_executable_load_code_object_fn (agent->executable, 1099 agent->id, 1100 code_object, ""); 1101 if (status != HSA_STATUS_SUCCESS) 1102 hsa_fatal ("Could not add a code object to the HSA executable", status); 1103 status = hsa_fns.hsa_executable_freeze_fn (agent->executable, ""); 1104 if (status != HSA_STATUS_SUCCESS) 1105 hsa_fatal ("Could not freeze the HSA executable", status); 1106 1107 HSA_DEBUG ("Froze HSA executable with the finalized code object\n"); 1108 1109 /* If all goes good, jump to final. */ 1110 goto final; 1111 1112failure: 1113 agent->prog_finalized_error = true; 1114 1115final: 1116 agent->prog_finalized = true; 1117 1118 if (pthread_mutex_unlock (&agent->prog_mutex)) 1119 GOMP_PLUGIN_fatal ("Could not unlock an HSA agent program mutex"); 1120} 1121 1122/* Create kernel dispatch data structure for given KERNEL. */ 1123 1124static struct GOMP_hsa_kernel_dispatch * 1125create_single_kernel_dispatch (struct kernel_info *kernel, 1126 unsigned omp_data_size) 1127{ 1128 struct agent_info *agent = kernel->agent; 1129 struct GOMP_hsa_kernel_dispatch *shadow 1130 = GOMP_PLUGIN_malloc_cleared (sizeof (struct GOMP_hsa_kernel_dispatch)); 1131 1132 shadow->queue = agent->command_q; 1133 shadow->omp_data_memory 1134 = omp_data_size > 0 ? GOMP_PLUGIN_malloc (omp_data_size) : NULL; 1135 unsigned dispatch_count = kernel->dependencies_count; 1136 shadow->kernel_dispatch_count = dispatch_count; 1137 1138 shadow->children_dispatches 1139 = GOMP_PLUGIN_malloc (dispatch_count * sizeof (shadow)); 1140 1141 shadow->object = kernel->object; 1142 1143 hsa_signal_t sync_signal; 1144 hsa_status_t status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &sync_signal); 1145 if (status != HSA_STATUS_SUCCESS) 1146 hsa_fatal ("Error creating the HSA sync signal", status); 1147 1148 shadow->signal = sync_signal.handle; 1149 shadow->private_segment_size = kernel->private_segment_size; 1150 shadow->group_segment_size = kernel->group_segment_size; 1151 1152 status 1153 = hsa_fns.hsa_memory_allocate_fn (agent->kernarg_region, 1154 kernel->kernarg_segment_size, 1155 &shadow->kernarg_address); 1156 if (status != HSA_STATUS_SUCCESS) 1157 hsa_fatal ("Could not allocate memory for HSA kernel arguments", status); 1158 1159 return shadow; 1160} 1161 1162/* Release data structure created for a kernel dispatch in SHADOW argument. */ 1163 1164static void 1165release_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *shadow) 1166{ 1167 HSA_DEBUG ("Released kernel dispatch: %p has value: %" PRIu64 " (%p)\n", 1168 shadow, (print_uint64_t) shadow->debug, 1169 (void *) (uintptr_t) shadow->debug); 1170 1171 hsa_fns.hsa_memory_free_fn (shadow->kernarg_address); 1172 1173 hsa_signal_t s; 1174 s.handle = shadow->signal; 1175 hsa_fns.hsa_signal_destroy_fn (s); 1176 1177 free (shadow->omp_data_memory); 1178 1179 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++) 1180 release_kernel_dispatch (shadow->children_dispatches[i]); 1181 1182 free (shadow->children_dispatches); 1183 free (shadow); 1184} 1185 1186/* Initialize a KERNEL without its dependencies. MAX_OMP_DATA_SIZE is used 1187 to calculate maximum necessary memory for OMP data allocation. */ 1188 1189static void 1190init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) 1191{ 1192 hsa_status_t status; 1193 struct agent_info *agent = kernel->agent; 1194 hsa_executable_symbol_t kernel_symbol; 1195 status = hsa_fns.hsa_executable_get_symbol_fn (agent->executable, NULL, 1196 kernel->name, agent->id, 1197 0, &kernel_symbol); 1198 if (status != HSA_STATUS_SUCCESS) 1199 { 1200 hsa_warn ("Could not find symbol for kernel in the code object", status); 1201 goto failure; 1202 } 1203 HSA_DEBUG ("Located kernel %s\n", kernel->name); 1204 status = hsa_fns.hsa_executable_symbol_get_info_fn 1205 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT, &kernel->object); 1206 if (status != HSA_STATUS_SUCCESS) 1207 hsa_fatal ("Could not extract a kernel object from its symbol", status); 1208 status = hsa_fns.hsa_executable_symbol_get_info_fn 1209 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE, 1210 &kernel->kernarg_segment_size); 1211 if (status != HSA_STATUS_SUCCESS) 1212 hsa_fatal ("Could not get info about kernel argument size", status); 1213 status = hsa_fns.hsa_executable_symbol_get_info_fn 1214 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE, 1215 &kernel->group_segment_size); 1216 if (status != HSA_STATUS_SUCCESS) 1217 hsa_fatal ("Could not get info about kernel group segment size", status); 1218 status = hsa_fns.hsa_executable_symbol_get_info_fn 1219 (kernel_symbol, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE, 1220 &kernel->private_segment_size); 1221 if (status != HSA_STATUS_SUCCESS) 1222 hsa_fatal ("Could not get info about kernel private segment size", 1223 status); 1224 1225 HSA_DEBUG ("Kernel structure for %s fully initialized with " 1226 "following segment sizes: \n", kernel->name); 1227 HSA_DEBUG (" group_segment_size: %u\n", 1228 (unsigned) kernel->group_segment_size); 1229 HSA_DEBUG (" private_segment_size: %u\n", 1230 (unsigned) kernel->private_segment_size); 1231 HSA_DEBUG (" kernarg_segment_size: %u\n", 1232 (unsigned) kernel->kernarg_segment_size); 1233 HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size); 1234 HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p); 1235 1236 if (kernel->omp_data_size > *max_omp_data_size) 1237 *max_omp_data_size = kernel->omp_data_size; 1238 1239 for (unsigned i = 0; i < kernel->dependencies_count; i++) 1240 { 1241 struct kernel_info *dependency 1242 = get_kernel_for_agent (agent, kernel->dependencies[i]); 1243 1244 if (dependency == NULL) 1245 { 1246 HSA_DEBUG ("Could not find a dependency for a kernel: %s, " 1247 "dependency name: %s\n", kernel->name, 1248 kernel->dependencies[i]); 1249 goto failure; 1250 } 1251 1252 if (dependency->dependencies_count > 0) 1253 { 1254 HSA_DEBUG ("HSA does not allow kernel dispatching code with " 1255 "a depth bigger than one\n"); 1256 goto failure; 1257 } 1258 1259 init_single_kernel (dependency, max_omp_data_size); 1260 } 1261 1262 return; 1263 1264failure: 1265 kernel->initialization_failed = true; 1266} 1267 1268/* Indent stream F by INDENT spaces. */ 1269 1270static void 1271indent_stream (FILE *f, unsigned indent) 1272{ 1273 fprintf (f, "%*s", indent, ""); 1274} 1275 1276/* Dump kernel DISPATCH data structure and indent it by INDENT spaces. */ 1277 1278static void 1279print_kernel_dispatch (struct GOMP_hsa_kernel_dispatch *dispatch, unsigned indent) 1280{ 1281 indent_stream (stderr, indent); 1282 fprintf (stderr, "this: %p\n", dispatch); 1283 indent_stream (stderr, indent); 1284 fprintf (stderr, "queue: %p\n", dispatch->queue); 1285 indent_stream (stderr, indent); 1286 fprintf (stderr, "omp_data_memory: %p\n", dispatch->omp_data_memory); 1287 indent_stream (stderr, indent); 1288 fprintf (stderr, "kernarg_address: %p\n", dispatch->kernarg_address); 1289 indent_stream (stderr, indent); 1290 fprintf (stderr, "object: %" PRIu64 "\n", (print_uint64_t) dispatch->object); 1291 indent_stream (stderr, indent); 1292 fprintf (stderr, "signal: %" PRIu64 "\n", (print_uint64_t) dispatch->signal); 1293 indent_stream (stderr, indent); 1294 fprintf (stderr, "private_segment_size: %u\n", 1295 dispatch->private_segment_size); 1296 indent_stream (stderr, indent); 1297 fprintf (stderr, "group_segment_size: %u\n", 1298 dispatch->group_segment_size); 1299 indent_stream (stderr, indent); 1300 fprintf (stderr, "children dispatches: %" PRIu64 "\n", 1301 (print_uint64_t) dispatch->kernel_dispatch_count); 1302 indent_stream (stderr, indent); 1303 fprintf (stderr, "omp_num_threads: %u\n", 1304 dispatch->omp_num_threads); 1305 fprintf (stderr, "\n"); 1306 1307 for (unsigned i = 0; i < dispatch->kernel_dispatch_count; i++) 1308 print_kernel_dispatch (dispatch->children_dispatches[i], indent + 2); 1309} 1310 1311/* Create kernel dispatch data structure for a KERNEL and all its 1312 dependencies. */ 1313 1314static struct GOMP_hsa_kernel_dispatch * 1315create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) 1316{ 1317 struct GOMP_hsa_kernel_dispatch *shadow 1318 = create_single_kernel_dispatch (kernel, omp_data_size); 1319 shadow->omp_num_threads = 64; 1320 shadow->debug = 0; 1321 shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0; 1322 1323 /* Create kernel dispatch data structures. We do not allow to have 1324 a kernel dispatch with depth bigger than one. */ 1325 for (unsigned i = 0; i < kernel->dependencies_count; i++) 1326 { 1327 struct kernel_info *dependency 1328 = get_kernel_for_agent (kernel->agent, kernel->dependencies[i]); 1329 shadow->children_dispatches[i] 1330 = create_single_kernel_dispatch (dependency, omp_data_size); 1331 shadow->children_dispatches[i]->queue 1332 = kernel->agent->kernel_dispatch_command_q; 1333 shadow->children_dispatches[i]->omp_level = 1; 1334 } 1335 1336 return shadow; 1337} 1338 1339/* Do all the work that is necessary before running KERNEL for the first time. 1340 The function assumes the program has been created, finalized and frozen by 1341 create_and_finalize_hsa_program. */ 1342 1343static void 1344init_kernel (struct kernel_info *kernel) 1345{ 1346 if (pthread_mutex_lock (&kernel->init_mutex)) 1347 GOMP_PLUGIN_fatal ("Could not lock an HSA kernel initialization mutex"); 1348 if (kernel->initialized) 1349 { 1350 if (pthread_mutex_unlock (&kernel->init_mutex)) 1351 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization " 1352 "mutex"); 1353 1354 return; 1355 } 1356 1357 /* Precomputed maximum size of OMP data necessary for a kernel from kernel 1358 dispatch operation. */ 1359 init_single_kernel (kernel, &kernel->max_omp_data_size); 1360 1361 if (!kernel->initialization_failed) 1362 HSA_DEBUG ("\n"); 1363 1364 kernel->initialized = true; 1365 if (pthread_mutex_unlock (&kernel->init_mutex)) 1366 GOMP_PLUGIN_fatal ("Could not unlock an HSA kernel initialization " 1367 "mutex"); 1368} 1369 1370/* Parse the target attributes INPUT provided by the compiler and return true 1371 if we should run anything all. If INPUT is NULL, fill DEF with default 1372 values, then store INPUT or DEF into *RESULT. */ 1373 1374static bool 1375parse_target_attributes (void **input, 1376 struct GOMP_kernel_launch_attributes *def, 1377 struct GOMP_kernel_launch_attributes **result) 1378{ 1379 if (!input) 1380 GOMP_PLUGIN_fatal ("No target arguments provided"); 1381 1382 bool attrs_found = false; 1383 while (*input) 1384 { 1385 uintptr_t id = (uintptr_t) *input; 1386 if ((id & GOMP_TARGET_ARG_DEVICE_MASK) == GOMP_DEVICE_HSA 1387 && ((id & GOMP_TARGET_ARG_ID_MASK) 1388 == GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES)) 1389 { 1390 input++; 1391 attrs_found = true; 1392 break; 1393 } 1394 1395 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 1396 input++; 1397 input++; 1398 } 1399 1400 if (!attrs_found) 1401 { 1402 def->ndim = 1; 1403 def->gdims[0] = 1; 1404 def->gdims[1] = 1; 1405 def->gdims[2] = 1; 1406 def->wdims[0] = 1; 1407 def->wdims[1] = 1; 1408 def->wdims[2] = 1; 1409 *result = def; 1410 HSA_DEBUG ("GOMP_OFFLOAD_run called with no launch attributes\n"); 1411 return true; 1412 } 1413 1414 struct GOMP_kernel_launch_attributes *kla; 1415 kla = (struct GOMP_kernel_launch_attributes *) *input; 1416 *result = kla; 1417 if (kla->ndim == 0 || kla->ndim > 3) 1418 GOMP_PLUGIN_fatal ("Invalid number of dimensions (%u)", kla->ndim); 1419 1420 HSA_DEBUG ("GOMP_OFFLOAD_run called with %u dimensions:\n", kla->ndim); 1421 unsigned i; 1422 for (i = 0; i < kla->ndim; i++) 1423 { 1424 HSA_DEBUG (" Dimension %u: grid size %u and group size %u\n", i, 1425 kla->gdims[i], kla->wdims[i]); 1426 if (kla->gdims[i] == 0) 1427 return false; 1428 } 1429 return true; 1430} 1431 1432/* Return the group size given the requested GROUP size, GRID size and number 1433 of grid dimensions NDIM. */ 1434 1435static uint32_t 1436get_group_size (uint32_t ndim, uint32_t grid, uint32_t group) 1437{ 1438 if (group == 0) 1439 { 1440 /* TODO: Provide a default via environment or device characteristics. */ 1441 if (ndim == 1) 1442 group = 64; 1443 else if (ndim == 2) 1444 group = 8; 1445 else 1446 group = 4; 1447 } 1448 1449 if (group > grid) 1450 group = grid; 1451 return group; 1452} 1453 1454/* Return true if the HSA runtime can run function FN_PTR. */ 1455 1456bool 1457GOMP_OFFLOAD_can_run (void *fn_ptr) 1458{ 1459 struct kernel_info *kernel = (struct kernel_info *) fn_ptr; 1460 struct agent_info *agent = kernel->agent; 1461 create_and_finalize_hsa_program (agent); 1462 1463 if (agent->prog_finalized_error) 1464 goto failure; 1465 1466 init_kernel (kernel); 1467 if (kernel->initialization_failed) 1468 goto failure; 1469 1470 return true; 1471 1472failure: 1473 if (suppress_host_fallback) 1474 GOMP_PLUGIN_fatal ("HSA host fallback has been suppressed"); 1475 HSA_DEBUG ("HSA target cannot be launched, doing a host fallback\n"); 1476 return false; 1477} 1478 1479/* Atomically store pair of uint16_t values (HEADER and REST) to a PACKET. */ 1480 1481void 1482packet_store_release (uint32_t* packet, uint16_t header, uint16_t rest) 1483{ 1484 __atomic_store_n (packet, header | (rest << 16), __ATOMIC_RELEASE); 1485} 1486 1487/* Run KERNEL on its agent, pass VARS to it as arguments and take 1488 launchattributes from KLA. */ 1489 1490void 1491run_kernel (struct kernel_info *kernel, void *vars, 1492 struct GOMP_kernel_launch_attributes *kla) 1493{ 1494 struct agent_info *agent = kernel->agent; 1495 if (pthread_rwlock_rdlock (&agent->modules_rwlock)) 1496 GOMP_PLUGIN_fatal ("Unable to read-lock an HSA agent rwlock"); 1497 1498 if (!agent->initialized) 1499 GOMP_PLUGIN_fatal ("Agent must be initialized"); 1500 1501 if (!kernel->initialized) 1502 GOMP_PLUGIN_fatal ("Called kernel must be initialized"); 1503 1504 struct GOMP_hsa_kernel_dispatch *shadow 1505 = create_kernel_dispatch (kernel, kernel->max_omp_data_size); 1506 1507 if (debug) 1508 { 1509 fprintf (stderr, "\nKernel has following dependencies:\n"); 1510 print_kernel_dispatch (shadow, 2); 1511 } 1512 1513 uint64_t index 1514 = hsa_fns.hsa_queue_add_write_index_release_fn (agent->command_q, 1); 1515 HSA_DEBUG ("Got AQL index %llu\n", (long long int) index); 1516 1517 /* Wait until the queue is not full before writing the packet. */ 1518 while (index - hsa_fns.hsa_queue_load_read_index_acquire_fn (agent->command_q) 1519 >= agent->command_q->size) 1520 ; 1521 1522 hsa_kernel_dispatch_packet_t *packet; 1523 packet = ((hsa_kernel_dispatch_packet_t *) agent->command_q->base_address) 1524 + index % agent->command_q->size; 1525 1526 memset (((uint8_t *) packet) + 4, 0, sizeof (*packet) - 4); 1527 packet->grid_size_x = kla->gdims[0]; 1528 packet->workgroup_size_x = get_group_size (kla->ndim, kla->gdims[0], 1529 kla->wdims[0]); 1530 1531 if (kla->ndim >= 2) 1532 { 1533 packet->grid_size_y = kla->gdims[1]; 1534 packet->workgroup_size_y = get_group_size (kla->ndim, kla->gdims[1], 1535 kla->wdims[1]); 1536 } 1537 else 1538 { 1539 packet->grid_size_y = 1; 1540 packet->workgroup_size_y = 1; 1541 } 1542 1543 if (kla->ndim == 3) 1544 { 1545 packet->grid_size_z = kla->gdims[2]; 1546 packet->workgroup_size_z = get_group_size (kla->ndim, kla->gdims[2], 1547 kla->wdims[2]); 1548 } 1549 else 1550 { 1551 packet->grid_size_z = 1; 1552 packet->workgroup_size_z = 1; 1553 } 1554 1555 packet->private_segment_size = kernel->private_segment_size; 1556 packet->group_segment_size = kernel->group_segment_size; 1557 packet->kernel_object = kernel->object; 1558 packet->kernarg_address = shadow->kernarg_address; 1559 hsa_signal_t s; 1560 s.handle = shadow->signal; 1561 packet->completion_signal = s; 1562 hsa_fns.hsa_signal_store_relaxed_fn (s, 1); 1563 memcpy (shadow->kernarg_address, &vars, sizeof (vars)); 1564 1565 /* PR hsa/70337. */ 1566 size_t vars_size = sizeof (vars); 1567 if (kernel->kernarg_segment_size > vars_size) 1568 { 1569 if (kernel->kernarg_segment_size != vars_size 1570 + sizeof (struct hsa_kernel_runtime *)) 1571 GOMP_PLUGIN_fatal ("Kernel segment size has an unexpected value"); 1572 memcpy (packet->kernarg_address + vars_size, &shadow, 1573 sizeof (struct hsa_kernel_runtime *)); 1574 } 1575 1576 HSA_DEBUG ("Copying kernel runtime pointer to kernarg_address\n"); 1577 1578 uint16_t header; 1579 header = HSA_PACKET_TYPE_KERNEL_DISPATCH << HSA_PACKET_HEADER_TYPE; 1580 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE; 1581 header |= HSA_FENCE_SCOPE_SYSTEM << HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE; 1582 1583 HSA_DEBUG ("Going to dispatch kernel %s\n", kernel->name); 1584 1585 packet_store_release ((uint32_t *) packet, header, 1586 (uint16_t) kla->ndim << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS); 1587 1588 hsa_fns.hsa_signal_store_release_fn (agent->command_q->doorbell_signal, 1589 index); 1590 1591 /* TODO: GPU agents in Carrizo APUs cannot properly update L2 cache for 1592 signal wait and signal load operations on their own and we need to 1593 periodically call the hsa_signal_load_acquire on completion signals of 1594 children kernels in the CPU to make that happen. As soon the 1595 limitation will be resolved, this workaround can be removed. */ 1596 1597 HSA_DEBUG ("Kernel dispatched, waiting for completion\n"); 1598 1599 /* Root signal waits with 1ms timeout. */ 1600 while (hsa_fns.hsa_signal_wait_acquire_fn (s, HSA_SIGNAL_CONDITION_LT, 1, 1601 1000 * 1000, 1602 HSA_WAIT_STATE_BLOCKED) != 0) 1603 for (unsigned i = 0; i < shadow->kernel_dispatch_count; i++) 1604 { 1605 hsa_signal_t child_s; 1606 child_s.handle = shadow->children_dispatches[i]->signal; 1607 1608 HSA_DEBUG ("Waiting for children completion signal: %" PRIu64 "\n", 1609 (print_uint64_t) shadow->children_dispatches[i]->signal); 1610 hsa_fns.hsa_signal_load_acquire_fn (child_s); 1611 } 1612 1613 release_kernel_dispatch (shadow); 1614 1615 if (pthread_rwlock_unlock (&agent->modules_rwlock)) 1616 GOMP_PLUGIN_fatal ("Unable to unlock an HSA agent rwlock"); 1617} 1618 1619/* Part of the libgomp plugin interface. Run a kernel on device N (the number 1620 is actually ignored, we assume the FN_PTR has been mapped using the correct 1621 device) and pass it an array of pointers in VARS as a parameter. The kernel 1622 is identified by FN_PTR which must point to a kernel_info structure. */ 1623 1624void 1625GOMP_OFFLOAD_run (int n __attribute__((unused)), 1626 void *fn_ptr, void *vars, void **args) 1627{ 1628 struct kernel_info *kernel = (struct kernel_info *) fn_ptr; 1629 struct GOMP_kernel_launch_attributes def; 1630 struct GOMP_kernel_launch_attributes *kla; 1631 if (!parse_target_attributes (args, &def, &kla)) 1632 { 1633 HSA_DEBUG ("Will not run HSA kernel because the grid size is zero\n"); 1634 return; 1635 } 1636 run_kernel (kernel, vars, kla); 1637} 1638 1639/* Information to be passed to a thread running a kernel asycnronously. */ 1640 1641struct async_run_info 1642{ 1643 int device; 1644 void *tgt_fn; 1645 void *tgt_vars; 1646 void **args; 1647 void *async_data; 1648}; 1649 1650/* Thread routine to run a kernel asynchronously. */ 1651 1652static void * 1653run_kernel_asynchronously (void *thread_arg) 1654{ 1655 struct async_run_info *info = (struct async_run_info *) thread_arg; 1656 int device = info->device; 1657 void *tgt_fn = info->tgt_fn; 1658 void *tgt_vars = info->tgt_vars; 1659 void **args = info->args; 1660 void *async_data = info->async_data; 1661 1662 free (info); 1663 GOMP_OFFLOAD_run (device, tgt_fn, tgt_vars, args); 1664 GOMP_PLUGIN_target_task_completion (async_data); 1665 return NULL; 1666} 1667 1668/* Part of the libgomp plugin interface. Run a kernel like GOMP_OFFLOAD_run 1669 does, but asynchronously and call GOMP_PLUGIN_target_task_completion when it 1670 has finished. */ 1671 1672void 1673GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, 1674 void **args, void *async_data) 1675{ 1676 pthread_t pt; 1677 struct async_run_info *info; 1678 HSA_DEBUG ("GOMP_OFFLOAD_async_run invoked\n"); 1679 info = GOMP_PLUGIN_malloc (sizeof (struct async_run_info)); 1680 1681 info->device = device; 1682 info->tgt_fn = tgt_fn; 1683 info->tgt_vars = tgt_vars; 1684 info->args = args; 1685 info->async_data = async_data; 1686 1687 int err = pthread_create (&pt, NULL, &run_kernel_asynchronously, info); 1688 if (err != 0) 1689 GOMP_PLUGIN_fatal ("HSA asynchronous thread creation failed: %s", 1690 strerror (err)); 1691 err = pthread_detach (pt); 1692 if (err != 0) 1693 GOMP_PLUGIN_fatal ("Failed to detach a thread to run HSA kernel " 1694 "asynchronously: %s", strerror (err)); 1695} 1696 1697/* Deinitialize all information associated with MODULE and kernels within 1698 it. Return TRUE on success. */ 1699 1700static bool 1701destroy_module (struct module_info *module) 1702{ 1703 int i; 1704 for (i = 0; i < module->kernel_count; i++) 1705 if (pthread_mutex_destroy (&module->kernels[i].init_mutex)) 1706 { 1707 GOMP_PLUGIN_error ("Failed to destroy an HSA kernel initialization " 1708 "mutex"); 1709 return false; 1710 } 1711 return true; 1712} 1713 1714/* Part of the libgomp plugin interface. Unload BRIG module described by 1715 struct brig_image_desc in TARGET_DATA from agent number N. Return 1716 TRUE on success. */ 1717 1718bool 1719GOMP_OFFLOAD_unload_image (int n, unsigned version, const void *target_data) 1720{ 1721 if (GOMP_VERSION_DEV (version) > GOMP_VERSION_HSA) 1722 { 1723 GOMP_PLUGIN_error ("Offload data incompatible with HSA plugin" 1724 " (expected %u, received %u)", 1725 GOMP_VERSION_HSA, GOMP_VERSION_DEV (version)); 1726 return false; 1727 } 1728 1729 struct agent_info *agent; 1730 agent = get_agent_info (n); 1731 if (!agent) 1732 return false; 1733 1734 if (pthread_rwlock_wrlock (&agent->modules_rwlock)) 1735 { 1736 GOMP_PLUGIN_error ("Unable to write-lock an HSA agent rwlock"); 1737 return false; 1738 } 1739 struct module_info *module = agent->first_module; 1740 while (module) 1741 { 1742 if (module->image_desc == target_data) 1743 break; 1744 module = module->next; 1745 } 1746 if (!module) 1747 { 1748 GOMP_PLUGIN_error ("Attempt to unload an image that has never been " 1749 "loaded before"); 1750 return false; 1751 } 1752 1753 remove_module_from_agent (agent, module); 1754 if (!destroy_module (module)) 1755 return false; 1756 free (module); 1757 if (!destroy_hsa_program (agent)) 1758 return false; 1759 if (pthread_rwlock_unlock (&agent->modules_rwlock)) 1760 { 1761 GOMP_PLUGIN_error ("Unable to unlock an HSA agent rwlock"); 1762 return false; 1763 } 1764 return true; 1765} 1766 1767/* Part of the libgomp plugin interface. Deinitialize all information and 1768 status associated with agent number N. We do not attempt any 1769 synchronization, assuming the user and libgomp will not attempt 1770 deinitialization of a device that is in any way being used at the same 1771 time. Return TRUE on success. */ 1772 1773bool 1774GOMP_OFFLOAD_fini_device (int n) 1775{ 1776 struct agent_info *agent = get_agent_info (n); 1777 if (!agent) 1778 return false; 1779 1780 if (!agent->initialized) 1781 return true; 1782 1783 struct module_info *next_module = agent->first_module; 1784 while (next_module) 1785 { 1786 struct module_info *module = next_module; 1787 next_module = module->next; 1788 if (!destroy_module (module)) 1789 return false; 1790 free (module); 1791 } 1792 agent->first_module = NULL; 1793 if (!destroy_hsa_program (agent)) 1794 return false; 1795 1796 release_agent_shared_libraries (agent); 1797 1798 hsa_status_t status = hsa_fns.hsa_queue_destroy_fn (agent->command_q); 1799 if (status != HSA_STATUS_SUCCESS) 1800 return hsa_error ("Error destroying command queue", status); 1801 status = hsa_fns.hsa_queue_destroy_fn (agent->kernel_dispatch_command_q); 1802 if (status != HSA_STATUS_SUCCESS) 1803 return hsa_error ("Error destroying kernel dispatch command queue", status); 1804 if (pthread_mutex_destroy (&agent->prog_mutex)) 1805 { 1806 GOMP_PLUGIN_error ("Failed to destroy an HSA agent program mutex"); 1807 return false; 1808 } 1809 if (pthread_rwlock_destroy (&agent->modules_rwlock)) 1810 { 1811 GOMP_PLUGIN_error ("Failed to destroy an HSA agent rwlock"); 1812 return false; 1813 } 1814 agent->initialized = false; 1815 return true; 1816} 1817 1818/* Part of the libgomp plugin interface. Not implemented as it is not required 1819 for HSA. */ 1820 1821void * 1822GOMP_OFFLOAD_alloc (int ord, size_t size) 1823{ 1824 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_alloc is not implemented because " 1825 "it should never be called"); 1826 return NULL; 1827} 1828 1829/* Part of the libgomp plugin interface. Not implemented as it is not required 1830 for HSA. */ 1831 1832bool 1833GOMP_OFFLOAD_free (int ord, void *ptr) 1834{ 1835 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_free is not implemented because " 1836 "it should never be called"); 1837 return false; 1838} 1839 1840/* Part of the libgomp plugin interface. Not implemented as it is not required 1841 for HSA. */ 1842 1843bool 1844GOMP_OFFLOAD_dev2host (int ord, void *dst, const void *src, size_t n) 1845{ 1846 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2host is not implemented because " 1847 "it should never be called"); 1848 return false; 1849} 1850 1851/* Part of the libgomp plugin interface. Not implemented as it is not required 1852 for HSA. */ 1853 1854bool 1855GOMP_OFFLOAD_host2dev (int ord, void *dst, const void *src, size_t n) 1856{ 1857 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_host2dev is not implemented because " 1858 "it should never be called"); 1859 return false; 1860} 1861 1862/* Part of the libgomp plugin interface. Not implemented as it is not required 1863 for HSA. */ 1864 1865bool 1866GOMP_OFFLOAD_dev2dev (int ord, void *dst, const void *src, size_t n) 1867{ 1868 GOMP_PLUGIN_error ("HSA GOMP_OFFLOAD_dev2dev is not implemented because " 1869 "it should never be called"); 1870 return false; 1871} 1872