1/* Copyright (C) 2013-2015 Free Software Foundation, Inc. 2 Contributed by Jakub Jelinek <jakub@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file contains the support of offloading. */ 27 28#include "config.h" 29#include "libgomp.h" 30#include "oacc-plugin.h" 31#include "oacc-int.h" 32#include "gomp-constants.h" 33#include <limits.h> 34#include <stdbool.h> 35#include <stdlib.h> 36#ifdef HAVE_INTTYPES_H 37# include <inttypes.h> /* For PRIu64. */ 38#endif 39#include <string.h> 40#include <assert.h> 41 42#ifdef PLUGIN_SUPPORT 43#include <dlfcn.h> 44#include "plugin-suffix.h" 45#endif 46 47static void gomp_target_init (void); 48 49/* The whole initialization code for offloading plugins is only run one. */ 50static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; 51 52/* Mutex for offload image registration. */ 53static gomp_mutex_t register_lock; 54 55/* This structure describes an offload image. 
   It contains type of the target device, pointer to host table descriptor, and
   pointer to target data.  */
struct offload_image_descr {
  enum offload_target_type type;
  void *host_table;
  void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

/* The comparison function for the address-range splay tree.  Two keys
   compare equal (return 0) when their [host_start, host_end) ranges
   overlap, or when both ranges are empty; so a lookup finds any mapping
   that intersects the queried range.  */

attribute_hidden int
splay_compare (splay_tree_key x, splay_tree_key y)
{
  if (x->host_start == x->host_end
      && y->host_start == y->host_end)
    return 0;
  if (x->host_end <= y->host_start)
    return -1;
  if (x->host_start >= y->host_end)
    return 1;
  return 0;
}

#include "splay-tree.h"

/* Run the offloading-plugin initialization (gomp_target_init) exactly
   once, guarded by GOMP_IS_INITIALIZED.  */

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

/* Return the number of devices usable for OpenMP offloading, initializing
   the plugin machinery on first use.  */

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

/* Translate DEVICE_ID into a device descriptor.  GOMP_DEVICE_ICV means use
   the default-device ICV.  Returns NULL for out-of-range ids; callers treat
   a NULL result as host fallback.  */

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  return &devices[device_id];
}


/* Handle the case where splay_tree_lookup found oldn for newn.
   Helper function of gomp_map_vars.  Bumps OLDN's refcount unless the
   mapping is forced or NEWN is not fully contained in OLDN, which is
   fatal (DEVICEP's lock is released first).  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
                        splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
                  "[%p..%p) is already mapped",
                  (void *) newn->host_start, (void *) newn->host_end,
                  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}

/* Fetch the map kind for entry IDX: OpenACC stores 16-bit kinds, OpenMP
   8-bit kinds.  */

static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  return is_openacc ? ((unsigned short *) kinds)[idx]
                    : ((unsigned char *) kinds)[idx];
}

/* Map MAPNUM host objects (addresses in HOSTADDRS, byte sizes in SIZES,
   map kinds in KINDS) onto DEVICEP, reusing existing mappings from the
   device's splay tree where possible and allocating one contiguous device
   block for the rest.  DEVADDRS, when non-NULL, supplies a preallocated
   device region (only valid for MAPNUM == 1).  IS_TARGET additionally
   builds a leading array of device pointers for a target region.  Returns
   a new target_mem_desc with refcount 1; ownership passes to the caller
   (released via gomp_unmap_vars).  */

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
               void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
               bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  /* Kind encodings differ: OpenACC keeps the alignment shift in the high
     byte of a 16-bit kind, OpenMP in the high bits of an 8-bit kind.  */
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      /* Reserve room for the per-entry device-pointer array up front.  */
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  /* First pass: find which entries are already mapped and compute the
     total size/alignment needed for the ones that are not.  */
  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
        {
          tgt->list[i] = NULL;
          continue;
        }
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
        cur_node.host_end = cur_node.host_start + sizes[i];
      else
        cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
        {
          tgt->list[i] = n;
          gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
        }
      else
        {
          tgt->list[i] = NULL;

          size_t align = (size_t) 1 << (kind >> rshift);
          not_found_cnt++;
          if (tgt_align < align)
            tgt_align = align;
          tgt_size = (tgt_size + align - 1) & ~(align - 1);
          tgt_size += cur_node.host_end - cur_node.host_start;
          if ((kind & typemask) == GOMP_MAP_TO_PSET)
            {
              /* Trailing GOMP_MAP_POINTER entries that point inside this
                 pointer-set are handled together with it; skip them here
                 (note the i++ inside the loop body).  */
              size_t j;
              for (j = i + 1; j < mapnum; j++)
                if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                         & typemask))
                  break;
                else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
                         || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                             > cur_node.host_end))
                  break;
                else
                  {
                    tgt->list[j] = NULL;
                    i++;
                  }
            }
        }
    }

  if (devaddrs)
    {
      /* Caller-supplied device memory; only a single mapping supported.  */
      if (mapnum != 1)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("unexpected aggregation");
        }
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate tgt_align aligned tgt_size block of memory.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
         memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
                                          tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  /* Second pass re-walks tgt_size from the start of the block (after the
     optional device-pointer array) to assign per-entry offsets.  */
  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      /* Second pass: create splay-tree entries for the new mappings and
         copy host data to the device as dictated by each kind.  */
      for (i = 0; i < mapnum; i++)
        if (tgt->list[i] == NULL)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (hostaddrs[i] == NULL)
              continue;
            splay_tree_key k = &array->key;
            k->host_start = (uintptr_t) hostaddrs[i];
            if (!GOMP_MAP_POINTER_P (kind & typemask))
              k->host_end = k->host_start + sizes[i];
            else
              k->host_end = k->host_start + sizeof (void *);
            splay_tree_key n = splay_tree_lookup (mem_map, k);
            if (n)
              {
                tgt->list[i] = n;
                gomp_map_vars_existing (devicep, n, k, kind & typemask);
              }
            else
              {
                size_t align = (size_t) 1 << (kind >> rshift);
                tgt->list[i] = k;
                tgt_size = (tgt_size + align - 1) & ~(align - 1);
                k->tgt = tgt;
                k->tgt_offset = tgt_size;
                tgt_size += k->host_end - k->host_start;
                k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
                k->refcount = 1;
                k->async_refcount = 0;
                tgt->refcount++;
                array->left = NULL;
                array->right = NULL;
                splay_tree_insert (mem_map, array);
                switch (kind & typemask)
                  {
                  case GOMP_MAP_ALLOC:
                  case GOMP_MAP_FROM:
                  case GOMP_MAP_FORCE_ALLOC:
                  case GOMP_MAP_FORCE_FROM:
                    break;
                  case GOMP_MAP_TO:
                  case GOMP_MAP_TOFROM:
                  case GOMP_MAP_FORCE_TO:
                  case GOMP_MAP_FORCE_TOFROM:
                    /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);
                    break;
                  case GOMP_MAP_POINTER:
                    cur_node.host_start
                      = (uintptr_t) *(void **) k->host_start;
                    if (cur_node.host_start == (uintptr_t) NULL)
                      {
                        /* NULL host pointer maps to NULL device pointer.  */
                        cur_node.tgt_offset = (uintptr_t) NULL;
                        /* FIXME: see above FIXME comment.  */
                        devicep->host2dev_func (devicep->target_id,
                                                (void *) (tgt->tgt_start
                                                          + k->tgt_offset),
                                                (void *) &cur_node.tgt_offset,
                                                sizeof (void *));
                        break;
                      }
                    /* Add bias to the pointer value.  */
                    cur_node.host_start += sizes[i];
                    cur_node.host_end = cur_node.host_start + 1;
                    n = splay_tree_lookup (mem_map, &cur_node);
                    if (n == NULL)
                      {
                        /* Could be possibly zero size array section.  */
                        cur_node.host_end--;
                        n = splay_tree_lookup (mem_map, &cur_node);
                        if (n == NULL)
                          {
                            cur_node.host_start--;
                            n = splay_tree_lookup (mem_map, &cur_node);
                            cur_node.host_start++;
                          }
                      }
                    if (n == NULL)
                      {
                        gomp_mutex_unlock (&devicep->lock);
                        gomp_fatal ("Pointer target of array section "
                                    "wasn't mapped");
                      }
                    cur_node.host_start -= n->host_start;
                    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
                                          + cur_node.host_start;
                    /* At this point tgt_offset is target address of the
                       array section.  Now subtract bias to get what we want
                       to initialize the pointer with.  */
                    cur_node.tgt_offset -= sizes[i];
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) &cur_node.tgt_offset,
                                            sizeof (void *));
                    break;
                  case GOMP_MAP_TO_PSET:
                    /* FIXME: see above FIXME comment.  */
                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            k->host_end - k->host_start);

                    /* Initialize each trailing GOMP_MAP_POINTER entry that
                       falls inside this pointer set; mirrors the skip in
                       the first pass (i++ consumes the entries).  */
                    for (j = i + 1; j < mapnum; j++)
                      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
                                               & typemask))
                        break;
                      else if ((uintptr_t) hostaddrs[j] < k->host_start
                               || ((uintptr_t) hostaddrs[j] + sizeof (void *)
                                   > k->host_end))
                        break;
                      else
                        {
                          tgt->list[j] = k;
                          k->refcount++;
                          cur_node.host_start
                            = (uintptr_t) *(void **) hostaddrs[j];
                          if (cur_node.host_start == (uintptr_t) NULL)
                            {
                              cur_node.tgt_offset = (uintptr_t) NULL;
                              /* FIXME: see above FIXME comment.  */
                              devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start + k->tgt_offset
                                            + ((uintptr_t) hostaddrs[j]
                                               - k->host_start)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
                              i++;
                              continue;
                            }
                          /* Add bias to the pointer value.  */
                          cur_node.host_start += sizes[j];
                          cur_node.host_end = cur_node.host_start + 1;
                          n = splay_tree_lookup (mem_map, &cur_node);
                          if (n == NULL)
                            {
                              /* Could be possibly zero size array section.  */
                              cur_node.host_end--;
                              n = splay_tree_lookup (mem_map, &cur_node);
                              if (n == NULL)
                                {
                                  cur_node.host_start--;
                                  n = splay_tree_lookup (mem_map, &cur_node);
                                  cur_node.host_start++;
                                }
                            }
                          if (n == NULL)
                            {
                              gomp_mutex_unlock (&devicep->lock);
                              gomp_fatal ("Pointer target of array section "
                                          "wasn't mapped");
                            }
                          cur_node.host_start -= n->host_start;
                          cur_node.tgt_offset = n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start;
                          /* At this point tgt_offset is target address of the
                             array section.  Now subtract bias to get what we
                             want to initialize the pointer with.  */
                          cur_node.tgt_offset -= sizes[j];
                          /* FIXME: see above FIXME comment.  */
                          devicep->host2dev_func (devicep->target_id,
                              (void *) (tgt->tgt_start + k->tgt_offset
                                        + ((uintptr_t) hostaddrs[j]
                                           - k->host_start)),
                              (void *) &cur_node.tgt_offset,
                              sizeof (void *));
                          i++;
                        }
                    break;
                  case GOMP_MAP_FORCE_PRESENT:
                    {
                      /* We already looked up the memory region above and it
                         was missing.  */
                      size_t size = k->host_end - k->host_start;
                      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%"PRIu64" (0x%"PRIx64"))",
                                  (void *) k->host_start,
                                  (uint64_t) size, (uint64_t) size);
#else
                      gomp_fatal ("present clause: !acc_is_present (%p, "
                                  "%lu (0x%lx))", (void *) k->host_start,
                                  (unsigned long) size, (unsigned long) size);
#endif
                    }
                    break;
                  case GOMP_MAP_FORCE_DEVICEPTR:
                    assert (k->host_end - k->host_start == sizeof (void *));

                    devicep->host2dev_func (devicep->target_id,
                                            (void *) (tgt->tgt_start
                                                      + k->tgt_offset),
                                            (void *) k->host_start,
                                            sizeof (void *));
                    break;
                  default:
                    gomp_mutex_unlock (&devicep->lock);
                    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
                                kind);
                  }
                array++;
              }
          }
    }

  if (is_target)
    {
      /* Fill in the leading device-pointer array for the target region.  */
      for (i = 0; i < mapnum; i++)
        {
          if (tgt->list[i] == NULL)
            cur_node.tgt_offset = (uintptr_t) NULL;
          else
            cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
                                  + tgt->list[i]->tgt_offset;
          /* FIXME: see above FIXME comment.  */
          devicep->host2dev_func (devicep->target_id,
                                  (void *) (tgt->tgt_start
                                            + i * sizeof (void *)),
                                  (void *) &cur_node.tgt_offset,
                                  sizeof (void *));
        }
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}

/* Free TGT and, if it owns a device memory block, release that too.  */

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      {
        /* Still referenced elsewhere: defer the unmap by moving one
           reference onto the async count.  */
        tgt->list[i]->refcount--;
        tgt->list[i]->async_refcount++;
      }
    else
      {
        /* Last reference: issue the device-to-host copy now if the
           mapping was marked for copy-back.  */
        splay_tree_key k = tgt->list[i];
        if (k->copy_from)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      /* An async copy still owns a reference (see gomp_copy_from_async);
         consume it instead of unmapping.  */
      tgt->list[i]->async_refcount--;
    else
      {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from && do_copyfrom)
          devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
                                  (void *) (k->tgt->tgt_start + k->tgt_offset),
                                  k->host_end - k->host_start);
        splay_tree_remove (&devicep->mem_map, k);
        if (k->tgt->refcount > 1)
          k->tgt->refcount--;
        else
          gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

/* Copy data for MAPNUM mapped objects between host and DEVICEP according
   to KINDS: host-to-device for copy-to kinds, device-to-host for copy-from
   kinds.  Every object must already be fully mapped, otherwise fatal.
   Zero-sized entries are skipped.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
             size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizes[i];
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
        if (n)
          {
            int kind = get_kind (is_openacc, kinds, i);
            if (n->host_start > cur_node.host_start
                || n->host_end < cur_node.host_end)
              {
                gomp_mutex_unlock (&devicep->lock);
                gomp_fatal ("Trying to update [%p..%p) object when "
                            "only [%p..%p) is mapped",
                            (void *) cur_node.host_start,
                            (void *) cur_node.host_end,
                            (void *) n->host_start,
                            (void *) n->host_end);
              }
            if (GOMP_MAP_COPY_TO_P (kind & typemask))
              devicep->host2dev_func (devicep->target_id,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      (void *) cur_node.host_start,
                                      cur_node.host_end - cur_node.host_start);
            if (GOMP_MAP_COPY_FROM_P (kind & typemask))
              devicep->dev2host_func (devicep->target_id,
                                      (void *) cur_node.host_start,
                                      (void *) (n->tgt->tgt_start
                                                + n->tgt_offset
                                                + cur_node.host_start
                                                - n->host_start),
                                      cur_node.host_end - cur_node.host_start);
          }
        else
          {
            gomp_mutex_unlock (&devicep->lock);
            gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
                        (void *) cur_node.host_start,
                        (void *) cur_node.host_end);
          }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  DEVICEP's lock must be held by the caller;
   IS_REGISTER_LOCK says whether register_lock is also held (so it can be
   released before gomp_fatal).  */

static void
gomp_offload_image_to_device (struct gomp_device_descr *devicep,
                              void *host_table, void *target_data,
                              bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries
    = devicep->load_image_func (devicep->target_id, target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Can't map target functions or variables");
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = 1;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions are keyed by a 1-byte range at their host address.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
          != (uintptr_t) host_var_table[i * 2 + 1])
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Can't map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register (void *host_table, enum offload_target_type target_type,
                       void *target_data)
{
  int i;
  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
        gomp_offload_image_to_device (devicep, host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
                         void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];
  int i;

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      int j;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type != target_type || !devicep->is_initialized)
        {
          gomp_mutex_unlock (&devicep->lock);
          continue;
        }

      devicep->unload_image_func (devicep->target_id, target_data);

      /* Remove mapping from splay tree.  Keep hold of the first node so the
         descriptor allocated in gomp_offload_image_to_device can be freed
         after all keys are removed.  */
      struct splay_tree_key_s k;
      splay_tree_key node = NULL;
      if (num_funcs > 0)
        {
          k.host_start = (uintptr_t) host_func_table[0];
          k.host_end = k.host_start + 1;
          node = splay_tree_lookup (&devicep->mem_map, &k);
        }
      else if (num_vars > 0)
        {
          k.host_start = (uintptr_t) host_var_table[0];
          k.host_end = k.host_start + (uintptr_t) host_var_table[1];
          node = splay_tree_lookup (&devicep->mem_map, &k);
        }

      for (j = 0; j < num_funcs; j++)
        {
          k.host_start = (uintptr_t) host_func_table[j];
          k.host_end = k.host_start + 1;
          splay_tree_remove (&devicep->mem_map, &k);
        }

      for (j = 0; j < num_vars; j++)
        {
          k.host_start = (uintptr_t) host_var_table[j * 2];
          k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
          splay_tree_remove (&devicep->mem_map, &k);
        }

      if (node)
        {
          /* NOTE(review): NODE points at a key embedded inside
             node->tgt->array; freeing NODE separately from node->tgt
             assumes it is the start of that array allocation — verify
             against the splay-tree node layout.  */
          free (node->tgt);
          free (node);
        }

      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.
*/

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_offload_image_to_device (devicep, image->host_table,
                                      image->target_data, false);
    }

  devicep->is_initialized = true;
}

/* Free address mapping tables.  MM must be locked on entry, and remains locked
   on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value
   larger than last available hw device), use host fallback.
   FN is address of host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, with addresses of the host objects,
   sizes of the host objects (resp. for pointer kind pointer bias
   and assumed sizeof (void *) size) and kinds.  */

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
             size_t mapnum, void **hostaddrs, size_t *sizes,
             unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  Run FN with a cleared thread state, preserving
         only the place-partition info, then restore the old state.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
        {
          thr->place = old_thr.place;
          thr->ts.place_partition_len = gomp_places_list_len;
        }
      fn (hostaddrs);
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    fn_addr = (void *) fn;
  else
    {
      /* Translate the host function address via the device's splay tree
         (functions are keyed by a 1-byte range, see image loading).  */
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
        {
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("Target function wasn't mapped");
        }
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     true);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}

/* Called on entry to a #pragma omp target data region: map the listed
   objects and push the descriptor onto the task's target_data chain
   (popped again by GOMP_target_end_data).  */

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      if (icv->target_data)
        {
          /* Even when doing a host fallback, if there are any active
             #pragma omp target data constructs, need to remember the
             new #pragma omp target data, otherwise GOMP_target_end_data
             would get out of sync.  */
          struct target_mem_desc *tgt
            = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
          tgt->prev = icv->target_data;
          icv->target_data = tgt;
        }
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     false);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

/* Called on exit from a #pragma omp target data region: pop the most
   recent descriptor and unmap it (with copy-back).  */

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

/* Called for #pragma omp target update: copy the listed objects between
   host and device per KINDS.  No-op when DEVICE resolves to the host.  */

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

/* Record THREAD_LIMIT (clamped to UINT_MAX's encoding of "infinite" when
   above INT_MAX) in the thread-limit-var ICV; NUM_TEAMS is currently
   ignored.  */

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
        = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
   in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;
  int optional_present, optional_total;

  /* Clear any existing error.  */
  dlerror ();

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    {
      err = dlerror ();
      goto out;
    }

  /* Check if all required functions are available in the plugin and store
     their handlers.  */
#define DLSYM(f)							\
  do									\
    {									\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f);	\
      err = dlerror ();							\
      if (err != NULL)							\
	goto out;							\
    }									\
  while (0)
  /* Similar, but missing functions are not an error.
*/ 1128#define DLSYM_OPT(f, n) \ 1129 do \ 1130 { \ 1131 const char *tmp_err; \ 1132 device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n); \ 1133 tmp_err = dlerror (); \ 1134 if (tmp_err == NULL) \ 1135 optional_present++; \ 1136 else \ 1137 last_missing = #n; \ 1138 optional_total++; \ 1139 } \ 1140 while (0) 1141 1142 DLSYM (get_name); 1143 DLSYM (get_caps); 1144 DLSYM (get_type); 1145 DLSYM (get_num_devices); 1146 DLSYM (init_device); 1147 DLSYM (fini_device); 1148 DLSYM (load_image); 1149 DLSYM (unload_image); 1150 DLSYM (alloc); 1151 DLSYM (free); 1152 DLSYM (dev2host); 1153 DLSYM (host2dev); 1154 device->capabilities = device->get_caps_func (); 1155 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1156 DLSYM (run); 1157 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 1158 { 1159 optional_present = optional_total = 0; 1160 DLSYM_OPT (openacc.exec, openacc_parallel); 1161 DLSYM_OPT (openacc.register_async_cleanup, 1162 openacc_register_async_cleanup); 1163 DLSYM_OPT (openacc.async_test, openacc_async_test); 1164 DLSYM_OPT (openacc.async_test_all, openacc_async_test_all); 1165 DLSYM_OPT (openacc.async_wait, openacc_async_wait); 1166 DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async); 1167 DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all); 1168 DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async); 1169 DLSYM_OPT (openacc.async_set_async, openacc_async_set_async); 1170 DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data); 1171 DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data); 1172 /* Require all the OpenACC handlers if we have 1173 GOMP_OFFLOAD_CAP_OPENACC_200. 
*/ 1174 if (optional_present != optional_total) 1175 { 1176 err = "plugin missing OpenACC handler function"; 1177 goto out; 1178 } 1179 optional_present = optional_total = 0; 1180 DLSYM_OPT (openacc.cuda.get_current_device, 1181 openacc_get_current_cuda_device); 1182 DLSYM_OPT (openacc.cuda.get_current_context, 1183 openacc_get_current_cuda_context); 1184 DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream); 1185 DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream); 1186 /* Make sure all the CUDA functions are there if any of them are. */ 1187 if (optional_present && optional_present != optional_total) 1188 { 1189 err = "plugin missing OpenACC CUDA handler function"; 1190 goto out; 1191 } 1192 } 1193#undef DLSYM 1194#undef DLSYM_OPT 1195 1196 out: 1197 if (err != NULL) 1198 { 1199 gomp_error ("while loading %s: %s", plugin_name, err); 1200 if (last_missing) 1201 gomp_error ("missing function was %s", last_missing); 1202 if (plugin_handle) 1203 dlclose (plugin_handle); 1204 } 1205 return err == NULL; 1206} 1207 1208/* This function initializes the runtime needed for offloading. 1209 It parses the list of offload targets and tries to load the plugins for 1210 these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP 1211 will be set, and the array DEVICES initialized, containing descriptors for 1212 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows 1213 by the others. */ 1214 1215static void 1216gomp_target_init (void) 1217{ 1218 const char *prefix ="libgomp-plugin-"; 1219 const char *suffix = SONAME_SUFFIX (1); 1220 const char *cur, *next; 1221 char *plugin_name; 1222 int i, new_num_devices; 1223 1224 num_devices = 0; 1225 devices = NULL; 1226 1227 cur = OFFLOAD_TARGETS; 1228 if (*cur) 1229 do 1230 { 1231 struct gomp_device_descr current_device; 1232 1233 next = strchr (cur, ','); 1234 1235 plugin_name = (char *) malloc (1 + (next ? 
next - cur : strlen (cur)) 1236 + strlen (prefix) + strlen (suffix)); 1237 if (!plugin_name) 1238 { 1239 num_devices = 0; 1240 break; 1241 } 1242 1243 strcpy (plugin_name, prefix); 1244 strncat (plugin_name, cur, next ? next - cur : strlen (cur)); 1245 strcat (plugin_name, suffix); 1246 1247 if (gomp_load_plugin_for_device (¤t_device, plugin_name)) 1248 { 1249 new_num_devices = current_device.get_num_devices_func (); 1250 if (new_num_devices >= 1) 1251 { 1252 /* Augment DEVICES and NUM_DEVICES. */ 1253 1254 devices = realloc (devices, (num_devices + new_num_devices) 1255 * sizeof (struct gomp_device_descr)); 1256 if (!devices) 1257 { 1258 num_devices = 0; 1259 free (plugin_name); 1260 break; 1261 } 1262 1263 current_device.name = current_device.get_name_func (); 1264 /* current_device.capabilities has already been set. */ 1265 current_device.type = current_device.get_type_func (); 1266 current_device.mem_map.root = NULL; 1267 current_device.is_initialized = false; 1268 current_device.openacc.data_environ = NULL; 1269 for (i = 0; i < new_num_devices; i++) 1270 { 1271 current_device.target_id = i; 1272 devices[num_devices] = current_device; 1273 gomp_mutex_init (&devices[num_devices].lock); 1274 num_devices++; 1275 } 1276 } 1277 } 1278 1279 free (plugin_name); 1280 cur = next + 1; 1281 } 1282 while (next); 1283 1284 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set 1285 NUM_DEVICES_OPENMP. 
*/ 1286 struct gomp_device_descr *devices_s 1287 = malloc (num_devices * sizeof (struct gomp_device_descr)); 1288 if (!devices_s) 1289 { 1290 num_devices = 0; 1291 free (devices); 1292 devices = NULL; 1293 } 1294 num_devices_openmp = 0; 1295 for (i = 0; i < num_devices; i++) 1296 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 1297 devices_s[num_devices_openmp++] = devices[i]; 1298 int num_devices_after_openmp = num_devices_openmp; 1299 for (i = 0; i < num_devices; i++) 1300 if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 1301 devices_s[num_devices_after_openmp++] = devices[i]; 1302 free (devices); 1303 devices = devices_s; 1304 1305 for (i = 0; i < num_devices; i++) 1306 { 1307 /* The 'devices' array can be moved (by the realloc call) until we have 1308 found all the plugins, so registering with the OpenACC runtime (which 1309 takes a copy of the pointer argument) must be delayed until now. */ 1310 if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 1311 goacc_register (&devices[i]); 1312 } 1313} 1314 1315#else /* PLUGIN_SUPPORT */ 1316/* If dlfcn.h is unavailable we always fallback to host execution. 1317 GOMP_target* routines are just stubs for this case. */ 1318static void 1319gomp_target_init (void) 1320{ 1321} 1322#endif /* PLUGIN_SUPPORT */ 1323