1/* Copyright (C) 2013-2022 Free Software Foundation, Inc. 2 Contributed by Jakub Jelinek <jakub@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file contains the support of offloading. */ 27 28#include "libgomp.h" 29#include "oacc-plugin.h" 30#include "oacc-int.h" 31#include "gomp-constants.h" 32#include <limits.h> 33#include <stdbool.h> 34#include <stdlib.h> 35#ifdef HAVE_INTTYPES_H 36# include <inttypes.h> /* For PRIu64. 
*/ 37#endif 38#include <string.h> 39#include <assert.h> 40#include <errno.h> 41 42#ifdef PLUGIN_SUPPORT 43#include <dlfcn.h> 44#include "plugin-suffix.h" 45#endif 46 47typedef uintptr_t *hash_entry_type; 48static inline void * htab_alloc (size_t size) { return gomp_malloc (size); } 49static inline void htab_free (void *ptr) { free (ptr); } 50#include "hashtab.h" 51 52static inline hashval_t 53htab_hash (hash_entry_type element) 54{ 55 return hash_pointer ((void *) element); 56} 57 58static inline bool 59htab_eq (hash_entry_type x, hash_entry_type y) 60{ 61 return x == y; 62} 63 64#define FIELD_TGT_EMPTY (~(size_t) 0) 65 66static void gomp_target_init (void); 67 68/* The whole initialization code for offloading plugins is only run one. */ 69static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT; 70 71/* Mutex for offload image registration. */ 72static gomp_mutex_t register_lock; 73 74/* This structure describes an offload image. 75 It contains type of the target device, pointer to host table descriptor, and 76 pointer to target data. */ 77struct offload_image_descr { 78 unsigned version; 79 enum offload_target_type type; 80 const void *host_table; 81 const void *target_data; 82}; 83 84/* Array of descriptors of offload images. */ 85static struct offload_image_descr *offload_images; 86 87/* Total number of offload images. */ 88static int num_offload_images; 89 90/* Array of descriptors for all available devices. */ 91static struct gomp_device_descr *devices; 92 93/* Total number of available devices. */ 94static int num_devices; 95 96/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices. */ 97static int num_devices_openmp; 98 99/* Similar to gomp_realloc, but release register_lock before gomp_fatal. 
*/ 100 101static void * 102gomp_realloc_unlock (void *old, size_t size) 103{ 104 void *ret = realloc (old, size); 105 if (ret == NULL) 106 { 107 gomp_mutex_unlock (®ister_lock); 108 gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size); 109 } 110 return ret; 111} 112 113attribute_hidden void 114gomp_init_targets_once (void) 115{ 116 (void) pthread_once (&gomp_is_initialized, gomp_target_init); 117} 118 119attribute_hidden int 120gomp_get_num_devices (void) 121{ 122 gomp_init_targets_once (); 123 return num_devices_openmp; 124} 125 126static struct gomp_device_descr * 127resolve_device (int device_id) 128{ 129 if (device_id == GOMP_DEVICE_ICV) 130 { 131 struct gomp_task_icv *icv = gomp_icv (false); 132 device_id = icv->default_device_var; 133 } 134 135 if (device_id < 0 || device_id >= gomp_get_num_devices ()) 136 { 137 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY 138 && device_id != GOMP_DEVICE_HOST_FALLBACK 139 && device_id != num_devices_openmp) 140 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 141 "but device not found"); 142 143 return NULL; 144 } 145 146 gomp_mutex_lock (&devices[device_id].lock); 147 if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED) 148 gomp_init_device (&devices[device_id]); 149 else if (devices[device_id].state == GOMP_DEVICE_FINALIZED) 150 { 151 gomp_mutex_unlock (&devices[device_id].lock); 152 153 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY) 154 gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, " 155 "but device is finalized"); 156 157 return NULL; 158 } 159 gomp_mutex_unlock (&devices[device_id].lock); 160 161 return &devices[device_id]; 162} 163 164 165static inline splay_tree_key 166gomp_map_lookup (splay_tree mem_map, splay_tree_key key) 167{ 168 if (key->host_start != key->host_end) 169 return splay_tree_lookup (mem_map, key); 170 171 key->host_end++; 172 splay_tree_key n = splay_tree_lookup (mem_map, key); 173 key->host_end--; 174 if (n) 175 return n; 176 
key->host_start--; 177 n = splay_tree_lookup (mem_map, key); 178 key->host_start++; 179 if (n) 180 return n; 181 return splay_tree_lookup (mem_map, key); 182} 183 184static inline splay_tree_key 185gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key) 186{ 187 if (key->host_start != key->host_end) 188 return splay_tree_lookup (mem_map, key); 189 190 key->host_end++; 191 splay_tree_key n = splay_tree_lookup (mem_map, key); 192 key->host_end--; 193 return n; 194} 195 196static inline void 197gomp_device_copy (struct gomp_device_descr *devicep, 198 bool (*copy_func) (int, void *, const void *, size_t), 199 const char *dst, void *dstaddr, 200 const char *src, const void *srcaddr, 201 size_t size) 202{ 203 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size)) 204 { 205 gomp_mutex_unlock (&devicep->lock); 206 gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed", 207 src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size); 208 } 209} 210 211static inline void 212goacc_device_copy_async (struct gomp_device_descr *devicep, 213 bool (*copy_func) (int, void *, const void *, size_t, 214 struct goacc_asyncqueue *), 215 const char *dst, void *dstaddr, 216 const char *src, const void *srcaddr, 217 const void *srcaddr_orig, 218 size_t size, struct goacc_asyncqueue *aq) 219{ 220 if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq)) 221 { 222 gomp_mutex_unlock (&devicep->lock); 223 if (srcaddr_orig && srcaddr_orig != srcaddr) 224 gomp_fatal ("Copying of %s object [%p..%p)" 225 " via buffer %s object [%p..%p)" 226 " to %s object [%p..%p) failed", 227 src, srcaddr_orig, srcaddr_orig + size, 228 src, srcaddr, srcaddr + size, 229 dst, dstaddr, dstaddr + size); 230 else 231 gomp_fatal ("Copying of %s object [%p..%p)" 232 " to %s object [%p..%p) failed", 233 src, srcaddr, srcaddr + size, 234 dst, dstaddr, dstaddr + size); 235 } 236} 237 238/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses) 239 host to device 
memory transfers. */ 240 241struct gomp_coalesce_chunk 242{ 243 /* The starting and ending point of a coalesced chunk of memory. */ 244 size_t start, end; 245}; 246 247struct gomp_coalesce_buf 248{ 249 /* Buffer into which gomp_copy_host2dev will memcpy data and from which 250 it will be copied to the device. */ 251 void *buf; 252 struct target_mem_desc *tgt; 253 /* Array with offsets, chunks[i].start is the starting offset and 254 chunks[i].end ending offset relative to tgt->tgt_start device address 255 of chunks which are to be copied to buf and later copied to device. */ 256 struct gomp_coalesce_chunk *chunks; 257 /* Number of chunks in chunks array, or -1 if coalesce buffering should not 258 be performed. */ 259 long chunk_cnt; 260 /* During construction of chunks array, how many memory regions are within 261 the last chunk. If there is just one memory region for a chunk, we copy 262 it directly to device rather than going through buf. */ 263 long use_cnt; 264}; 265 266/* Maximum size of memory region considered for coalescing. Larger copies 267 are performed directly. */ 268#define MAX_COALESCE_BUF_SIZE (32 * 1024) 269 270/* Maximum size of a gap in between regions to consider them being copied 271 within the same chunk. All the device offsets considered are within 272 newly allocated device memory, so it isn't fatal if we copy some padding 273 in between from host to device. The gaps come either from alignment 274 padding or from memory regions which are not supposed to be copied from 275 host to device (e.g. map(alloc:), map(from:) etc.). */ 276#define MAX_COALESCE_BUF_GAP (4 * 1024) 277 278/* Add region with device tgt_start relative offset and length to CBUF. 279 280 This must not be used for asynchronous copies, because the host data might 281 not be computed yet (by an earlier asynchronous compute region, for 282 example). 283 TODO ... but we could allow CBUF usage for EPHEMERAL data? 
(Open question: 284 is it more performant to use libgomp CBUF buffering or individual device 285 asyncronous copying?) */ 286 287static inline void 288gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len) 289{ 290 if (len > MAX_COALESCE_BUF_SIZE || len == 0) 291 return; 292 if (cbuf->chunk_cnt) 293 { 294 if (cbuf->chunk_cnt < 0) 295 return; 296 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end) 297 { 298 cbuf->chunk_cnt = -1; 299 return; 300 } 301 if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP) 302 { 303 cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len; 304 cbuf->use_cnt++; 305 return; 306 } 307 /* If the last chunk is only used by one mapping, discard it, 308 as it will be one host to device copy anyway and 309 memcpying it around will only waste cycles. */ 310 if (cbuf->use_cnt == 1) 311 cbuf->chunk_cnt--; 312 } 313 cbuf->chunks[cbuf->chunk_cnt].start = start; 314 cbuf->chunks[cbuf->chunk_cnt].end = start + len; 315 cbuf->chunk_cnt++; 316 cbuf->use_cnt = 1; 317} 318 319/* Return true for mapping kinds which need to copy data from the 320 host to device for regions that weren't previously mapped. */ 321 322static inline bool 323gomp_to_device_kind_p (int kind) 324{ 325 switch (kind) 326 { 327 case GOMP_MAP_ALLOC: 328 case GOMP_MAP_FROM: 329 case GOMP_MAP_FORCE_ALLOC: 330 case GOMP_MAP_FORCE_FROM: 331 case GOMP_MAP_ALWAYS_FROM: 332 return false; 333 default: 334 return true; 335 } 336} 337 338/* Copy host memory to an offload device. In asynchronous mode (if AQ is 339 non-NULL), when the source data is stack or may otherwise be deallocated 340 before the asynchronous copy takes place, EPHEMERAL must be passed as 341 TRUE. 
 */

attribute_hidden void
gomp_copy_host2dev (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *d, const void *h, size_t sz,
		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
{
  if (__builtin_expect (aq != NULL, 0))
    {
      /* See 'gomp_coalesce_buf_add'.  */
      assert (!cbuf);

      void *h_buf = (void *) h;
      if (ephemeral)
	{
	  /* We're queueing up an asynchronous copy from data that may
	     disappear before the transfer takes place (i.e. because it is a
	     stack local in a function that is no longer executing).  Make a
	     copy of the data into a temporary buffer in those cases.  */
	  h_buf = gomp_malloc (sz);
	  memcpy (h_buf, h, sz);
	}
      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
			       "dev", d, "host", h_buf, h, sz, aq);
      if (ephemeral)
	/* Free temporary buffer once the transfer has completed.  */
	devicep->openacc.async.queue_callback_func (aq, free, h_buf);

      return;
    }

  if (cbuf)
    {
      /* Offset of the destination within the coalescing window.  */
      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
	{
	  /* Binary search for the chunk containing DOFF; if found, stage the
	     data into the host-side coalesce buffer instead of copying now.  */
	  long first = 0;
	  long last = cbuf->chunk_cnt - 1;
	  while (first <= last)
	    {
	      long middle = (first + last) >> 1;
	      if (cbuf->chunks[middle].end <= doff)
		first = middle + 1;
	      else if (cbuf->chunks[middle].start <= doff)
		{
		  /* Region must lie entirely within one chunk.  */
		  if (doff + sz > cbuf->chunks[middle].end)
		    {
		      gomp_mutex_unlock (&devicep->lock);
		      gomp_fatal ("internal libgomp cbuf error");
		    }
		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
			  h, sz);
		  return;
		}
	      else
		last = middle - 1;
	    }
	}
    }

  /* Fall back to a direct synchronous copy.  */
  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
}

/* Copy device memory back to the host, asynchronously when AQ is given.  */

attribute_hidden void
gomp_copy_dev2host (struct gomp_device_descr *devicep,
		    struct goacc_asyncqueue *aq,
		    void *h, const void *d, size_t sz)
{
  if (__builtin_expect (aq != NULL, 0))
    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
			     "host", h, "dev", d, NULL, sz, aq);
  else
    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
}

/* Free DEVPTR on DEVICEP; drop the device lock and abort on failure.  */

static void
gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
{
  if (!devicep->free_func (devicep->target_id, devptr))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("error in freeing device memory block at %p", devptr);
    }
}

/* Increment reference count of a splay_tree_key region K by 1.
   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
   increment the value if refcount is not yet contained in the set (used for
   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
   once for each construct).  */

static inline void
gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    return;

  uintptr_t *refcount_ptr = &k->refcount;

  /* Structure-element maps share a group refcount; redirect to it.  */
  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  if (refcount_set)
    {
      /* Already adjusted once for this construct; do nothing.  */
      if (htab_find (*refcount_set, refcount_ptr))
	return;
      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
    }

  *refcount_ptr += 1;
  return;
}

/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
   is true, set reference count to zero.  If REFCOUNT_SET != NULL, use it to
   track already seen refcounts, and only adjust the value if refcount is not
   yet contained in the set (like gomp_increment_refcount).

   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
   it is already zero and we know we decremented it earlier.  This signals
   that associated maps should be copied back to host.

   *DO_REMOVE is set to true when this is the first handling of this refcount
   and we are setting it to zero.  This signals a removal of this key from the
   splay-tree map.

   Copy and removal are separated due to cases like handling of structure
   elements, e.g. each map of a structure element representing a possible copy
   out of a structure field has to be handled individually, but we only signal
   removal for one (the first encountered) sibling map.  */

static inline void
gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
			 bool *do_copy, bool *do_remove)
{
  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
    {
      *do_copy = *do_remove = false;
      return;
    }

  uintptr_t *refcount_ptr = &k->refcount;

  /* Structure-element maps share a group refcount; redirect to it.  */
  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
    refcount_ptr = &k->structelem_refcount;
  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
    refcount_ptr = k->structelem_refcount_ptr;

  bool new_encountered_refcount;
  bool set_to_zero = false;
  bool is_zero = false;

  uintptr_t orig_refcount = *refcount_ptr;

  if (refcount_set)
    {
      if (htab_find (*refcount_set, refcount_ptr))
	{
	  /* Already adjusted once for this construct.  */
	  new_encountered_refcount = false;
	  goto end;
	}

      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
      *slot = refcount_ptr;
      new_encountered_refcount = true;
    }
  else
    /* If no refcount_set being used, assume all keys are being decremented
       for the first time.
*/ 514 new_encountered_refcount = true; 515 516 if (delete_p) 517 *refcount_ptr = 0; 518 else if (*refcount_ptr > 0) 519 *refcount_ptr -= 1; 520 521 end: 522 if (*refcount_ptr == 0) 523 { 524 if (orig_refcount > 0) 525 set_to_zero = true; 526 527 is_zero = true; 528 } 529 530 *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero)); 531 *do_remove = (new_encountered_refcount && set_to_zero); 532} 533 534/* Handle the case where gomp_map_lookup, splay_tree_lookup or 535 gomp_map_0len_lookup found oldn for newn. 536 Helper function of gomp_map_vars. */ 537 538static inline void 539gomp_map_vars_existing (struct gomp_device_descr *devicep, 540 struct goacc_asyncqueue *aq, splay_tree_key oldn, 541 splay_tree_key newn, struct target_var_desc *tgt_var, 542 unsigned char kind, bool always_to_flag, bool implicit, 543 struct gomp_coalesce_buf *cbuf, 544 htab_t *refcount_set) 545{ 546 assert (kind != GOMP_MAP_ATTACH 547 || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); 548 549 tgt_var->key = oldn; 550 tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); 551 tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); 552 tgt_var->is_attach = false; 553 tgt_var->offset = newn->host_start - oldn->host_start; 554 555 /* For implicit maps, old contained in new is valid. */ 556 bool implicit_subset = (implicit 557 && newn->host_start <= oldn->host_start 558 && oldn->host_end <= newn->host_end); 559 if (implicit_subset) 560 tgt_var->length = oldn->host_end - oldn->host_start; 561 else 562 tgt_var->length = newn->host_end - newn->host_start; 563 564 if ((kind & GOMP_MAP_FLAG_FORCE) 565 /* For implicit maps, old contained in new is valid. */ 566 || !(implicit_subset 567 /* Otherwise, new contained inside old is considered valid. 
*/ 568 || (oldn->host_start <= newn->host_start 569 && newn->host_end <= oldn->host_end))) 570 { 571 gomp_mutex_unlock (&devicep->lock); 572 gomp_fatal ("Trying to map into device [%p..%p) object when " 573 "[%p..%p) is already mapped", 574 (void *) newn->host_start, (void *) newn->host_end, 575 (void *) oldn->host_start, (void *) oldn->host_end); 576 } 577 578 if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag) 579 { 580 /* Implicit + always should not happen. If this does occur, below 581 address/length adjustment is a TODO. */ 582 assert (!implicit_subset); 583 584 if (oldn->aux && oldn->aux->attach_count) 585 { 586 /* We have to be careful not to overwrite still attached pointers 587 during the copyback to host. */ 588 uintptr_t addr = newn->host_start; 589 while (addr < newn->host_end) 590 { 591 size_t i = (addr - oldn->host_start) / sizeof (void *); 592 if (oldn->aux->attach_count[i] == 0) 593 gomp_copy_host2dev (devicep, aq, 594 (void *) (oldn->tgt->tgt_start 595 + oldn->tgt_offset 596 + addr - oldn->host_start), 597 (void *) addr, 598 sizeof (void *), false, cbuf); 599 addr += sizeof (void *); 600 } 601 } 602 else 603 gomp_copy_host2dev (devicep, aq, 604 (void *) (oldn->tgt->tgt_start + oldn->tgt_offset 605 + newn->host_start - oldn->host_start), 606 (void *) newn->host_start, 607 newn->host_end - newn->host_start, false, cbuf); 608 } 609 610 gomp_increment_refcount (oldn, refcount_set); 611} 612 613static int 614get_kind (bool short_mapkind, void *kinds, int idx) 615{ 616 if (!short_mapkind) 617 return ((unsigned char *) kinds)[idx]; 618 619 int val = ((unsigned short *) kinds)[idx]; 620 if (GOMP_MAP_IMPLICIT_P (val)) 621 val &= ~GOMP_MAP_IMPLICIT; 622 return val; 623} 624 625 626static bool 627get_implicit (bool short_mapkind, void *kinds, int idx) 628{ 629 if (!short_mapkind) 630 return false; 631 632 int val = ((unsigned short *) kinds)[idx]; 633 return GOMP_MAP_IMPLICIT_P (val); 634} 635 636static void 637gomp_map_pointer (struct target_mem_desc *tgt, 
struct goacc_asyncqueue *aq, 638 uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias, 639 struct gomp_coalesce_buf *cbuf, 640 bool allow_zero_length_array_sections) 641{ 642 struct gomp_device_descr *devicep = tgt->device_descr; 643 struct splay_tree_s *mem_map = &devicep->mem_map; 644 struct splay_tree_key_s cur_node; 645 646 cur_node.host_start = host_ptr; 647 if (cur_node.host_start == (uintptr_t) NULL) 648 { 649 cur_node.tgt_offset = (uintptr_t) NULL; 650 gomp_copy_host2dev (devicep, aq, 651 (void *) (tgt->tgt_start + target_offset), 652 (void *) &cur_node.tgt_offset, sizeof (void *), 653 true, cbuf); 654 return; 655 } 656 /* Add bias to the pointer value. */ 657 cur_node.host_start += bias; 658 cur_node.host_end = cur_node.host_start; 659 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 660 if (n == NULL) 661 { 662 if (allow_zero_length_array_sections) 663 cur_node.tgt_offset = 0; 664 else 665 { 666 gomp_mutex_unlock (&devicep->lock); 667 gomp_fatal ("Pointer target of array section wasn't mapped"); 668 } 669 } 670 else 671 { 672 cur_node.host_start -= n->host_start; 673 cur_node.tgt_offset 674 = n->tgt->tgt_start + n->tgt_offset + cur_node.host_start; 675 /* At this point tgt_offset is target address of the 676 array section. Now subtract bias to get what we want 677 to initialize the pointer with. 
*/ 678 cur_node.tgt_offset -= bias; 679 } 680 gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset), 681 (void *) &cur_node.tgt_offset, sizeof (void *), 682 true, cbuf); 683} 684 685static void 686gomp_map_fields_existing (struct target_mem_desc *tgt, 687 struct goacc_asyncqueue *aq, splay_tree_key n, 688 size_t first, size_t i, void **hostaddrs, 689 size_t *sizes, void *kinds, 690 struct gomp_coalesce_buf *cbuf, htab_t *refcount_set) 691{ 692 struct gomp_device_descr *devicep = tgt->device_descr; 693 struct splay_tree_s *mem_map = &devicep->mem_map; 694 struct splay_tree_key_s cur_node; 695 int kind; 696 bool implicit; 697 const bool short_mapkind = true; 698 const int typemask = short_mapkind ? 0xff : 0x7; 699 700 cur_node.host_start = (uintptr_t) hostaddrs[i]; 701 cur_node.host_end = cur_node.host_start + sizes[i]; 702 splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node); 703 kind = get_kind (short_mapkind, kinds, i); 704 implicit = get_implicit (short_mapkind, kinds, i); 705 if (n2 706 && n2->tgt == n->tgt 707 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 708 { 709 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 710 kind & typemask, false, implicit, cbuf, 711 refcount_set); 712 return; 713 } 714 if (sizes[i] == 0) 715 { 716 if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1]) 717 { 718 cur_node.host_start--; 719 n2 = splay_tree_lookup (mem_map, &cur_node); 720 cur_node.host_start++; 721 if (n2 722 && n2->tgt == n->tgt 723 && n2->host_start - n->host_start 724 == n2->tgt_offset - n->tgt_offset) 725 { 726 gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i], 727 kind & typemask, false, implicit, cbuf, 728 refcount_set); 729 return; 730 } 731 } 732 cur_node.host_end++; 733 n2 = splay_tree_lookup (mem_map, &cur_node); 734 cur_node.host_end--; 735 if (n2 736 && n2->tgt == n->tgt 737 && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset) 738 { 739 
	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
				  kind & typemask, false, implicit, cbuf,
				  refcount_set);
	  return;
	}
    }
  gomp_mutex_unlock (&devicep->lock);
  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
	      "other mapped elements from the same structure weren't mapped "
	      "together with it", (void *) cur_node.host_start,
	      (void *) cur_node.host_end);
}

/* Attach the host pointer stored at ATTACH_TO (which lies inside the mapped
   region N) to its translated device address, maintaining per-pointer attach
   counters in N->aux.  Only the first attachment performs the device write.  */

attribute_hidden void
gomp_attach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree mem_map,
		     splay_tree_key n, uintptr_t attach_to, size_t bias,
		     struct gomp_coalesce_buf *cbufp,
		     bool allow_zero_length_array_sections)
{
  struct splay_tree_key_s s;
  size_t size, idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for attach");
    }

  /* One attach counter per pointer-sized slot of the region.  */
  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
  /* We might have a pointer in a packed struct: however we cannot have more
     than one such pointer in each pointer-sized portion of the struct, so
     this is safe.  */
  idx = (attach_to - n->host_start) / sizeof (void *);

  if (!n->aux)
    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));

  if (!n->aux->attach_count)
    n->aux->attach_count
      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);

  if (n->aux->attach_count[idx] < UINTPTR_MAX)
    n->aux->attach_count[idx]++;
  else
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count overflow");
    }

  if (n->aux->attach_count[idx] == 1)
    {
      /* First attachment: compute the device-side slot and pointee address,
	 then write the translated pointer to the device.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) attach_to;
      splay_tree_key tn;
      uintptr_t data;

      if ((void *) target == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("attempt to attach null pointer");
	}

      s.host_start = target + bias;
      s.host_end = s.host_start + 1;
      tn = splay_tree_lookup (mem_map, &s);

      if (!tn)
	{
	  if (allow_zero_length_array_sections)
	    /* When allowing attachment to zero-length array sections, we
	       allow attaching to NULL pointers when the target region is not
	       mapped.  */
	    data = 0;
	  else
	    {
	      gomp_mutex_unlock (&devicep->lock);
	      gomp_fatal ("pointer target not mapped for attach");
	    }
	}
      else
	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;

      gomp_debug (1,
		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) attach_to, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) attach_to, (int) n->aux->attach_count[idx]);
}

/* Undo gomp_attach_pointer for the pointer at DETACH_FROM inside region N:
   decrement its attach counter (or force it to drop to zero if FINALIZE) and,
   on the last detach, restore the original host pointer value on the
   device.  */

attribute_hidden void
gomp_detach_pointer (struct gomp_device_descr *devicep,
		     struct goacc_asyncqueue *aq, splay_tree_key n,
		     uintptr_t detach_from, bool finalize,
		     struct gomp_coalesce_buf *cbufp)
{
  size_t idx;

  if (n == NULL)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("enclosing struct not mapped for detach");
    }

  idx = (detach_from - n->host_start) / sizeof (void *);

  if (!n->aux || !n->aux->attach_count)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("no attachment counters for struct");
    }

  if (finalize)
    n->aux->attach_count[idx] = 1;

  if (n->aux->attach_count[idx] == 0)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("attach count underflow");
    }
  else
    n->aux->attach_count[idx]--;

  if (n->aux->attach_count[idx] == 0)
    {
      /* Last detach: rewrite the device slot with the host pointer value.  */
      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
			 - n->host_start;
      uintptr_t target = (uintptr_t) *(void **) detach_from;

      gomp_debug (1,
		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
		  __FUNCTION__, (void *) detach_from, (void *) devptr,
		  (void *) (n->tgt->tgt_start + n->tgt_offset),
		  (void *) target);

      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
			  sizeof (void *), true, cbufp);
    }
  else
    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
		(void *) detach_from, (int) n->aux->attach_count[idx]);
}

/* Return the device address corresponding to list entry I of TGT, using the
   OFFSET_* sentinels stored in tgt->list[i].offset for unmapped entries.  */

attribute_hidden uintptr_t
gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
{
  if (tgt->list[i].key != NULL)
    return tgt->list[i].key->tgt->tgt_start
	   + tgt->list[i].key->tgt_offset
	   + tgt->list[i].offset;

  switch (tgt->list[i].offset)
    {
    case OFFSET_INLINED:
      /* Host value passed through unchanged (e.g. firstprivate int).  */
      return (uintptr_t) hostaddrs[i];

    case OFFSET_POINTER:
      return 0;

    case OFFSET_STRUCT:
      /* Base of a struct: derive from the first mapped member.  */
      return tgt->list[i + 1].key->tgt->tgt_start
	     + tgt->list[i + 1].key->tgt_offset
	     + tgt->list[i + 1].offset
	     + (uintptr_t) hostaddrs[i]
	     - (uintptr_t) hostaddrs[i + 1];

    default:
      return tgt->tgt_start + tgt->list[i].offset;
    }
}

/* Core mapping routine shared by OpenMP target and OpenACC: map MAPNUM
   host regions described by HOSTADDRS/SIZES/KINDS onto DEVICEP, returning a
   freshly allocated target_mem_desc.  (Body continues below.)  */

static inline __attribute__((always_inline)) struct target_mem_desc *
gomp_map_vars_internal (struct gomp_device_descr *devicep,
			struct goacc_asyncqueue *aq, size_t mapnum,
			void **hostaddrs, void **devaddrs, size_t *sizes,
			void *kinds, bool short_mapkind,
			htab_t *refcount_set,
			enum gomp_map_vars_kind pragma_kind)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  bool has_firstprivate = false;
  bool has_always_ptrset = false;
  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
  /* Short map kinds store alignment in the high byte; old ones in bits 3+.  */
  const int rshift = short_mapkind ? 8 : 3;
  const int typemask = short_mapkind ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  /* Enter-data mappings start unreferenced; others start with one ref.  */
  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ?
0 : 1; 937 tgt->device_descr = devicep; 938 tgt->prev = NULL; 939 struct gomp_coalesce_buf cbuf, *cbufp = NULL; 940 941 if (mapnum == 0) 942 { 943 tgt->tgt_start = 0; 944 tgt->tgt_end = 0; 945 return tgt; 946 } 947 948 tgt_align = sizeof (void *); 949 tgt_size = 0; 950 cbuf.chunks = NULL; 951 cbuf.chunk_cnt = -1; 952 cbuf.use_cnt = 0; 953 cbuf.buf = NULL; 954 if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET) 955 { 956 size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk); 957 cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size); 958 cbuf.chunk_cnt = 0; 959 } 960 if (pragma_kind == GOMP_MAP_VARS_TARGET) 961 { 962 size_t align = 4 * sizeof (void *); 963 tgt_align = align; 964 tgt_size = mapnum * sizeof (void *); 965 cbuf.chunk_cnt = 1; 966 cbuf.use_cnt = 1 + (mapnum > 1); 967 cbuf.chunks[0].start = 0; 968 cbuf.chunks[0].end = tgt_size; 969 } 970 971 gomp_mutex_lock (&devicep->lock); 972 if (devicep->state == GOMP_DEVICE_FINALIZED) 973 { 974 gomp_mutex_unlock (&devicep->lock); 975 free (tgt); 976 return NULL; 977 } 978 979 for (i = 0; i < mapnum; i++) 980 { 981 int kind = get_kind (short_mapkind, kinds, i); 982 bool implicit = get_implicit (short_mapkind, kinds, i); 983 if (hostaddrs[i] == NULL 984 || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT) 985 { 986 tgt->list[i].key = NULL; 987 tgt->list[i].offset = OFFSET_INLINED; 988 continue; 989 } 990 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR 991 || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 992 { 993 tgt->list[i].key = NULL; 994 if (!not_found_cnt) 995 { 996 /* In OpenMP < 5.0 and OpenACC the mapping has to be done 997 on a separate construct prior to using use_device_{addr,ptr}. 998 In OpenMP 5.0, map directives need to be ordered by the 999 middle-end before the use_device_* clauses. If 1000 !not_found_cnt, all mappings requested (if any) are already 1001 mapped, so use_device_{addr,ptr} can be resolved right away. 
1002 Otherwise, if not_found_cnt, gomp_map_lookup might fail 1003 now but would succeed after performing the mappings in the 1004 following loop. We can't defer this always to the second 1005 loop, because it is not even invoked when !not_found_cnt 1006 after the first loop. */ 1007 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1008 cur_node.host_end = cur_node.host_start; 1009 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 1010 if (n != NULL) 1011 { 1012 cur_node.host_start -= n->host_start; 1013 hostaddrs[i] 1014 = (void *) (n->tgt->tgt_start + n->tgt_offset 1015 + cur_node.host_start); 1016 } 1017 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 1018 { 1019 gomp_mutex_unlock (&devicep->lock); 1020 gomp_fatal ("use_device_ptr pointer wasn't mapped"); 1021 } 1022 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1023 /* If not present, continue using the host address. */ 1024 ; 1025 else 1026 __builtin_unreachable (); 1027 tgt->list[i].offset = OFFSET_INLINED; 1028 } 1029 else 1030 tgt->list[i].offset = 0; 1031 continue; 1032 } 1033 else if ((kind & typemask) == GOMP_MAP_STRUCT) 1034 { 1035 size_t first = i + 1; 1036 size_t last = i + sizes[i]; 1037 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1038 cur_node.host_end = (uintptr_t) hostaddrs[last] 1039 + sizes[last]; 1040 tgt->list[i].key = NULL; 1041 tgt->list[i].offset = OFFSET_STRUCT; 1042 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 1043 if (n == NULL) 1044 { 1045 size_t align = (size_t) 1 << (kind >> rshift); 1046 if (tgt_align < align) 1047 tgt_align = align; 1048 tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start; 1049 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1050 tgt_size += cur_node.host_end - cur_node.host_start; 1051 not_found_cnt += last - i; 1052 for (i = first; i <= last; i++) 1053 { 1054 tgt->list[i].key = NULL; 1055 if (!aq 1056 && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i) 1057 & typemask)) 1058 
gomp_coalesce_buf_add (&cbuf, 1059 tgt_size - cur_node.host_end 1060 + (uintptr_t) hostaddrs[i], 1061 sizes[i]); 1062 } 1063 i--; 1064 continue; 1065 } 1066 for (i = first; i <= last; i++) 1067 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 1068 sizes, kinds, NULL, refcount_set); 1069 i--; 1070 continue; 1071 } 1072 else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER) 1073 { 1074 tgt->list[i].key = NULL; 1075 tgt->list[i].offset = OFFSET_POINTER; 1076 has_firstprivate = true; 1077 continue; 1078 } 1079 else if ((kind & typemask) == GOMP_MAP_ATTACH 1080 || ((kind & typemask) 1081 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION)) 1082 { 1083 tgt->list[i].key = NULL; 1084 has_firstprivate = true; 1085 continue; 1086 } 1087 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1088 if (!GOMP_MAP_POINTER_P (kind & typemask)) 1089 cur_node.host_end = cur_node.host_start + sizes[i]; 1090 else 1091 cur_node.host_end = cur_node.host_start + sizeof (void *); 1092 if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE) 1093 { 1094 tgt->list[i].key = NULL; 1095 1096 size_t align = (size_t) 1 << (kind >> rshift); 1097 if (tgt_align < align) 1098 tgt_align = align; 1099 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1100 if (!aq) 1101 gomp_coalesce_buf_add (&cbuf, tgt_size, 1102 cur_node.host_end - cur_node.host_start); 1103 tgt_size += cur_node.host_end - cur_node.host_start; 1104 has_firstprivate = true; 1105 continue; 1106 } 1107 splay_tree_key n; 1108 if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION) 1109 { 1110 n = gomp_map_0len_lookup (mem_map, &cur_node); 1111 if (!n) 1112 { 1113 tgt->list[i].key = NULL; 1114 tgt->list[i].offset = OFFSET_POINTER; 1115 continue; 1116 } 1117 } 1118 else 1119 n = splay_tree_lookup (mem_map, &cur_node); 1120 if (n && n->refcount != REFCOUNT_LINK) 1121 { 1122 int always_to_cnt = 0; 1123 if ((kind & typemask) == GOMP_MAP_TO_PSET) 1124 { 1125 bool has_nullptr = false; 1126 size_t j; 1127 for (j = 0; j < n->tgt->list_count; j++) 1128 if 
(n->tgt->list[j].key == n) 1129 { 1130 has_nullptr = n->tgt->list[j].has_null_ptr_assoc; 1131 break; 1132 } 1133 if (n->tgt->list_count == 0) 1134 { 1135 /* 'declare target'; assume has_nullptr; it could also be 1136 statically assigned pointer, but that it should be to 1137 the equivalent variable on the host. */ 1138 assert (n->refcount == REFCOUNT_INFINITY); 1139 has_nullptr = true; 1140 } 1141 else 1142 assert (j < n->tgt->list_count); 1143 /* Re-map the data if there is an 'always' modifier or if it a 1144 null pointer was there and non a nonnull has been found; that 1145 permits transparent re-mapping for Fortran array descriptors 1146 which were previously mapped unallocated. */ 1147 for (j = i + 1; j < mapnum; j++) 1148 { 1149 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; 1150 if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) 1151 && (!has_nullptr 1152 || !GOMP_MAP_POINTER_P (ptr_kind) 1153 || *(void **) hostaddrs[j] == NULL)) 1154 break; 1155 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start 1156 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1157 > cur_node.host_end)) 1158 break; 1159 else 1160 { 1161 has_always_ptrset = true; 1162 ++always_to_cnt; 1163 } 1164 } 1165 } 1166 gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i], 1167 kind & typemask, always_to_cnt > 0, implicit, 1168 NULL, refcount_set); 1169 i += always_to_cnt; 1170 } 1171 else 1172 { 1173 tgt->list[i].key = NULL; 1174 1175 if ((kind & typemask) == GOMP_MAP_IF_PRESENT) 1176 { 1177 /* Not present, hence, skip entry - including its MAP_POINTER, 1178 when existing. 
*/ 1179 tgt->list[i].offset = OFFSET_POINTER; 1180 if (i + 1 < mapnum 1181 && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 1182 == GOMP_MAP_POINTER)) 1183 { 1184 ++i; 1185 tgt->list[i].key = NULL; 1186 tgt->list[i].offset = 0; 1187 } 1188 continue; 1189 } 1190 size_t align = (size_t) 1 << (kind >> rshift); 1191 not_found_cnt++; 1192 if (tgt_align < align) 1193 tgt_align = align; 1194 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1195 if (!aq 1196 && gomp_to_device_kind_p (kind & typemask)) 1197 gomp_coalesce_buf_add (&cbuf, tgt_size, 1198 cur_node.host_end - cur_node.host_start); 1199 tgt_size += cur_node.host_end - cur_node.host_start; 1200 if ((kind & typemask) == GOMP_MAP_TO_PSET) 1201 { 1202 size_t j; 1203 int kind; 1204 for (j = i + 1; j < mapnum; j++) 1205 if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind, 1206 kinds, j)) & typemask)) 1207 && !GOMP_MAP_ALWAYS_POINTER_P (kind)) 1208 break; 1209 else if ((uintptr_t) hostaddrs[j] < cur_node.host_start 1210 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1211 > cur_node.host_end)) 1212 break; 1213 else 1214 { 1215 tgt->list[j].key = NULL; 1216 i++; 1217 } 1218 } 1219 } 1220 } 1221 1222 if (devaddrs) 1223 { 1224 if (mapnum != 1) 1225 { 1226 gomp_mutex_unlock (&devicep->lock); 1227 gomp_fatal ("unexpected aggregation"); 1228 } 1229 tgt->to_free = devaddrs[0]; 1230 tgt->tgt_start = (uintptr_t) tgt->to_free; 1231 tgt->tgt_end = tgt->tgt_start + sizes[0]; 1232 } 1233 else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET) 1234 { 1235 /* Allocate tgt_align aligned tgt_size block of memory. */ 1236 /* FIXME: Perhaps change interface to allocate properly aligned 1237 memory. 
*/ 1238 tgt->to_free = devicep->alloc_func (devicep->target_id, 1239 tgt_size + tgt_align - 1); 1240 if (!tgt->to_free) 1241 { 1242 gomp_mutex_unlock (&devicep->lock); 1243 gomp_fatal ("device memory allocation fail"); 1244 } 1245 1246 tgt->tgt_start = (uintptr_t) tgt->to_free; 1247 tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); 1248 tgt->tgt_end = tgt->tgt_start + tgt_size; 1249 1250 if (cbuf.use_cnt == 1) 1251 cbuf.chunk_cnt--; 1252 if (cbuf.chunk_cnt > 0) 1253 { 1254 cbuf.buf 1255 = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start); 1256 if (cbuf.buf) 1257 { 1258 cbuf.tgt = tgt; 1259 cbufp = &cbuf; 1260 } 1261 } 1262 } 1263 else 1264 { 1265 tgt->to_free = NULL; 1266 tgt->tgt_start = 0; 1267 tgt->tgt_end = 0; 1268 } 1269 1270 tgt_size = 0; 1271 if (pragma_kind == GOMP_MAP_VARS_TARGET) 1272 tgt_size = mapnum * sizeof (void *); 1273 1274 tgt->array = NULL; 1275 if (not_found_cnt || has_firstprivate || has_always_ptrset) 1276 { 1277 if (not_found_cnt) 1278 tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); 1279 splay_tree_node array = tgt->array; 1280 size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY; 1281 uintptr_t field_tgt_base = 0; 1282 splay_tree_key field_tgt_structelem_first = NULL; 1283 1284 for (i = 0; i < mapnum; i++) 1285 if (has_always_ptrset 1286 && tgt->list[i].key 1287 && (get_kind (short_mapkind, kinds, i) & typemask) 1288 == GOMP_MAP_TO_PSET) 1289 { 1290 splay_tree_key k = tgt->list[i].key; 1291 bool has_nullptr = false; 1292 size_t j; 1293 for (j = 0; j < k->tgt->list_count; j++) 1294 if (k->tgt->list[j].key == k) 1295 { 1296 has_nullptr = k->tgt->list[j].has_null_ptr_assoc; 1297 break; 1298 } 1299 if (k->tgt->list_count == 0) 1300 has_nullptr = true; 1301 else 1302 assert (j < k->tgt->list_count); 1303 1304 tgt->list[i].has_null_ptr_assoc = false; 1305 for (j = i + 1; j < mapnum; j++) 1306 { 1307 int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask; 1308 if 
(!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind) 1309 && (!has_nullptr 1310 || !GOMP_MAP_POINTER_P (ptr_kind) 1311 || *(void **) hostaddrs[j] == NULL)) 1312 break; 1313 else if ((uintptr_t) hostaddrs[j] < k->host_start 1314 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1315 > k->host_end)) 1316 break; 1317 else 1318 { 1319 if (*(void **) hostaddrs[j] == NULL) 1320 tgt->list[i].has_null_ptr_assoc = true; 1321 tgt->list[j].key = k; 1322 tgt->list[j].copy_from = false; 1323 tgt->list[j].always_copy_from = false; 1324 tgt->list[j].is_attach = false; 1325 gomp_increment_refcount (k, refcount_set); 1326 gomp_map_pointer (k->tgt, aq, 1327 (uintptr_t) *(void **) hostaddrs[j], 1328 k->tgt_offset + ((uintptr_t) hostaddrs[j] 1329 - k->host_start), 1330 sizes[j], cbufp, false); 1331 } 1332 } 1333 i = j - 1; 1334 } 1335 else if (tgt->list[i].key == NULL) 1336 { 1337 int kind = get_kind (short_mapkind, kinds, i); 1338 bool implicit = get_implicit (short_mapkind, kinds, i); 1339 if (hostaddrs[i] == NULL) 1340 continue; 1341 switch (kind & typemask) 1342 { 1343 size_t align, len, first, last; 1344 splay_tree_key n; 1345 case GOMP_MAP_FIRSTPRIVATE: 1346 align = (size_t) 1 << (kind >> rshift); 1347 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1348 tgt->list[i].offset = tgt_size; 1349 len = sizes[i]; 1350 gomp_copy_host2dev (devicep, aq, 1351 (void *) (tgt->tgt_start + tgt_size), 1352 (void *) hostaddrs[i], len, false, cbufp); 1353 tgt_size += len; 1354 continue; 1355 case GOMP_MAP_FIRSTPRIVATE_INT: 1356 case GOMP_MAP_ZERO_LEN_ARRAY_SECTION: 1357 continue; 1358 case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT: 1359 /* The OpenACC 'host_data' construct only allows 'use_device' 1360 "mapping" clauses, so in the first loop, 'not_found_cnt' 1361 must always have been zero, so all OpenACC 'use_device' 1362 clauses have already been handled. (We can only easily test 1363 'use_device' with 'if_present' clause here.) 
*/ 1364 assert (tgt->list[i].offset == OFFSET_INLINED); 1365 /* Nevertheless, FALLTHRU to the normal handling, to keep the 1366 code conceptually simple, similar to the first loop. */ 1367 case GOMP_MAP_USE_DEVICE_PTR: 1368 if (tgt->list[i].offset == 0) 1369 { 1370 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1371 cur_node.host_end = cur_node.host_start; 1372 n = gomp_map_lookup (mem_map, &cur_node); 1373 if (n != NULL) 1374 { 1375 cur_node.host_start -= n->host_start; 1376 hostaddrs[i] 1377 = (void *) (n->tgt->tgt_start + n->tgt_offset 1378 + cur_node.host_start); 1379 } 1380 else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR) 1381 { 1382 gomp_mutex_unlock (&devicep->lock); 1383 gomp_fatal ("use_device_ptr pointer wasn't mapped"); 1384 } 1385 else if ((kind & typemask) 1386 == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT) 1387 /* If not present, continue using the host address. */ 1388 ; 1389 else 1390 __builtin_unreachable (); 1391 tgt->list[i].offset = OFFSET_INLINED; 1392 } 1393 continue; 1394 case GOMP_MAP_STRUCT: 1395 first = i + 1; 1396 last = i + sizes[i]; 1397 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1398 cur_node.host_end = (uintptr_t) hostaddrs[last] 1399 + sizes[last]; 1400 if (tgt->list[first].key != NULL) 1401 continue; 1402 n = splay_tree_lookup (mem_map, &cur_node); 1403 if (n == NULL) 1404 { 1405 size_t align = (size_t) 1 << (kind >> rshift); 1406 tgt_size -= (uintptr_t) hostaddrs[first] 1407 - (uintptr_t) hostaddrs[i]; 1408 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1409 tgt_size += (uintptr_t) hostaddrs[first] 1410 - (uintptr_t) hostaddrs[i]; 1411 field_tgt_base = (uintptr_t) hostaddrs[first]; 1412 field_tgt_offset = tgt_size; 1413 field_tgt_clear = last; 1414 field_tgt_structelem_first = NULL; 1415 tgt_size += cur_node.host_end 1416 - (uintptr_t) hostaddrs[first]; 1417 continue; 1418 } 1419 for (i = first; i <= last; i++) 1420 gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs, 1421 sizes, kinds, cbufp, refcount_set); 1422 i--; 
1423 continue; 1424 case GOMP_MAP_ALWAYS_POINTER: 1425 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1426 cur_node.host_end = cur_node.host_start + sizeof (void *); 1427 n = splay_tree_lookup (mem_map, &cur_node); 1428 if (n == NULL 1429 || n->host_start > cur_node.host_start 1430 || n->host_end < cur_node.host_end) 1431 { 1432 gomp_mutex_unlock (&devicep->lock); 1433 gomp_fatal ("always pointer not mapped"); 1434 } 1435 if ((get_kind (short_mapkind, kinds, i - 1) & typemask) 1436 != GOMP_MAP_ALWAYS_POINTER) 1437 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1); 1438 if (cur_node.tgt_offset) 1439 cur_node.tgt_offset -= sizes[i]; 1440 gomp_copy_host2dev (devicep, aq, 1441 (void *) (n->tgt->tgt_start 1442 + n->tgt_offset 1443 + cur_node.host_start 1444 - n->host_start), 1445 (void *) &cur_node.tgt_offset, 1446 sizeof (void *), true, cbufp); 1447 cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset 1448 + cur_node.host_start - n->host_start; 1449 continue; 1450 case GOMP_MAP_IF_PRESENT: 1451 /* Not present - otherwise handled above. Skip over its 1452 MAP_POINTER as well. */ 1453 if (i + 1 < mapnum 1454 && ((typemask & get_kind (short_mapkind, kinds, i + 1)) 1455 == GOMP_MAP_POINTER)) 1456 ++i; 1457 continue; 1458 case GOMP_MAP_ATTACH: 1459 case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION: 1460 { 1461 cur_node.host_start = (uintptr_t) hostaddrs[i]; 1462 cur_node.host_end = cur_node.host_start + sizeof (void *); 1463 splay_tree_key n = splay_tree_lookup (mem_map, &cur_node); 1464 if (n != NULL) 1465 { 1466 tgt->list[i].key = n; 1467 tgt->list[i].offset = cur_node.host_start - n->host_start; 1468 tgt->list[i].length = n->host_end - n->host_start; 1469 tgt->list[i].copy_from = false; 1470 tgt->list[i].always_copy_from = false; 1471 tgt->list[i].is_attach = true; 1472 /* OpenACC 'attach'/'detach' doesn't affect 1473 structured/dynamic reference counts ('n->refcount', 1474 'n->dynamic_refcount'). 
*/ 1475 1476 bool zlas 1477 = ((kind & typemask) 1478 == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION); 1479 gomp_attach_pointer (devicep, aq, mem_map, n, 1480 (uintptr_t) hostaddrs[i], sizes[i], 1481 cbufp, zlas); 1482 } 1483 else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0) 1484 { 1485 gomp_mutex_unlock (&devicep->lock); 1486 gomp_fatal ("outer struct not mapped for attach"); 1487 } 1488 continue; 1489 } 1490 default: 1491 break; 1492 } 1493 splay_tree_key k = &array->key; 1494 k->host_start = (uintptr_t) hostaddrs[i]; 1495 if (!GOMP_MAP_POINTER_P (kind & typemask)) 1496 k->host_end = k->host_start + sizes[i]; 1497 else 1498 k->host_end = k->host_start + sizeof (void *); 1499 splay_tree_key n = splay_tree_lookup (mem_map, k); 1500 if (n && n->refcount != REFCOUNT_LINK) 1501 gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i], 1502 kind & typemask, false, implicit, cbufp, 1503 refcount_set); 1504 else 1505 { 1506 k->aux = NULL; 1507 if (n && n->refcount == REFCOUNT_LINK) 1508 { 1509 /* Replace target address of the pointer with target address 1510 of mapped object in the splay tree. */ 1511 splay_tree_remove (mem_map, n); 1512 k->aux 1513 = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); 1514 k->aux->link_key = n; 1515 } 1516 size_t align = (size_t) 1 << (kind >> rshift); 1517 tgt->list[i].key = k; 1518 k->tgt = tgt; 1519 k->refcount = 0; 1520 k->dynamic_refcount = 0; 1521 if (field_tgt_clear != FIELD_TGT_EMPTY) 1522 { 1523 k->tgt_offset = k->host_start - field_tgt_base 1524 + field_tgt_offset; 1525 if (openmp_p) 1526 { 1527 k->refcount = REFCOUNT_STRUCTELEM; 1528 if (field_tgt_structelem_first == NULL) 1529 { 1530 /* Set to first structure element of sequence. */ 1531 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST; 1532 field_tgt_structelem_first = k; 1533 } 1534 else 1535 /* Point to refcount of leading element, but do not 1536 increment again. 
*/ 1537 k->structelem_refcount_ptr 1538 = &field_tgt_structelem_first->structelem_refcount; 1539 1540 if (i == field_tgt_clear) 1541 { 1542 k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST; 1543 field_tgt_structelem_first = NULL; 1544 } 1545 } 1546 if (i == field_tgt_clear) 1547 field_tgt_clear = FIELD_TGT_EMPTY; 1548 } 1549 else 1550 { 1551 tgt_size = (tgt_size + align - 1) & ~(align - 1); 1552 k->tgt_offset = tgt_size; 1553 tgt_size += k->host_end - k->host_start; 1554 } 1555 /* First increment, from 0 to 1. gomp_increment_refcount 1556 encapsulates the different increment cases, so use this 1557 instead of directly setting 1 during initialization. */ 1558 gomp_increment_refcount (k, refcount_set); 1559 1560 tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask); 1561 tgt->list[i].always_copy_from 1562 = GOMP_MAP_ALWAYS_FROM_P (kind & typemask); 1563 tgt->list[i].is_attach = false; 1564 tgt->list[i].offset = 0; 1565 tgt->list[i].length = k->host_end - k->host_start; 1566 tgt->refcount++; 1567 array->left = NULL; 1568 array->right = NULL; 1569 splay_tree_insert (mem_map, array); 1570 switch (kind & typemask) 1571 { 1572 case GOMP_MAP_ALLOC: 1573 case GOMP_MAP_FROM: 1574 case GOMP_MAP_FORCE_ALLOC: 1575 case GOMP_MAP_FORCE_FROM: 1576 case GOMP_MAP_ALWAYS_FROM: 1577 break; 1578 case GOMP_MAP_TO: 1579 case GOMP_MAP_TOFROM: 1580 case GOMP_MAP_FORCE_TO: 1581 case GOMP_MAP_FORCE_TOFROM: 1582 case GOMP_MAP_ALWAYS_TO: 1583 case GOMP_MAP_ALWAYS_TOFROM: 1584 gomp_copy_host2dev (devicep, aq, 1585 (void *) (tgt->tgt_start 1586 + k->tgt_offset), 1587 (void *) k->host_start, 1588 k->host_end - k->host_start, 1589 false, cbufp); 1590 break; 1591 case GOMP_MAP_POINTER: 1592 case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION: 1593 gomp_map_pointer 1594 (tgt, aq, (uintptr_t) *(void **) k->host_start, 1595 k->tgt_offset, sizes[i], cbufp, 1596 ((kind & typemask) 1597 == GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION)); 1598 break; 1599 case GOMP_MAP_TO_PSET: 1600 
gomp_copy_host2dev (devicep, aq, 1601 (void *) (tgt->tgt_start 1602 + k->tgt_offset), 1603 (void *) k->host_start, 1604 k->host_end - k->host_start, 1605 false, cbufp); 1606 tgt->list[i].has_null_ptr_assoc = false; 1607 1608 for (j = i + 1; j < mapnum; j++) 1609 { 1610 int ptr_kind = (get_kind (short_mapkind, kinds, j) 1611 & typemask); 1612 if (!GOMP_MAP_POINTER_P (ptr_kind) 1613 && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)) 1614 break; 1615 else if ((uintptr_t) hostaddrs[j] < k->host_start 1616 || ((uintptr_t) hostaddrs[j] + sizeof (void *) 1617 > k->host_end)) 1618 break; 1619 else 1620 { 1621 tgt->list[j].key = k; 1622 tgt->list[j].copy_from = false; 1623 tgt->list[j].always_copy_from = false; 1624 tgt->list[j].is_attach = false; 1625 tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]); 1626 /* For OpenMP, the use of refcount_sets causes 1627 errors if we set k->refcount = 1 above but also 1628 increment it again here, for decrementing will 1629 not properly match, since we decrement only once 1630 for each key's refcount. Therefore avoid this 1631 increment for OpenMP constructs. */ 1632 if (!openmp_p) 1633 gomp_increment_refcount (k, refcount_set); 1634 gomp_map_pointer (tgt, aq, 1635 (uintptr_t) *(void **) hostaddrs[j], 1636 k->tgt_offset 1637 + ((uintptr_t) hostaddrs[j] 1638 - k->host_start), 1639 sizes[j], cbufp, false); 1640 } 1641 } 1642 i = j - 1; 1643 break; 1644 case GOMP_MAP_FORCE_PRESENT: 1645 { 1646 /* We already looked up the memory region above and it 1647 was missing. 
*/ 1648 size_t size = k->host_end - k->host_start; 1649 gomp_mutex_unlock (&devicep->lock); 1650#ifdef HAVE_INTTYPES_H 1651 gomp_fatal ("present clause: !acc_is_present (%p, " 1652 "%"PRIu64" (0x%"PRIx64"))", 1653 (void *) k->host_start, 1654 (uint64_t) size, (uint64_t) size); 1655#else 1656 gomp_fatal ("present clause: !acc_is_present (%p, " 1657 "%lu (0x%lx))", (void *) k->host_start, 1658 (unsigned long) size, (unsigned long) size); 1659#endif 1660 } 1661 break; 1662 case GOMP_MAP_FORCE_DEVICEPTR: 1663 assert (k->host_end - k->host_start == sizeof (void *)); 1664 gomp_copy_host2dev (devicep, aq, 1665 (void *) (tgt->tgt_start 1666 + k->tgt_offset), 1667 (void *) k->host_start, 1668 sizeof (void *), false, cbufp); 1669 break; 1670 default: 1671 gomp_mutex_unlock (&devicep->lock); 1672 gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__, 1673 kind); 1674 } 1675 1676 if (k->aux && k->aux->link_key) 1677 { 1678 /* Set link pointer on target to the device address of the 1679 mapped object. */ 1680 void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset); 1681 /* We intentionally do not use coalescing here, as it's not 1682 data allocated by the current call to this function. */ 1683 gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset, 1684 &tgt_addr, sizeof (void *), true, NULL); 1685 } 1686 array++; 1687 } 1688 } 1689 } 1690 1691 if (pragma_kind == GOMP_MAP_VARS_TARGET) 1692 { 1693 for (i = 0; i < mapnum; i++) 1694 { 1695 cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i); 1696 gomp_copy_host2dev (devicep, aq, 1697 (void *) (tgt->tgt_start + i * sizeof (void *)), 1698 (void *) &cur_node.tgt_offset, sizeof (void *), 1699 true, cbufp); 1700 } 1701 } 1702 1703 if (cbufp) 1704 { 1705 /* See 'gomp_coalesce_buf_add'. 
*/ 1706 assert (!aq); 1707 1708 long c = 0; 1709 for (c = 0; c < cbuf.chunk_cnt; ++c) 1710 gomp_copy_host2dev (devicep, aq, 1711 (void *) (tgt->tgt_start + cbuf.chunks[c].start), 1712 (char *) cbuf.buf + (cbuf.chunks[c].start 1713 - cbuf.chunks[0].start), 1714 cbuf.chunks[c].end - cbuf.chunks[c].start, 1715 true, NULL); 1716 free (cbuf.buf); 1717 cbuf.buf = NULL; 1718 cbufp = NULL; 1719 } 1720 1721 /* If the variable from "omp target enter data" map-list was already mapped, 1722 tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or 1723 gomp_exit_data. */ 1724 if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0) 1725 { 1726 free (tgt); 1727 tgt = NULL; 1728 } 1729 1730 gomp_mutex_unlock (&devicep->lock); 1731 return tgt; 1732} 1733 1734static struct target_mem_desc * 1735gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, 1736 void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, 1737 bool short_mapkind, htab_t *refcount_set, 1738 enum gomp_map_vars_kind pragma_kind) 1739{ 1740 /* This management of a local refcount_set is for convenience of callers 1741 who do not share a refcount_set over multiple map/unmap uses. 
*/
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (mapnum);
      refcount_set = &local_refcount_set;
    }

  struct target_mem_desc *tgt;
  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
				sizes, kinds, short_mapkind, refcount_set,
				pragma_kind);
  if (local_refcount_set)
    htab_free (local_refcount_set);

  return tgt;
}

/* OpenACC entry point for mapping MAPNUM variables on DEVICEP: forwards to
   gomp_map_vars_internal with async queue AQ, no refcount set, and
   PRAGMA_KIND tagged with GOMP_MAP_VARS_OPENACC so the internal code can
   distinguish OpenACC from OpenMP mappings.  */

attribute_hidden struct target_mem_desc *
goacc_map_vars (struct gomp_device_descr *devicep,
		struct goacc_asyncqueue *aq, size_t mapnum,
		void **hostaddrs, void **devaddrs, size_t *sizes,
		void *kinds, bool short_mapkind,
		enum gomp_map_vars_kind pragma_kind)
{
  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
				 sizes, kinds, short_mapkind, NULL,
				 GOMP_MAP_VARS_OPENACC | pragma_kind);
}

/* Release all resources held by TGT: the device memory region it covers
   (if it allocated one) plus the host-side descriptor itself.  */

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.
*/
  if (tgt->tgt_end)
    gomp_free_device_memory (tgt->device_descr, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Drop one reference from the target_mem_desc PTR points to; when the
   count reaches zero, unmap and free it via gomp_unmap_tgt.  Returns
   whether the descriptor was actually unmapped.  Takes a void * so it can
   share a signature with async-queue callbacks.  */

static bool
gomp_unref_tgt (void *ptr)
{
  bool is_tgt_unmapped = false;

  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    {
      gomp_unmap_tgt (tgt);
      is_tgt_unmapped = true;
    }

  return is_tgt_unmapped;
}

/* Result-discarding wrapper around gomp_unref_tgt, usable as an
   async-queue callback (which expects a void-returning function).  */

static void
gomp_unref_tgt_void (void *ptr)
{
  (void) gomp_unref_tgt (ptr);
}

/* Remove K from splay tree SP and free its auxiliary data.  If K shadowed
   an "omp declare target link" key, re-insert that link key so later
   lookups find the original again.  */

static void
gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
{
  splay_tree_remove (sp, k);
  if (k->aux)
    {
      if (k->aux->link_key)
	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
      if (k->aux->attach_count)
	free (k->aux->attach_count);
      free (k->aux);
      k->aux = NULL;
    }
}

/* Remove mapping K from DEVICEP's memory map and drop the reference on its
   owning target_mem_desc - asynchronously via AQ when AQ is non-NULL,
   otherwise synchronously.  Structure-element keys are removed as a whole
   sequence, starting from the _FIRST sibling.  Returns whether the owning
   target_mem_desc was unmapped (always false in the async case, where the
   unref is deferred to a queue callback).  */

static inline __attribute__((always_inline)) bool
gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
			  struct goacc_asyncqueue *aq)
{
  bool is_tgt_unmapped = false;

  if (REFCOUNT_STRUCTELEM_P (k->refcount))
    {
      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
	/* Infer the splay_tree_key of the first structelem key using the
	   pointer to the first structelem_refcount.  */
	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
			      - offsetof (struct splay_tree_key_s,
					  structelem_refcount));
      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));

      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
	 with the splay_tree_keys embedded inside.  */
      splay_tree_node node =
	(splay_tree_node) ((char *) k
			   - offsetof (struct splay_tree_node_s, key));
      while (true)
	{
	  /* Starting from the _FIRST key, and continue for all following
	     sibling keys.
*/
	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
	    break;
	  else
	    k = &(++node)->key;
	}
    }
  else
    gomp_remove_splay_tree_key (&devicep->mem_map, k);

  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) k->tgt);
  else
    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
  return is_tgt_unmapped;
}

/* Remove variable mapping K from DEVICEP synchronously.  Returns whether
   the owning target_mem_desc was unmapped as a consequence.  */

attribute_hidden bool
gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
{
  return gomp_remove_var_internal (devicep, k, NULL);
}

/* Remove a variable asynchronously.  This actually removes the variable
   mapping immediately, but retains the linked target_mem_desc until the
   asynchronous operation has completed (as it may still refer to target
   memory).  The device lock must be held before entry, and remains locked on
   exit.  */

attribute_hidden void
gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
		       struct goacc_asyncqueue *aq)
{
  (void) gomp_remove_var_internal (devicep, k, aq);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from device to host: if it is false, it is assumed that this
   has been done already.  */

static inline __attribute__((always_inline)) void
gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  /* An empty descriptor owns no device memory and no list entries; just
     free it.  */
  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      /* Device already shut down: nothing to copy back or unmap, only
	 release the host-side bookkeeping.  */
      gomp_mutex_unlock (&devicep->lock);
      free (tgt->array);
      free (tgt);
      return;
    }

  size_t i;

  /* We must perform detachments before any copies back to the host.
*/
  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;

      if (k != NULL && tgt->list[i].is_attach)
	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
			     + tgt->list[i].offset,
			     false, NULL);
    }

  for (i = 0; i < tgt->list_count; i++)
    {
      splay_tree_key k = tgt->list[i].key;
      if (k == NULL)
	continue;

      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
	 counts ('n->refcount', 'n->dynamic_refcount').  */
      if (tgt->list[i].is_attach)
	continue;

      bool do_copy, do_remove;
      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);

      /* Copy back when the refcount transition requests it (and the caller
	 wants copy-back) or when an 'always from' mapping forces it.  */
      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
	  || tgt->list[i].always_copy_from)
	gomp_copy_dev2host (devicep, aq,
			    (void *) (k->host_start + tgt->list[i].offset),
			    (void *) (k->tgt->tgt_start + k->tgt_offset
				      + tgt->list[i].offset),
			    tgt->list[i].length);
      if (do_remove)
	{
	  struct target_mem_desc *k_tgt = k->tgt;
	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
	  /* It would be bad if TGT got unmapped while we're still iterating
	     over its LIST_COUNT, and also expect to use it in the following
	     code.  */
	  assert (!is_tgt_unmapped
		  || k_tgt != tgt);
	}
    }

  /* Finally drop this function's own reference on TGT - deferred to a
     queue callback in the async case, since the device memory may still be
     in use by queued operations.  */
  if (aq)
    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
						(void *) tgt);
  else
    gomp_unref_tgt ((void *) tgt);

  gomp_mutex_unlock (&devicep->lock);
}

/* Synchronous OpenMP-side wrapper for gomp_unmap_vars_internal; see there
   for the DO_COPYFROM semantics.  */

static void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		 htab_t *refcount_set)
{
  /* This management of a local refcount_set is for convenience of callers
     who do not share a refcount_set over multiple map/unmap uses.
*/
  htab_t local_refcount_set = NULL;
  if (refcount_set == NULL)
    {
      local_refcount_set = htab_create (tgt->list_count);
      refcount_set = &local_refcount_set;
    }

  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);

  if (local_refcount_set)
    htab_free (local_refcount_set);
}

/* OpenACC-side wrapper for gomp_unmap_vars_internal: no refcount set, but
   an async queue AQ on which device operations are enqueued.  */

attribute_hidden void
goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
		  struct goacc_asyncqueue *aq)
{
  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
}

/* Implement the 'target update' directive: for each of the MAPNUM entries
   in HOSTADDRS/SIZES/KINDS that is currently mapped on DEVICEP, copy data
   to and/or from the device as directed by the entry's map kind.  Entries
   with zero size or no mapping are silently skipped.  */

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool short_mapkind)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  /* Short (16-bit) map kinds keep the kind proper in the low byte;
     old-style kinds use only 3 bits.  */
  const int typemask = short_mapkind ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (short_mapkind, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }

	    if (n->aux && n->aux->attach_count)
	      {
		uintptr_t addr = cur_node.host_start;
		while (addr < cur_node.host_end)
		  {
		    /* We have to be careful not to overwrite still attached
		       pointers during host<->device updates.
*/
		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
		    if (n->aux->attach_count[i] == 0)
		      {
			void *devaddr = (void *) (n->tgt->tgt_start
						  + n->tgt_offset
						  + addr - n->host_start);
			if (GOMP_MAP_COPY_TO_P (kind & typemask))
			  gomp_copy_host2dev (devicep, NULL,
					      devaddr, (void *) addr,
					      sizeof (void *), false, NULL);
			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
			  gomp_copy_dev2host (devicep, NULL,
					      (void *) addr, devaddr,
					      sizeof (void *));
		      }
		    addr += sizeof (void *);
		  }
	      }
	    else
	      {
		void *hostaddr = (void *) cur_node.host_start;
		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start
					  - n->host_start);
		size_t size = cur_node.host_end - cur_node.host_start;

		if (GOMP_MAP_COPY_TO_P (kind & typemask))
		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
				      false, NULL);
		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
	      }
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
   And insert to splay tree the mapping between addresses from HOST_TABLE and
   from loaded target image.  We rely in the host and device compiler
   emitting variable and functions in the same order.  */

static void
gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
			   const void *host_table, const void *target_data,
			   bool is_register_lock)
{
  /* HOST_TABLE is laid out as four pointers: [funcs start, funcs end,
     vars start, vars end].  */
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.
 */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  /* Others currently is only 'device_num' */
  int num_others = 1;

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries;

  num_target_entries
    = devicep->load_image_func (devicep->target_id, version,
                                target_data, &target_table);

  /* The plugin must return one target entry per host entry, optionally
     followed by the trailing 'others' entries.  */
  if (num_target_entries != num_funcs + num_vars
      /* Others (device_num) are included as trailing entries in pair list.  */
      && num_target_entries != num_funcs + num_vars + num_others)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
        gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Cannot map target functions or variables"
                  " (expected %u, have %u)", num_funcs + num_vars,
                  num_target_entries);
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  /* Image mappings are never removed by refcounting.  */
  tgt->refcount = REFCOUNT_INFINITY;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  /* Functions are mapped as degenerate [addr, addr + 1) ranges.  */
  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      uintptr_t target_size = target_var->end - target_var->start;
      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];

      /* Host and device copies of a non-link variable must agree in size;
         link variables hold a pointer on the device, so no check there.  */
      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
        {
          gomp_mutex_unlock (&devicep->lock);
          if (is_register_lock)
            gomp_mutex_unlock (&register_lock);
          gomp_fatal ("Cannot map target variables (size mismatch)");
        }

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end
        = k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
      k->dynamic_refcount = 0;
      k->aux = NULL;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  /* Last entry is for the on-device 'device_num' variable.  Tolerate case
     where plugin does not return this entry.  */
  if (num_funcs + num_vars < num_target_entries)
    {
      struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
      /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
         was found in this image.  */
      if (device_num_var->start != 0)
        {
          /* The index of the devicep within devices[] is regarded as its
             'device number', which is different from the per-device type
             devicep->target_id.  */
          int device_num_val = (int) (devicep - &devices[0]);
          if (device_num_var->end - device_num_var->start != sizeof (int))
            {
              gomp_mutex_unlock (&devicep->lock);
              if (is_register_lock)
                gomp_mutex_unlock (&register_lock);
              gomp_fatal ("offload plugin managed 'device_num' not of expected "
                          "format");
            }

          /* Copy device_num value to place on device memory, hereby actually
             designating its device number into effect.  */
          gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
                              &device_num_val, sizeof (int), false, NULL);
        }
    }

  free (target_table);
}

/* Unload the mappings described by target_data from device DEVICE_P.
   The device must be locked.   */

static void
gomp_unload_image_from_device (struct gomp_device_descr *devicep,
                               unsigned version,
                               const void *host_table, const void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end = ((void ***) host_table)[1];
  void **host_var_table = ((void ***) host_table)[2];
  void **host_vars_end = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars = (host_vars_end - host_var_table) / 2;

  struct splay_tree_key_s k;
  splay_tree_key node = NULL;

  /* Find mapping at start of node array */
  if (num_funcs || num_vars)
    {
      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
                      : (uintptr_t) host_var_table[0]);
      k.host_end = k.host_start + 1;
      node = splay_tree_lookup (&devicep->mem_map, &k);
    }

  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("image unload fail");
    }

  /* Remove mappings from splay tree.
 */
  int i;
  for (i = 0; i < num_funcs; i++)
    {
      k.host_start = (uintptr_t) host_func_table[i];
      k.host_end = k.host_start + 1;
      splay_tree_remove (&devicep->mem_map, &k);
    }

  /* Most significant bit of the size in host and target tables marks
     "omp declare target link" variables.  */
  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
  const uintptr_t size_mask = ~link_bit;
  bool is_tgt_unmapped = false;

  for (i = 0; i < num_vars; i++)
    {
      k.host_start = (uintptr_t) host_var_table[i * 2];
      k.host_end
        = k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);

      /* Link variables go through gomp_remove_var so that their refcount
         state gets cleaned up as well.  */
      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
        splay_tree_remove (&devicep->mem_map, &k);
      else
        {
          splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
          is_tgt_unmapped = gomp_remove_var (devicep, n);
        }
    }

  /* If removing the vars above did not already free the image's
     target_mem_desc, free it and its lookup node explicitly.  */
  if (node && !is_tgt_unmapped)
    {
      free (node->tgt);
      free (node);
    }
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_register_ver (unsigned version, const void *host_table,
                           int target_type, const void *target_data)
{
  int i;

  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
    gomp_fatal ("Library too old for offload (version %u < %u)",
                GOMP_VERSION, GOMP_VERSION_LIB (version));

  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_load_image_to_device (devicep, version,
                                   host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image to array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
                           (num_offload_images + 1)
                           * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].version = version;
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

/* Legacy (version 0) entry point for image registration.  */

void
GOMP_offload_register (const void *host_table, int target_type,
                       const void *target_data)
{
  GOMP_offload_register_ver (0, host_table, target_type, target_data);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
   the target, and TARGET_DATA needed by target plugin.  */

void
GOMP_offload_unregister_ver (unsigned version, const void *host_table,
                             int target_type, const void *target_data)
{
  int i;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type
          && devicep->state == GOMP_DEVICE_INITIALIZED)
        gomp_unload_image_from_device (devicep, version,
                                       host_table, target_data);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
        /* Order is not preserved: the last image fills the hole.  */
        offload_images[i] = offload_images[--num_offload_images];
        break;
      }

  gomp_mutex_unlock (&register_lock);
}

/* Legacy (version 0) entry point for image unregistration.  */

void
GOMP_offload_unregister (const void *host_table, int target_type,
                         const void *target_data)
{
  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  if (!devicep->init_device_func (devicep->target_id))
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("device initialization failed");
    }

  /* Load to device all images registered by the moment.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
        gomp_load_image_to_device (devicep, image->version,
                                   image->host_table, image->target_data,
                                   false);
    }

  /* Initialize OpenACC asynchronous queues.  */
  goacc_init_asyncqueues (devicep);

  devicep->state = GOMP_DEVICE_INITIALIZED;
}

/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.
 */

attribute_hidden bool
gomp_fini_device (struct gomp_device_descr *devicep)
{
  /* Drain OpenACC async queues first, then let the plugin finalize; the
     result reflects whether both steps succeeded.  */
  bool ret = goacc_fini_asyncqueues (devicep);
  ret &= devicep->fini_device_func (devicep->target_id);
  devicep->state = GOMP_DEVICE_FINALIZED;
  return ret;
}

/* Unload all registered images from DEVICEP, if it has been initialized.  */

attribute_hidden void
gomp_unload_device (struct gomp_device_descr *devicep)
{
  if (devicep->state == GOMP_DEVICE_INITIALIZED)
    {
      unsigned i;

      /* Unload from device all images registered at the moment.  */
      for (i = 0; i < num_offload_images; i++)
        {
          struct offload_image_descr *image = &offload_images[i];
          if (image->type == devicep->type)
            gomp_unload_image_from_device (devicep, image->version,
                                           image->host_table,
                                           image->target_data);
        }
    }
}

/* Host fallback for GOMP_target{,_ext} routines.  */

static void
gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
                      struct gomp_device_descr *devicep, void **args)
{
  struct gomp_thread old_thr, *thr = gomp_thread ();

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
                "be used for offloading");

  /* Run the region with a pristine thread state, restored afterwards.  */
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  /* Scan ARGS for a device-independent THREAD_LIMIT argument and apply it
     to the host ICV.  */
  if (args)
    while (*args)
      {
        intptr_t id = (intptr_t) *args++, val;
        if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
          val = (intptr_t) *args++;
        else
          val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
        if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
          continue;
        id &= GOMP_TARGET_ARG_ID_MASK;
        if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
          continue;
        val = val > INT_MAX ? INT_MAX : val;
        if (val)
          gomp_icv (true)->thread_limit_var = val;
        break;
      }

  fn (hostaddrs);
  gomp_free_thread (thr);
  *thr = old_thr;
}

/* Calculate alignment and size requirements of a private copy of data shared
   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */

static inline void
calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
                                     unsigned short *kinds, size_t *tgt_align,
                                     size_t *tgt_size)
{
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
      {
        /* The high byte of the kind encodes log2 of the alignment.  */
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        if (*tgt_align < align)
          *tgt_align = align;
        *tgt_size = (*tgt_size + align - 1) & ~(align - 1);
        *tgt_size += sizes[i];
      }
}

/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST (the TGT buffer), and
   redirect the corresponding HOSTADDRS entries at the private copies.  */

static inline void
copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds, size_t tgt_align,
                        size_t tgt_size)
{
  /* Round TGT up to the requested overall alignment.  */
  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
  if (al)
    tgt += tgt_align - al;
  tgt_size = 0;
  size_t i;
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
      {
        size_t align = (size_t) 1 << (kinds[i] >> 8);
        tgt_size = (tgt_size + align - 1) & ~(align - 1);
        memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
        hostaddrs[i] = tgt + tgt_size;
        tgt_size = tgt_size + sizes[i];
      }
}

/* Helper function of GOMP_target{,_ext} routines.
*/ 2524 2525static void * 2526gomp_get_target_fn_addr (struct gomp_device_descr *devicep, 2527 void (*host_fn) (void *)) 2528{ 2529 if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC) 2530 return (void *) host_fn; 2531 else 2532 { 2533 gomp_mutex_lock (&devicep->lock); 2534 if (devicep->state == GOMP_DEVICE_FINALIZED) 2535 { 2536 gomp_mutex_unlock (&devicep->lock); 2537 return NULL; 2538 } 2539 2540 struct splay_tree_key_s k; 2541 k.host_start = (uintptr_t) host_fn; 2542 k.host_end = k.host_start + 1; 2543 splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); 2544 gomp_mutex_unlock (&devicep->lock); 2545 if (tgt_fn == NULL) 2546 return NULL; 2547 2548 return (void *) tgt_fn->tgt_offset; 2549 } 2550} 2551 2552/* Called when encountering a target directive. If DEVICE 2553 is GOMP_DEVICE_ICV, it means use device-var ICV. If it is 2554 GOMP_DEVICE_HOST_FALLBACK (or any value 2555 larger than last available hw device), use host fallback. 2556 FN is address of host code, UNUSED is part of the current ABI, but 2557 we're not actually using it. HOSTADDRS, SIZES and KINDS are arrays 2558 with MAPNUM entries, with addresses of the host objects, 2559 sizes of the host objects (resp. for pointer kind pointer bias 2560 and assumed sizeof (void *) size) and kinds. */ 2561 2562void 2563GOMP_target (int device, void (*fn) (void *), const void *unused, 2564 size_t mapnum, void **hostaddrs, size_t *sizes, 2565 unsigned char *kinds) 2566{ 2567 struct gomp_device_descr *devicep = resolve_device (device); 2568 2569 void *fn_addr; 2570 if (devicep == NULL 2571 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 2572 /* All shared memory devices should use the GOMP_target_ext function. 
*/ 2573 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM 2574 || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) 2575 return gomp_target_fallback (fn, hostaddrs, devicep, NULL); 2576 2577 htab_t refcount_set = htab_create (mapnum); 2578 struct target_mem_desc *tgt_vars 2579 = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, 2580 &refcount_set, GOMP_MAP_VARS_TARGET); 2581 devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, 2582 NULL); 2583 htab_clear (refcount_set); 2584 gomp_unmap_vars (tgt_vars, true, &refcount_set); 2585 htab_free (refcount_set); 2586} 2587 2588static inline unsigned int 2589clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) 2590{ 2591 /* If we cannot run asynchronously, simply ignore nowait. */ 2592 if (devicep != NULL && devicep->async_run_func == NULL) 2593 flags &= ~GOMP_TARGET_FLAG_NOWAIT; 2594 2595 return flags; 2596} 2597 2598/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, 2599 and several arguments have been added: 2600 FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. 2601 DEPEND is array of dependencies, see GOMP_task for details. 2602 2603 ARGS is a pointer to an array consisting of a variable number of both 2604 device-independent and device-specific arguments, which can take one two 2605 elements where the first specifies for which device it is intended, the type 2606 and optionally also the value. If the value is not present in the first 2607 one, the whole second element the actual value. The last element of the 2608 array is a single NULL. Among the device independent can be for example 2609 NUM_TEAMS and THREAD_LIMIT. 
   NUM_TEAMS is positive if GOMP_teams will be called in the body with
   that value, or 1 if teams construct is not present, or 0, if
   teams construct does not have num_teams clause and so the choice is
   implementation defined, and -1 if it can't be determined on the host
   what value will GOMP_teams have on the device.
   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
   body with that value, or 0, if teams construct does not have thread_limit
   clause or the teams construct is not present, or -1 if it can't be
   determined on the host what value will GOMP_teams have on the device.  */

void
GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
                 void **hostaddrs, size_t *sizes, unsigned short *kinds,
                 unsigned int flags, void **depend, void **args)
{
  struct gomp_device_descr *devicep = resolve_device (device);
  size_t tgt_align = 0, tgt_size = 0;
  /* Set once firstprivate copies have been made so they are not redone.  */
  bool fpc_done = false;

  flags = clear_unsupported_flags (devicep, flags);

  if (flags & GOMP_TARGET_FLAG_NOWAIT)
    {
      struct gomp_thread *thr = gomp_thread ();
      /* Create a team if we don't have any around, as nowait
         target tasks make sense to run asynchronously even when
         outside of any parallel.  */
      if (__builtin_expect (thr->ts.team == NULL, 0))
        {
          struct gomp_team *team = gomp_new_team (1);
          struct gomp_task *task = thr->task;
          struct gomp_task **implicit_task = &task;
          struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
          team->prev_ts = thr->ts;
          thr->ts.team = team;
          thr->ts.team_id = 0;
          thr->ts.work_share = &team->work_shares[0];
          thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
          thr->ts.single_count = 0;
#endif
          thr->ts.static_trip = 0;
          thr->task = &team->implicit_task[0];
          gomp_init_task (thr->task, NULL, icv);
          /* Walk up to the nearest implicit task, if any, and take over
             its slot rather than keeping two implicit tasks alive.  */
          while (*implicit_task
                 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
            implicit_task = &(*implicit_task)->parent;
          if (*implicit_task)
            {
              thr->task = *implicit_task;
              gomp_end_task ();
              free (*implicit_task);
              thr->task = &team->implicit_task[0];
            }
          else
            pthread_setspecific (gomp_thread_destructor, thr);
          if (implicit_task != &task)
            {
              *implicit_task = thr->task;
              thr->task = task;
            }
        }
      if (thr->ts.team
          && !thr->task->final_task)
        {
          gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
                                   sizes, kinds, flags, depend, args,
                                   GOMP_TARGET_TASK_BEFORE_MAP);
          return;
        }
    }

  /* If there are depend clauses, but nowait is not present
     (or we are in a final task), block the parent task until the
     dependencies are resolved and then just continue with the rest
     of the function as if it is a merged task.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          /* If we might need to wait, copy firstprivate now.  */
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
          fpc_done = true;
          gomp_task_maybe_wait_for_dependencies (depend);
        }
    }

  void *fn_addr;
  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
    {
      /* Host fallback; make the firstprivate copies first unless they
         were already done above.  */
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      gomp_target_fallback (fn, hostaddrs, devicep, args);
      return;
    }

  struct target_mem_desc *tgt_vars;
  htab_t refcount_set = NULL;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    {
      /* Shared-memory device: no mapping needed, only the firstprivate
         copies.  */
      if (!fpc_done)
        {
          calculate_firstprivate_requirements (mapnum, sizes, kinds,
                                               &tgt_align, &tgt_size);
          if (tgt_align)
            {
              char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
              copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
                                      tgt_align, tgt_size);
            }
        }
      tgt_vars = NULL;
    }
  else
    {
      refcount_set = htab_create (mapnum);
      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
                                true, &refcount_set, GOMP_MAP_VARS_TARGET);
    }
  devicep->run_func (devicep->target_id, fn_addr,
                     tgt_vars ?
 (void *) tgt_vars->tgt_start : hostaddrs,
                     args);
  if (tgt_vars)
    {
      htab_clear (refcount_set);
      gomp_unmap_vars (tgt_vars, true, &refcount_set);
    }
  if (refcount_set)
    htab_free (refcount_set);
}

/* Host fallback for GOMP_target_data{,_ext} routines.  */

static void
gomp_target_data_fallback (struct gomp_device_descr *devicep)
{
  struct gomp_task_icv *icv = gomp_icv (false);

  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
      && devicep != NULL)
    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
                "be used for offloading");

  if (icv->target_data)
    {
      /* Even when doing a host fallback, if there are any active
         #pragma omp target data constructs, need to remember the
         new #pragma omp target data, otherwise GOMP_target_end_data
         would get out of sync.  */
      struct target_mem_desc *tgt
        = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
                         NULL, GOMP_MAP_VARS_DATA);
      tgt->prev = icv->target_data;
      icv->target_data = tgt;
    }
}

/* 'target data' entry point with 8-bit map kinds (legacy ABI).  */

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
                  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
                     NULL, GOMP_MAP_VARS_DATA);
  /* Push the new mapping onto the per-task stack of active target data
     regions.  */
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

/* 'target data' entry point with 16-bit map kinds.  */

void
GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
                      size_t *sizes, unsigned short *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return gomp_target_data_fallback (devicep);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
                     NULL, GOMP_MAP_VARS_DATA);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

/* End of a 'target data' region: pop the innermost region and unmap its
   variables, copying back "from" data.  */

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true, NULL);
    }
}

/* 'target update' entry point with 8-bit map kinds (legacy ABI).  */

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
                    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

/* 'target update' entry point with 16-bit map kinds plus FLAGS and DEPEND
   clauses.  */

void
GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
                        size_t *sizes, unsigned short *kinds,
                        unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags | GOMP_TARGET_FLAG_UPDATE,
                                           depend, NULL, GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.  */
              if (__builtin_expect (gomp_cancel_var, 0) && team)
                {
                  if (gomp_team_barrier_cancelled (&team->barrier))
                    return;
                  if (thr->task->taskgroup)
                    {
                      if (thr->task->taskgroup->cancelled)
                        return;
                      if (thr->task->taskgroup->workshare
                          && thr->task->taskgroup->prev
                          && thr->task->taskgroup->prev->cancelled)
                        return;
                    }
                }

              gomp_task_maybe_wait_for_dependencies (depend);
            }
        }
    }

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
    return;

  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  /* If parallel or taskgroup has been cancelled, don't start new tasks.
 */
  if (__builtin_expect (gomp_cancel_var, 0) && team)
    {
      if (gomp_team_barrier_cancelled (&team->barrier))
        return;
      if (thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
}

/* Implement 'target exit data' semantics: detach pointers, copy back
   "from" data, decrement reference counts and remove mappings whose count
   has dropped to zero.  REFCOUNT_SET tracks keys already decremented
   within this construct.  */

static void
gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
                void **hostaddrs, size_t *sizes, unsigned short *kinds,
                htab_t *refcount_set)
{
  const int typemask = 0xff;
  size_t i;
  gomp_mutex_lock (&devicep->lock);
  if (devicep->state == GOMP_DEVICE_FINALIZED)
    {
      gomp_mutex_unlock (&devicep->lock);
      return;
    }

  /* Process all detach operations first, before any copy-back.  */
  for (i = 0; i < mapnum; i++)
    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
      {
        struct splay_tree_key_s cur_node;
        cur_node.host_start = (uintptr_t) hostaddrs[i];
        cur_node.host_end = cur_node.host_start + sizeof (void *);
        splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);

        if (n)
          gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
                               false, NULL);
      }

  int nrmvars = 0;
  splay_tree_key remove_vars[mapnum];

  for (i = 0; i < mapnum; i++)
    {
      struct splay_tree_key_s cur_node;
      unsigned char kind = kinds[i] & typemask;
      switch (kind)
        {
        case GOMP_MAP_FROM:
        case GOMP_MAP_ALWAYS_FROM:
        case GOMP_MAP_DELETE:
        case GOMP_MAP_RELEASE:
        case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
        case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
          cur_node.host_start = (uintptr_t) hostaddrs[i];
          cur_node.host_end = cur_node.host_start + sizes[i];
          splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
                              || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
            ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
            : splay_tree_lookup (&devicep->mem_map, &cur_node);
          if (!k)
            continue;

          bool delete_p = (kind == GOMP_MAP_DELETE
                           || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
          bool do_copy, do_remove;
          gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
                                   &do_remove);

          if ((kind == GOMP_MAP_FROM && do_copy)
              || kind == GOMP_MAP_ALWAYS_FROM)
            {
              if (k->aux && k->aux->attach_count)
                {
                  /* We have to be careful not to overwrite still attached
                     pointers during the copyback to host.  */
                  uintptr_t addr = k->host_start;
                  while (addr < k->host_end)
                    {
                      /* NOTE: this inner 'i' intentionally shadows the outer
                         loop index; it indexes attach_count.  */
                      size_t i = (addr - k->host_start) / sizeof (void *);
                      if (k->aux->attach_count[i] == 0)
                        gomp_copy_dev2host (devicep, NULL, (void *) addr,
                                            (void *) (k->tgt->tgt_start
                                                      + k->tgt_offset
                                                      + addr - k->host_start),
                                            sizeof (void *));
                      addr += sizeof (void *);
                    }
                }
              else
                gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
                                    (void *) (k->tgt->tgt_start + k->tgt_offset
                                              + cur_node.host_start
                                              - k->host_start),
                                    cur_node.host_end - cur_node.host_start);
            }

          /* Structure elements lists are removed altogether at once, which
             may cause immediate deallocation of the target_mem_desc, causing
             errors if we still have following element siblings to copy back.
             While we're at it, it also seems more disciplined to simply
             queue all removals together for processing below.

             Structured block unmapping (i.e. gomp_unmap_vars_internal) should
             not have this problem, since they maintain an additional
             tgt->refcount = 1 reference to the target_mem_desc to start
             with.  */
          if (do_remove)
            remove_vars[nrmvars++] = k;
          break;

        case GOMP_MAP_DETACH:
          /* Already handled in the first pass above.  */
          break;
        default:
          gomp_mutex_unlock (&devicep->lock);
          gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
                      kind);
        }
    }

  /* All copy-back is done; it is now safe to actually remove the queued
     mappings.  */
  for (int i = 0; i < nrmvars; i++)
    gomp_remove_var (devicep, remove_vars[i]);

  gomp_mutex_unlock (&devicep->lock);
}

/* 'target enter data' / 'target exit data' entry point.  FLAGS selects
   between enter (mapping) and exit (unmapping) semantics.  */

void
GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
                             size_t *sizes, unsigned short *kinds,
                             unsigned int flags, void **depend)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  /* If there are depend clauses, but nowait is not present,
     block the parent task until the dependencies are resolved
     and then just continue with the rest of the function as if it
     is a merged task.  Until we are able to schedule task during
     variable mapping or unmapping, ignore nowait if depend clauses
     are not present.  */
  if (depend != NULL)
    {
      struct gomp_thread *thr = gomp_thread ();
      if (thr->task && thr->task->depend_hash)
        {
          if ((flags & GOMP_TARGET_FLAG_NOWAIT)
              && thr->ts.team
              && !thr->task->final_task)
            {
              if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
                                           mapnum, hostaddrs, sizes, kinds,
                                           flags, depend, NULL,
                                           GOMP_TARGET_TASK_DATA))
                return;
            }
          else
            {
              struct gomp_team *team = thr->ts.team;
              /* If parallel or taskgroup has been cancelled, don't start new
                 tasks.
*/ 3079 if (__builtin_expect (gomp_cancel_var, 0) && team) 3080 { 3081 if (gomp_team_barrier_cancelled (&team->barrier)) 3082 return; 3083 if (thr->task->taskgroup) 3084 { 3085 if (thr->task->taskgroup->cancelled) 3086 return; 3087 if (thr->task->taskgroup->workshare 3088 && thr->task->taskgroup->prev 3089 && thr->task->taskgroup->prev->cancelled) 3090 return; 3091 } 3092 } 3093 3094 gomp_task_maybe_wait_for_dependencies (depend); 3095 } 3096 } 3097 } 3098 3099 if (devicep == NULL 3100 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3101 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3102 return; 3103 3104 struct gomp_thread *thr = gomp_thread (); 3105 struct gomp_team *team = thr->ts.team; 3106 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 3107 if (__builtin_expect (gomp_cancel_var, 0) && team) 3108 { 3109 if (gomp_team_barrier_cancelled (&team->barrier)) 3110 return; 3111 if (thr->task->taskgroup) 3112 { 3113 if (thr->task->taskgroup->cancelled) 3114 return; 3115 if (thr->task->taskgroup->workshare 3116 && thr->task->taskgroup->prev 3117 && thr->task->taskgroup->prev->cancelled) 3118 return; 3119 } 3120 } 3121 3122 htab_t refcount_set = htab_create (mapnum); 3123 3124 /* The variables are mapped separately such that they can be released 3125 independently. 
*/ 3126 size_t i, j; 3127 if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 3128 for (i = 0; i < mapnum; i++) 3129 if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT) 3130 { 3131 gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i], 3132 &kinds[i], true, &refcount_set, 3133 GOMP_MAP_VARS_ENTER_DATA); 3134 i += sizes[i]; 3135 } 3136 else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET) 3137 { 3138 for (j = i + 1; j < mapnum; j++) 3139 if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff) 3140 && !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff)) 3141 break; 3142 gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i], 3143 &kinds[i], true, &refcount_set, 3144 GOMP_MAP_VARS_ENTER_DATA); 3145 i += j - i - 1; 3146 } 3147 else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH) 3148 { 3149 /* An attach operation must be processed together with the mapped 3150 base-pointer list item. */ 3151 gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i], 3152 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 3153 i += 1; 3154 } 3155 else 3156 gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i], 3157 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 3158 else 3159 gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set); 3160 htab_free (refcount_set); 3161} 3162 3163bool 3164gomp_target_task_fn (void *data) 3165{ 3166 struct gomp_target_task *ttask = (struct gomp_target_task *) data; 3167 struct gomp_device_descr *devicep = ttask->devicep; 3168 3169 if (ttask->fn != NULL) 3170 { 3171 void *fn_addr; 3172 if (devicep == NULL 3173 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3174 || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) 3175 || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) 3176 { 3177 ttask->state = GOMP_TARGET_TASK_FALLBACK; 3178 gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep, 3179 ttask->args); 3180 return false; 3181 } 3182 3183 if (ttask->state == 
GOMP_TARGET_TASK_FINISHED) 3184 { 3185 if (ttask->tgt) 3186 gomp_unmap_vars (ttask->tgt, true, NULL); 3187 return false; 3188 } 3189 3190 void *actual_arguments; 3191 if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3192 { 3193 ttask->tgt = NULL; 3194 actual_arguments = ttask->hostaddrs; 3195 } 3196 else 3197 { 3198 ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, 3199 NULL, ttask->sizes, ttask->kinds, true, 3200 NULL, GOMP_MAP_VARS_TARGET); 3201 actual_arguments = (void *) ttask->tgt->tgt_start; 3202 } 3203 ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; 3204 3205 assert (devicep->async_run_func); 3206 devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, 3207 ttask->args, (void *) ttask); 3208 return true; 3209 } 3210 else if (devicep == NULL 3211 || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3212 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3213 return false; 3214 3215 size_t i; 3216 if (ttask->flags & GOMP_TARGET_FLAG_UPDATE) 3217 gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 3218 ttask->kinds, true); 3219 else 3220 { 3221 htab_t refcount_set = htab_create (ttask->mapnum); 3222 if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0) 3223 for (i = 0; i < ttask->mapnum; i++) 3224 if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT) 3225 { 3226 gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i], 3227 NULL, &ttask->sizes[i], &ttask->kinds[i], true, 3228 &refcount_set, GOMP_MAP_VARS_ENTER_DATA); 3229 i += ttask->sizes[i]; 3230 } 3231 else 3232 gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i], 3233 &ttask->kinds[i], true, &refcount_set, 3234 GOMP_MAP_VARS_ENTER_DATA); 3235 else 3236 gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes, 3237 ttask->kinds, &refcount_set); 3238 htab_free (refcount_set); 3239 } 3240 return false; 3241} 3242 3243void 3244GOMP_teams (unsigned int num_teams, unsigned int thread_limit) 3245{ 3246 if 
(thread_limit) 3247 { 3248 struct gomp_task_icv *icv = gomp_icv (true); 3249 icv->thread_limit_var 3250 = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 3251 } 3252 (void) num_teams; 3253} 3254 3255bool 3256GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high, 3257 unsigned int thread_limit, bool first) 3258{ 3259 struct gomp_thread *thr = gomp_thread (); 3260 if (first) 3261 { 3262 if (thread_limit) 3263 { 3264 struct gomp_task_icv *icv = gomp_icv (true); 3265 icv->thread_limit_var 3266 = thread_limit > INT_MAX ? UINT_MAX : thread_limit; 3267 } 3268 (void) num_teams_high; 3269 if (num_teams_low == 0) 3270 num_teams_low = 1; 3271 thr->num_teams = num_teams_low - 1; 3272 thr->team_num = 0; 3273 } 3274 else if (thr->team_num == thr->num_teams) 3275 return false; 3276 else 3277 ++thr->team_num; 3278 return true; 3279} 3280 3281void * 3282omp_target_alloc (size_t size, int device_num) 3283{ 3284 if (device_num == gomp_get_num_devices ()) 3285 return malloc (size); 3286 3287 if (device_num < 0) 3288 return NULL; 3289 3290 struct gomp_device_descr *devicep = resolve_device (device_num); 3291 if (devicep == NULL) 3292 return NULL; 3293 3294 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3295 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3296 return malloc (size); 3297 3298 gomp_mutex_lock (&devicep->lock); 3299 void *ret = devicep->alloc_func (devicep->target_id, size); 3300 gomp_mutex_unlock (&devicep->lock); 3301 return ret; 3302} 3303 3304void 3305omp_target_free (void *device_ptr, int device_num) 3306{ 3307 if (device_ptr == NULL) 3308 return; 3309 3310 if (device_num == gomp_get_num_devices ()) 3311 { 3312 free (device_ptr); 3313 return; 3314 } 3315 3316 if (device_num < 0) 3317 return; 3318 3319 struct gomp_device_descr *devicep = resolve_device (device_num); 3320 if (devicep == NULL) 3321 return; 3322 3323 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3324 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 
3325 { 3326 free (device_ptr); 3327 return; 3328 } 3329 3330 gomp_mutex_lock (&devicep->lock); 3331 gomp_free_device_memory (devicep, device_ptr); 3332 gomp_mutex_unlock (&devicep->lock); 3333} 3334 3335int 3336omp_target_is_present (const void *ptr, int device_num) 3337{ 3338 if (ptr == NULL) 3339 return 1; 3340 3341 if (device_num == gomp_get_num_devices ()) 3342 return 1; 3343 3344 if (device_num < 0) 3345 return 0; 3346 3347 struct gomp_device_descr *devicep = resolve_device (device_num); 3348 if (devicep == NULL) 3349 return 0; 3350 3351 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3352 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3353 return 1; 3354 3355 gomp_mutex_lock (&devicep->lock); 3356 struct splay_tree_s *mem_map = &devicep->mem_map; 3357 struct splay_tree_key_s cur_node; 3358 3359 cur_node.host_start = (uintptr_t) ptr; 3360 cur_node.host_end = cur_node.host_start; 3361 splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node); 3362 int ret = n != NULL; 3363 gomp_mutex_unlock (&devicep->lock); 3364 return ret; 3365} 3366 3367int 3368omp_target_memcpy (void *dst, const void *src, size_t length, 3369 size_t dst_offset, size_t src_offset, int dst_device_num, 3370 int src_device_num) 3371{ 3372 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 3373 bool ret; 3374 3375 if (dst_device_num != gomp_get_num_devices ()) 3376 { 3377 if (dst_device_num < 0) 3378 return EINVAL; 3379 3380 dst_devicep = resolve_device (dst_device_num); 3381 if (dst_devicep == NULL) 3382 return EINVAL; 3383 3384 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3385 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3386 dst_devicep = NULL; 3387 } 3388 if (src_device_num != num_devices_openmp) 3389 { 3390 if (src_device_num < 0) 3391 return EINVAL; 3392 3393 src_devicep = resolve_device (src_device_num); 3394 if (src_devicep == NULL) 3395 return EINVAL; 3396 3397 if (!(src_devicep->capabilities & 
GOMP_OFFLOAD_CAP_OPENMP_400) 3398 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3399 src_devicep = NULL; 3400 } 3401 if (src_devicep == NULL && dst_devicep == NULL) 3402 { 3403 memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length); 3404 return 0; 3405 } 3406 if (src_devicep == NULL) 3407 { 3408 gomp_mutex_lock (&dst_devicep->lock); 3409 ret = dst_devicep->host2dev_func (dst_devicep->target_id, 3410 (char *) dst + dst_offset, 3411 (char *) src + src_offset, length); 3412 gomp_mutex_unlock (&dst_devicep->lock); 3413 return (ret ? 0 : EINVAL); 3414 } 3415 if (dst_devicep == NULL) 3416 { 3417 gomp_mutex_lock (&src_devicep->lock); 3418 ret = src_devicep->dev2host_func (src_devicep->target_id, 3419 (char *) dst + dst_offset, 3420 (char *) src + src_offset, length); 3421 gomp_mutex_unlock (&src_devicep->lock); 3422 return (ret ? 0 : EINVAL); 3423 } 3424 if (src_devicep == dst_devicep) 3425 { 3426 gomp_mutex_lock (&src_devicep->lock); 3427 ret = src_devicep->dev2dev_func (src_devicep->target_id, 3428 (char *) dst + dst_offset, 3429 (char *) src + src_offset, length); 3430 gomp_mutex_unlock (&src_devicep->lock); 3431 return (ret ? 
0 : EINVAL); 3432 } 3433 return EINVAL; 3434} 3435 3436static int 3437omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size, 3438 int num_dims, const size_t *volume, 3439 const size_t *dst_offsets, 3440 const size_t *src_offsets, 3441 const size_t *dst_dimensions, 3442 const size_t *src_dimensions, 3443 struct gomp_device_descr *dst_devicep, 3444 struct gomp_device_descr *src_devicep) 3445{ 3446 size_t dst_slice = element_size; 3447 size_t src_slice = element_size; 3448 size_t j, dst_off, src_off, length; 3449 int i, ret; 3450 3451 if (num_dims == 1) 3452 { 3453 if (__builtin_mul_overflow (element_size, volume[0], &length) 3454 || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off) 3455 || __builtin_mul_overflow (element_size, src_offsets[0], &src_off)) 3456 return EINVAL; 3457 if (dst_devicep == NULL && src_devicep == NULL) 3458 { 3459 memcpy ((char *) dst + dst_off, (const char *) src + src_off, 3460 length); 3461 ret = 1; 3462 } 3463 else if (src_devicep == NULL) 3464 ret = dst_devicep->host2dev_func (dst_devicep->target_id, 3465 (char *) dst + dst_off, 3466 (const char *) src + src_off, 3467 length); 3468 else if (dst_devicep == NULL) 3469 ret = src_devicep->dev2host_func (src_devicep->target_id, 3470 (char *) dst + dst_off, 3471 (const char *) src + src_off, 3472 length); 3473 else if (src_devicep == dst_devicep) 3474 ret = src_devicep->dev2dev_func (src_devicep->target_id, 3475 (char *) dst + dst_off, 3476 (const char *) src + src_off, 3477 length); 3478 else 3479 ret = 0; 3480 return ret ? 0 : EINVAL; 3481 } 3482 3483 /* FIXME: it would be nice to have some plugin function to handle 3484 num_dims == 2 and num_dims == 3 more efficiently. Larger ones can 3485 be handled in the generic recursion below, and for host-host it 3486 should be used even for any num_dims >= 2. 
*/ 3487 3488 for (i = 1; i < num_dims; i++) 3489 if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice) 3490 || __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice)) 3491 return EINVAL; 3492 if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off) 3493 || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off)) 3494 return EINVAL; 3495 for (j = 0; j < volume[0]; j++) 3496 { 3497 ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off, 3498 (const char *) src + src_off, 3499 element_size, num_dims - 1, 3500 volume + 1, dst_offsets + 1, 3501 src_offsets + 1, dst_dimensions + 1, 3502 src_dimensions + 1, dst_devicep, 3503 src_devicep); 3504 if (ret) 3505 return ret; 3506 dst_off += dst_slice; 3507 src_off += src_slice; 3508 } 3509 return 0; 3510} 3511 3512int 3513omp_target_memcpy_rect (void *dst, const void *src, size_t element_size, 3514 int num_dims, const size_t *volume, 3515 const size_t *dst_offsets, 3516 const size_t *src_offsets, 3517 const size_t *dst_dimensions, 3518 const size_t *src_dimensions, 3519 int dst_device_num, int src_device_num) 3520{ 3521 struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL; 3522 3523 if (!dst && !src) 3524 return INT_MAX; 3525 3526 if (dst_device_num != gomp_get_num_devices ()) 3527 { 3528 if (dst_device_num < 0) 3529 return EINVAL; 3530 3531 dst_devicep = resolve_device (dst_device_num); 3532 if (dst_devicep == NULL) 3533 return EINVAL; 3534 3535 if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3536 || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3537 dst_devicep = NULL; 3538 } 3539 if (src_device_num != num_devices_openmp) 3540 { 3541 if (src_device_num < 0) 3542 return EINVAL; 3543 3544 src_devicep = resolve_device (src_device_num); 3545 if (src_devicep == NULL) 3546 return EINVAL; 3547 3548 if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3549 || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3550 src_devicep = NULL; 
3551 } 3552 3553 if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep) 3554 return EINVAL; 3555 3556 if (src_devicep) 3557 gomp_mutex_lock (&src_devicep->lock); 3558 else if (dst_devicep) 3559 gomp_mutex_lock (&dst_devicep->lock); 3560 int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims, 3561 volume, dst_offsets, src_offsets, 3562 dst_dimensions, src_dimensions, 3563 dst_devicep, src_devicep); 3564 if (src_devicep) 3565 gomp_mutex_unlock (&src_devicep->lock); 3566 else if (dst_devicep) 3567 gomp_mutex_unlock (&dst_devicep->lock); 3568 return ret; 3569} 3570 3571int 3572omp_target_associate_ptr (const void *host_ptr, const void *device_ptr, 3573 size_t size, size_t device_offset, int device_num) 3574{ 3575 if (device_num == gomp_get_num_devices ()) 3576 return EINVAL; 3577 3578 if (device_num < 0) 3579 return EINVAL; 3580 3581 struct gomp_device_descr *devicep = resolve_device (device_num); 3582 if (devicep == NULL) 3583 return EINVAL; 3584 3585 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3586 || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) 3587 return EINVAL; 3588 3589 gomp_mutex_lock (&devicep->lock); 3590 3591 struct splay_tree_s *mem_map = &devicep->mem_map; 3592 struct splay_tree_key_s cur_node; 3593 int ret = EINVAL; 3594 3595 cur_node.host_start = (uintptr_t) host_ptr; 3596 cur_node.host_end = cur_node.host_start + size; 3597 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 3598 if (n) 3599 { 3600 if (n->tgt->tgt_start + n->tgt_offset 3601 == (uintptr_t) device_ptr + device_offset 3602 && n->host_start <= cur_node.host_start 3603 && n->host_end >= cur_node.host_end) 3604 ret = 0; 3605 } 3606 else 3607 { 3608 struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); 3609 tgt->array = gomp_malloc (sizeof (*tgt->array)); 3610 tgt->refcount = 1; 3611 tgt->tgt_start = 0; 3612 tgt->tgt_end = 0; 3613 tgt->to_free = NULL; 3614 tgt->prev = NULL; 3615 tgt->list_count = 0; 3616 
tgt->device_descr = devicep; 3617 splay_tree_node array = tgt->array; 3618 splay_tree_key k = &array->key; 3619 k->host_start = cur_node.host_start; 3620 k->host_end = cur_node.host_end; 3621 k->tgt = tgt; 3622 k->tgt_offset = (uintptr_t) device_ptr + device_offset; 3623 k->refcount = REFCOUNT_INFINITY; 3624 k->dynamic_refcount = 0; 3625 k->aux = NULL; 3626 array->left = NULL; 3627 array->right = NULL; 3628 splay_tree_insert (&devicep->mem_map, array); 3629 ret = 0; 3630 } 3631 gomp_mutex_unlock (&devicep->lock); 3632 return ret; 3633} 3634 3635int 3636omp_target_disassociate_ptr (const void *ptr, int device_num) 3637{ 3638 if (device_num == gomp_get_num_devices ()) 3639 return EINVAL; 3640 3641 if (device_num < 0) 3642 return EINVAL; 3643 3644 struct gomp_device_descr *devicep = resolve_device (device_num); 3645 if (devicep == NULL) 3646 return EINVAL; 3647 3648 if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 3649 return EINVAL; 3650 3651 gomp_mutex_lock (&devicep->lock); 3652 3653 struct splay_tree_s *mem_map = &devicep->mem_map; 3654 struct splay_tree_key_s cur_node; 3655 int ret = EINVAL; 3656 3657 cur_node.host_start = (uintptr_t) ptr; 3658 cur_node.host_end = cur_node.host_start; 3659 splay_tree_key n = gomp_map_lookup (mem_map, &cur_node); 3660 if (n 3661 && n->host_start == cur_node.host_start 3662 && n->refcount == REFCOUNT_INFINITY 3663 && n->tgt->tgt_start == 0 3664 && n->tgt->to_free == NULL 3665 && n->tgt->refcount == 1 3666 && n->tgt->list_count == 0) 3667 { 3668 splay_tree_remove (&devicep->mem_map, n); 3669 gomp_unmap_tgt (n->tgt); 3670 ret = 0; 3671 } 3672 3673 gomp_mutex_unlock (&devicep->lock); 3674 return ret; 3675} 3676 3677int 3678omp_pause_resource (omp_pause_resource_t kind, int device_num) 3679{ 3680 (void) kind; 3681 if (device_num == gomp_get_num_devices ()) 3682 return gomp_pause_host (); 3683 if (device_num < 0 || device_num >= num_devices_openmp) 3684 return -1; 3685 /* Do nothing for target devices for now. 
*/ 3686 return 0; 3687} 3688 3689int 3690omp_pause_resource_all (omp_pause_resource_t kind) 3691{ 3692 (void) kind; 3693 if (gomp_pause_host ()) 3694 return -1; 3695 /* Do nothing for target devices for now. */ 3696 return 0; 3697} 3698 3699ialias (omp_pause_resource) 3700ialias (omp_pause_resource_all) 3701 3702#ifdef PLUGIN_SUPPORT 3703 3704/* This function tries to load a plugin for DEVICE. Name of plugin is passed 3705 in PLUGIN_NAME. 3706 The handles of the found functions are stored in the corresponding fields 3707 of DEVICE. The function returns TRUE on success and FALSE otherwise. */ 3708 3709static bool 3710gomp_load_plugin_for_device (struct gomp_device_descr *device, 3711 const char *plugin_name) 3712{ 3713 const char *err = NULL, *last_missing = NULL; 3714 3715 void *plugin_handle = dlopen (plugin_name, RTLD_LAZY); 3716 if (!plugin_handle) 3717#if OFFLOAD_DEFAULTED 3718 return 0; 3719#else 3720 goto dl_fail; 3721#endif 3722 3723 /* Check if all required functions are available in the plugin and store 3724 their handlers. None of the symbols can legitimately be NULL, 3725 so we don't need to check dlerror all the time. */ 3726#define DLSYM(f) \ 3727 if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f))) \ 3728 goto dl_fail 3729 /* Similar, but missing functions are not an error. Return false if 3730 failed, true otherwise. 
*/ 3731#define DLSYM_OPT(f, n) \ 3732 ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n)) \ 3733 || (last_missing = #n, 0)) 3734 3735 DLSYM (version); 3736 if (device->version_func () != GOMP_VERSION) 3737 { 3738 err = "plugin version mismatch"; 3739 goto fail; 3740 } 3741 3742 DLSYM (get_name); 3743 DLSYM (get_caps); 3744 DLSYM (get_type); 3745 DLSYM (get_num_devices); 3746 DLSYM (init_device); 3747 DLSYM (fini_device); 3748 DLSYM (load_image); 3749 DLSYM (unload_image); 3750 DLSYM (alloc); 3751 DLSYM (free); 3752 DLSYM (dev2host); 3753 DLSYM (host2dev); 3754 device->capabilities = device->get_caps_func (); 3755 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3756 { 3757 DLSYM (run); 3758 DLSYM_OPT (async_run, async_run); 3759 DLSYM_OPT (can_run, can_run); 3760 DLSYM (dev2dev); 3761 } 3762 if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 3763 { 3764 if (!DLSYM_OPT (openacc.exec, openacc_exec) 3765 || !DLSYM_OPT (openacc.create_thread_data, 3766 openacc_create_thread_data) 3767 || !DLSYM_OPT (openacc.destroy_thread_data, 3768 openacc_destroy_thread_data) 3769 || !DLSYM_OPT (openacc.async.construct, openacc_async_construct) 3770 || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct) 3771 || !DLSYM_OPT (openacc.async.test, openacc_async_test) 3772 || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize) 3773 || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize) 3774 || !DLSYM_OPT (openacc.async.queue_callback, 3775 openacc_async_queue_callback) 3776 || !DLSYM_OPT (openacc.async.exec, openacc_async_exec) 3777 || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host) 3778 || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev) 3779 || !DLSYM_OPT (openacc.get_property, openacc_get_property)) 3780 { 3781 /* Require all the OpenACC handlers if we have 3782 GOMP_OFFLOAD_CAP_OPENACC_200. 
*/ 3783 err = "plugin missing OpenACC handler function"; 3784 goto fail; 3785 } 3786 3787 unsigned cuda = 0; 3788 cuda += DLSYM_OPT (openacc.cuda.get_current_device, 3789 openacc_cuda_get_current_device); 3790 cuda += DLSYM_OPT (openacc.cuda.get_current_context, 3791 openacc_cuda_get_current_context); 3792 cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream); 3793 cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream); 3794 if (cuda && cuda != 4) 3795 { 3796 /* Make sure all the CUDA functions are there if any of them are. */ 3797 err = "plugin missing OpenACC CUDA handler function"; 3798 goto fail; 3799 } 3800 } 3801#undef DLSYM 3802#undef DLSYM_OPT 3803 3804 return 1; 3805 3806 dl_fail: 3807 err = dlerror (); 3808 fail: 3809 gomp_error ("while loading %s: %s", plugin_name, err); 3810 if (last_missing) 3811 gomp_error ("missing function was %s", last_missing); 3812 if (plugin_handle) 3813 dlclose (plugin_handle); 3814 3815 return 0; 3816} 3817 3818/* This function finalizes all initialized devices. */ 3819 3820static void 3821gomp_target_fini (void) 3822{ 3823 int i; 3824 for (i = 0; i < num_devices; i++) 3825 { 3826 bool ret = true; 3827 struct gomp_device_descr *devicep = &devices[i]; 3828 gomp_mutex_lock (&devicep->lock); 3829 if (devicep->state == GOMP_DEVICE_INITIALIZED) 3830 ret = gomp_fini_device (devicep); 3831 gomp_mutex_unlock (&devicep->lock); 3832 if (!ret) 3833 gomp_fatal ("device finalization failed"); 3834 } 3835} 3836 3837/* This function initializes the runtime for offloading. 3838 It parses the list of offload plugins, and tries to load these. 3839 On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP 3840 will be set, and the array DEVICES initialized, containing descriptors for 3841 corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows 3842 by the others. 
*/ 3843 3844static void 3845gomp_target_init (void) 3846{ 3847 const char *prefix ="libgomp-plugin-"; 3848 const char *suffix = SONAME_SUFFIX (1); 3849 const char *cur, *next; 3850 char *plugin_name; 3851 int i, new_num_devs; 3852 int num_devs = 0, num_devs_openmp; 3853 struct gomp_device_descr *devs = NULL; 3854 3855 if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED) 3856 return; 3857 3858 cur = OFFLOAD_PLUGINS; 3859 if (*cur) 3860 do 3861 { 3862 struct gomp_device_descr current_device; 3863 size_t prefix_len, suffix_len, cur_len; 3864 3865 next = strchr (cur, ','); 3866 3867 prefix_len = strlen (prefix); 3868 cur_len = next ? next - cur : strlen (cur); 3869 suffix_len = strlen (suffix); 3870 3871 plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1); 3872 if (!plugin_name) 3873 { 3874 num_devs = 0; 3875 break; 3876 } 3877 3878 memcpy (plugin_name, prefix, prefix_len); 3879 memcpy (plugin_name + prefix_len, cur, cur_len); 3880 memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1); 3881 3882 if (gomp_load_plugin_for_device (¤t_device, plugin_name)) 3883 { 3884 new_num_devs = current_device.get_num_devices_func (); 3885 if (new_num_devs >= 1) 3886 { 3887 /* Augment DEVICES and NUM_DEVICES. */ 3888 3889 devs = realloc (devs, (num_devs + new_num_devs) 3890 * sizeof (struct gomp_device_descr)); 3891 if (!devs) 3892 { 3893 num_devs = 0; 3894 free (plugin_name); 3895 break; 3896 } 3897 3898 current_device.name = current_device.get_name_func (); 3899 /* current_device.capabilities has already been set. 
*/ 3900 current_device.type = current_device.get_type_func (); 3901 current_device.mem_map.root = NULL; 3902 current_device.state = GOMP_DEVICE_UNINITIALIZED; 3903 for (i = 0; i < new_num_devs; i++) 3904 { 3905 current_device.target_id = i; 3906 devs[num_devs] = current_device; 3907 gomp_mutex_init (&devs[num_devs].lock); 3908 num_devs++; 3909 } 3910 } 3911 } 3912 3913 free (plugin_name); 3914 cur = next + 1; 3915 } 3916 while (next); 3917 3918 /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set 3919 NUM_DEVICES_OPENMP. */ 3920 struct gomp_device_descr *devs_s 3921 = malloc (num_devs * sizeof (struct gomp_device_descr)); 3922 if (!devs_s) 3923 { 3924 num_devs = 0; 3925 free (devs); 3926 devs = NULL; 3927 } 3928 num_devs_openmp = 0; 3929 for (i = 0; i < num_devs; i++) 3930 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) 3931 devs_s[num_devs_openmp++] = devs[i]; 3932 int num_devs_after_openmp = num_devs_openmp; 3933 for (i = 0; i < num_devs; i++) 3934 if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 3935 devs_s[num_devs_after_openmp++] = devs[i]; 3936 free (devs); 3937 devs = devs_s; 3938 3939 for (i = 0; i < num_devs; i++) 3940 { 3941 /* The 'devices' array can be moved (by the realloc call) until we have 3942 found all the plugins, so registering with the OpenACC runtime (which 3943 takes a copy of the pointer argument) must be delayed until now. */ 3944 if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) 3945 goacc_register (&devs[i]); 3946 } 3947 3948 num_devices = num_devs; 3949 num_devices_openmp = num_devs_openmp; 3950 devices = devs; 3951 if (atexit (gomp_target_fini) != 0) 3952 gomp_fatal ("atexit failed"); 3953} 3954 3955#else /* PLUGIN_SUPPORT */ 3956/* If dlfcn.h is unavailable we always fallback to host execution. 3957 GOMP_target* routines are just stubs for this case. */ 3958static void 3959gomp_target_init (void) 3960{ 3961} 3962#endif /* PLUGIN_SUPPORT */ 3963