1/* Copyright (C) 2013-2022 Free Software Foundation, Inc.
2   Contributed by Jakub Jelinek <jakub@redhat.com>.
3
4   This file is part of the GNU Offloading and Multi Processing Library
5   (libgomp).
6
7   Libgomp is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15   more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
26/* This file contains the support of offloading.  */
27
28#include "libgomp.h"
29#include "oacc-plugin.h"
30#include "oacc-int.h"
31#include "gomp-constants.h"
32#include <limits.h>
33#include <stdbool.h>
34#include <stdlib.h>
35#ifdef HAVE_INTTYPES_H
36# include <inttypes.h>  /* For PRIu64.  */
37#endif
38#include <string.h>
39#include <assert.h>
40#include <errno.h>
41
42#ifdef PLUGIN_SUPPORT
43#include <dlfcn.h>
44#include "plugin-suffix.h"
45#endif
46
47typedef uintptr_t *hash_entry_type;
48static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
49static inline void htab_free (void *ptr) { free (ptr); }
50#include "hashtab.h"
51
52static inline hashval_t
53htab_hash (hash_entry_type element)
54{
55  return hash_pointer ((void *) element);
56}
57
58static inline bool
59htab_eq (hash_entry_type x, hash_entry_type y)
60{
61  return x == y;
62}
63
64#define FIELD_TGT_EMPTY (~(size_t) 0)
65
66static void gomp_target_init (void);
67
68/* The whole initialization code for offloading plugins is only run one.  */
69static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
70
71/* Mutex for offload image registration.  */
72static gomp_mutex_t register_lock;
73
74/* This structure describes an offload image.
75   It contains type of the target device, pointer to host table descriptor, and
76   pointer to target data.  */
77struct offload_image_descr {
78  unsigned version;
79  enum offload_target_type type;
80  const void *host_table;
81  const void *target_data;
82};
83
84/* Array of descriptors of offload images.  */
85static struct offload_image_descr *offload_images;
86
87/* Total number of offload images.  */
88static int num_offload_images;
89
90/* Array of descriptors for all available devices.  */
91static struct gomp_device_descr *devices;
92
93/* Total number of available devices.  */
94static int num_devices;
95
96/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
97static int num_devices_openmp;
98
99/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */
100
101static void *
102gomp_realloc_unlock (void *old, size_t size)
103{
104  void *ret = realloc (old, size);
105  if (ret == NULL)
106    {
107      gomp_mutex_unlock (&register_lock);
108      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
109    }
110  return ret;
111}
112
113attribute_hidden void
114gomp_init_targets_once (void)
115{
116  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
117}
118
119attribute_hidden int
120gomp_get_num_devices (void)
121{
122  gomp_init_targets_once ();
123  return num_devices_openmp;
124}
125
126static struct gomp_device_descr *
127resolve_device (int device_id)
128{
129  if (device_id == GOMP_DEVICE_ICV)
130    {
131      struct gomp_task_icv *icv = gomp_icv (false);
132      device_id = icv->default_device_var;
133    }
134
135  if (device_id < 0 || device_id >= gomp_get_num_devices ())
136    {
137      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
138	  && device_id != GOMP_DEVICE_HOST_FALLBACK
139	  && device_id != num_devices_openmp)
140	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
141		    "but device not found");
142
143      return NULL;
144    }
145
146  gomp_mutex_lock (&devices[device_id].lock);
147  if (devices[device_id].state == GOMP_DEVICE_UNINITIALIZED)
148    gomp_init_device (&devices[device_id]);
149  else if (devices[device_id].state == GOMP_DEVICE_FINALIZED)
150    {
151      gomp_mutex_unlock (&devices[device_id].lock);
152
153      if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY)
154	gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, "
155		    "but device is finalized");
156
157      return NULL;
158    }
159  gomp_mutex_unlock (&devices[device_id].lock);
160
161  return &devices[device_id];
162}
163
164
165static inline splay_tree_key
166gomp_map_lookup (splay_tree mem_map, splay_tree_key key)
167{
168  if (key->host_start != key->host_end)
169    return splay_tree_lookup (mem_map, key);
170
171  key->host_end++;
172  splay_tree_key n = splay_tree_lookup (mem_map, key);
173  key->host_end--;
174  if (n)
175    return n;
176  key->host_start--;
177  n = splay_tree_lookup (mem_map, key);
178  key->host_start++;
179  if (n)
180    return n;
181  return splay_tree_lookup (mem_map, key);
182}
183
184static inline splay_tree_key
185gomp_map_0len_lookup (splay_tree mem_map, splay_tree_key key)
186{
187  if (key->host_start != key->host_end)
188    return splay_tree_lookup (mem_map, key);
189
190  key->host_end++;
191  splay_tree_key n = splay_tree_lookup (mem_map, key);
192  key->host_end--;
193  return n;
194}
195
196static inline void
197gomp_device_copy (struct gomp_device_descr *devicep,
198		  bool (*copy_func) (int, void *, const void *, size_t),
199		  const char *dst, void *dstaddr,
200		  const char *src, const void *srcaddr,
201		  size_t size)
202{
203  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size))
204    {
205      gomp_mutex_unlock (&devicep->lock);
206      gomp_fatal ("Copying of %s object [%p..%p) to %s object [%p..%p) failed",
207		  src, srcaddr, srcaddr + size, dst, dstaddr, dstaddr + size);
208    }
209}
210
211static inline void
212goacc_device_copy_async (struct gomp_device_descr *devicep,
213			 bool (*copy_func) (int, void *, const void *, size_t,
214					    struct goacc_asyncqueue *),
215			 const char *dst, void *dstaddr,
216			 const char *src, const void *srcaddr,
217			 const void *srcaddr_orig,
218			 size_t size, struct goacc_asyncqueue *aq)
219{
220  if (!copy_func (devicep->target_id, dstaddr, srcaddr, size, aq))
221    {
222      gomp_mutex_unlock (&devicep->lock);
223      if (srcaddr_orig && srcaddr_orig != srcaddr)
224	gomp_fatal ("Copying of %s object [%p..%p)"
225		    " via buffer %s object [%p..%p)"
226		    " to %s object [%p..%p) failed",
227		    src, srcaddr_orig, srcaddr_orig + size,
228		    src, srcaddr, srcaddr + size,
229		    dst, dstaddr, dstaddr + size);
230      else
231	gomp_fatal ("Copying of %s object [%p..%p)"
232		    " to %s object [%p..%p) failed",
233		    src, srcaddr, srcaddr + size,
234		    dst, dstaddr, dstaddr + size);
235    }
236}
237
238/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
239   host to device memory transfers.  */
240
241struct gomp_coalesce_chunk
242{
243  /* The starting and ending point of a coalesced chunk of memory.  */
244  size_t start, end;
245};
246
247struct gomp_coalesce_buf
248{
249  /* Buffer into which gomp_copy_host2dev will memcpy data and from which
250     it will be copied to the device.  */
251  void *buf;
252  struct target_mem_desc *tgt;
253  /* Array with offsets, chunks[i].start is the starting offset and
254     chunks[i].end ending offset relative to tgt->tgt_start device address
255     of chunks which are to be copied to buf and later copied to device.  */
256  struct gomp_coalesce_chunk *chunks;
257  /* Number of chunks in chunks array, or -1 if coalesce buffering should not
258     be performed.  */
259  long chunk_cnt;
260  /* During construction of chunks array, how many memory regions are within
261     the last chunk.  If there is just one memory region for a chunk, we copy
262     it directly to device rather than going through buf.  */
263  long use_cnt;
264};
265
266/* Maximum size of memory region considered for coalescing.  Larger copies
267   are performed directly.  */
268#define MAX_COALESCE_BUF_SIZE	(32 * 1024)
269
270/* Maximum size of a gap in between regions to consider them being copied
271   within the same chunk.  All the device offsets considered are within
272   newly allocated device memory, so it isn't fatal if we copy some padding
273   in between from host to device.  The gaps come either from alignment
274   padding or from memory regions which are not supposed to be copied from
275   host to device (e.g. map(alloc:), map(from:) etc.).  */
276#define MAX_COALESCE_BUF_GAP	(4 * 1024)
277
278/* Add region with device tgt_start relative offset and length to CBUF.
279
280   This must not be used for asynchronous copies, because the host data might
281   not be computed yet (by an earlier asynchronous compute region, for
282   example).
283   TODO ... but we could allow CBUF usage for EPHEMERAL data?  (Open question:
284   is it more performant to use libgomp CBUF buffering or individual device
285   asyncronous copying?)  */
286
287static inline void
288gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
289{
290  if (len > MAX_COALESCE_BUF_SIZE || len == 0)
291    return;
292  if (cbuf->chunk_cnt)
293    {
294      if (cbuf->chunk_cnt < 0)
295	return;
296      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end)
297	{
298	  cbuf->chunk_cnt = -1;
299	  return;
300	}
301      if (start < cbuf->chunks[cbuf->chunk_cnt - 1].end + MAX_COALESCE_BUF_GAP)
302	{
303	  cbuf->chunks[cbuf->chunk_cnt - 1].end = start + len;
304	  cbuf->use_cnt++;
305	  return;
306	}
307      /* If the last chunk is only used by one mapping, discard it,
308	 as it will be one host to device copy anyway and
309	 memcpying it around will only waste cycles.  */
310      if (cbuf->use_cnt == 1)
311	cbuf->chunk_cnt--;
312    }
313  cbuf->chunks[cbuf->chunk_cnt].start = start;
314  cbuf->chunks[cbuf->chunk_cnt].end = start + len;
315  cbuf->chunk_cnt++;
316  cbuf->use_cnt = 1;
317}
318
319/* Return true for mapping kinds which need to copy data from the
320   host to device for regions that weren't previously mapped.  */
321
322static inline bool
323gomp_to_device_kind_p (int kind)
324{
325  switch (kind)
326    {
327    case GOMP_MAP_ALLOC:
328    case GOMP_MAP_FROM:
329    case GOMP_MAP_FORCE_ALLOC:
330    case GOMP_MAP_FORCE_FROM:
331    case GOMP_MAP_ALWAYS_FROM:
332      return false;
333    default:
334      return true;
335    }
336}
337
338/* Copy host memory to an offload device.  In asynchronous mode (if AQ is
339   non-NULL), when the source data is stack or may otherwise be deallocated
340   before the asynchronous copy takes place, EPHEMERAL must be passed as
341   TRUE.  */
342
343attribute_hidden void
344gomp_copy_host2dev (struct gomp_device_descr *devicep,
345		    struct goacc_asyncqueue *aq,
346		    void *d, const void *h, size_t sz,
347		    bool ephemeral, struct gomp_coalesce_buf *cbuf)
348{
349  if (__builtin_expect (aq != NULL, 0))
350    {
351      /* See 'gomp_coalesce_buf_add'.  */
352      assert (!cbuf);
353
354      void *h_buf = (void *) h;
355      if (ephemeral)
356	{
357	  /* We're queueing up an asynchronous copy from data that may
358	     disappear before the transfer takes place (i.e. because it is a
359	     stack local in a function that is no longer executing).  Make a
360	     copy of the data into a temporary buffer in those cases.  */
361	  h_buf = gomp_malloc (sz);
362	  memcpy (h_buf, h, sz);
363	}
364      goacc_device_copy_async (devicep, devicep->openacc.async.host2dev_func,
365			       "dev", d, "host", h_buf, h, sz, aq);
366      if (ephemeral)
367	/* Free temporary buffer once the transfer has completed.  */
368	devicep->openacc.async.queue_callback_func (aq, free, h_buf);
369
370      return;
371    }
372
373  if (cbuf)
374    {
375      uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
376      if (doff < cbuf->chunks[cbuf->chunk_cnt - 1].end)
377	{
378	  long first = 0;
379	  long last = cbuf->chunk_cnt - 1;
380	  while (first <= last)
381	    {
382	      long middle = (first + last) >> 1;
383	      if (cbuf->chunks[middle].end <= doff)
384		first = middle + 1;
385	      else if (cbuf->chunks[middle].start <= doff)
386		{
387		  if (doff + sz > cbuf->chunks[middle].end)
388		    {
389		      gomp_mutex_unlock (&devicep->lock);
390		      gomp_fatal ("internal libgomp cbuf error");
391		    }
392		  memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
393			  h, sz);
394		  return;
395		}
396	      else
397		last = middle - 1;
398	    }
399	}
400    }
401
402  gomp_device_copy (devicep, devicep->host2dev_func, "dev", d, "host", h, sz);
403}
404
405attribute_hidden void
406gomp_copy_dev2host (struct gomp_device_descr *devicep,
407		    struct goacc_asyncqueue *aq,
408		    void *h, const void *d, size_t sz)
409{
410  if (__builtin_expect (aq != NULL, 0))
411    goacc_device_copy_async (devicep, devicep->openacc.async.dev2host_func,
412			     "host", h, "dev", d, NULL, sz, aq);
413  else
414    gomp_device_copy (devicep, devicep->dev2host_func, "host", h, "dev", d, sz);
415}
416
417static void
418gomp_free_device_memory (struct gomp_device_descr *devicep, void *devptr)
419{
420  if (!devicep->free_func (devicep->target_id, devptr))
421    {
422      gomp_mutex_unlock (&devicep->lock);
423      gomp_fatal ("error in freeing device memory block at %p", devptr);
424    }
425}
426
427/* Increment reference count of a splay_tree_key region K by 1.
428   If REFCOUNT_SET != NULL, use it to track already seen refcounts, and only
429   increment the value if refcount is not yet contained in the set (used for
430   OpenMP 5.0, which specifies that a region's refcount is adjusted at most
431   once for each construct).  */
432
433static inline void
434gomp_increment_refcount (splay_tree_key k, htab_t *refcount_set)
435{
436  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
437    return;
438
439  uintptr_t *refcount_ptr = &k->refcount;
440
441  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
442    refcount_ptr = &k->structelem_refcount;
443  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
444    refcount_ptr = k->structelem_refcount_ptr;
445
446  if (refcount_set)
447    {
448      if (htab_find (*refcount_set, refcount_ptr))
449	return;
450      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
451      *slot = refcount_ptr;
452    }
453
454  *refcount_ptr += 1;
455  return;
456}
457
458/* Decrement reference count of a splay_tree_key region K by 1, or if DELETE_P
459   is true, set reference count to zero. If REFCOUNT_SET != NULL, use it to
460   track already seen refcounts, and only adjust the value if refcount is not
461   yet contained in the set (like gomp_increment_refcount).
462
463   Return out-values: set *DO_COPY to true if we set the refcount to zero, or
464   it is already zero and we know we decremented it earlier. This signals that
465   associated maps should be copied back to host.
466
467   *DO_REMOVE is set to true when we this is the first handling of this refcount
468   and we are setting it to zero. This signals a removal of this key from the
469   splay-tree map.
470
471   Copy and removal are separated due to cases like handling of structure
472   elements, e.g. each map of a structure element representing a possible copy
473   out of a structure field has to be handled individually, but we only signal
474   removal for one (the first encountered) sibing map.  */
475
476static inline void
477gomp_decrement_refcount (splay_tree_key k, htab_t *refcount_set, bool delete_p,
478			 bool *do_copy, bool *do_remove)
479{
480  if (k == NULL || k->refcount == REFCOUNT_INFINITY)
481    {
482      *do_copy = *do_remove = false;
483      return;
484    }
485
486  uintptr_t *refcount_ptr = &k->refcount;
487
488  if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount))
489    refcount_ptr = &k->structelem_refcount;
490  else if (REFCOUNT_STRUCTELEM_P (k->refcount))
491    refcount_ptr = k->structelem_refcount_ptr;
492
493  bool new_encountered_refcount;
494  bool set_to_zero = false;
495  bool is_zero = false;
496
497  uintptr_t orig_refcount = *refcount_ptr;
498
499  if (refcount_set)
500    {
501      if (htab_find (*refcount_set, refcount_ptr))
502	{
503	  new_encountered_refcount = false;
504	  goto end;
505	}
506
507      uintptr_t **slot = htab_find_slot (refcount_set, refcount_ptr, INSERT);
508      *slot = refcount_ptr;
509      new_encountered_refcount = true;
510    }
511  else
512    /* If no refcount_set being used, assume all keys are being decremented
513       for the first time.  */
514    new_encountered_refcount = true;
515
516  if (delete_p)
517    *refcount_ptr = 0;
518  else if (*refcount_ptr > 0)
519    *refcount_ptr -= 1;
520
521 end:
522  if (*refcount_ptr == 0)
523    {
524      if (orig_refcount > 0)
525	set_to_zero = true;
526
527      is_zero = true;
528    }
529
530  *do_copy = (set_to_zero || (!new_encountered_refcount && is_zero));
531  *do_remove = (new_encountered_refcount && set_to_zero);
532}
533
534/* Handle the case where gomp_map_lookup, splay_tree_lookup or
535   gomp_map_0len_lookup found oldn for newn.
536   Helper function of gomp_map_vars.  */
537
538static inline void
539gomp_map_vars_existing (struct gomp_device_descr *devicep,
540			struct goacc_asyncqueue *aq, splay_tree_key oldn,
541			splay_tree_key newn, struct target_var_desc *tgt_var,
542			unsigned char kind, bool always_to_flag, bool implicit,
543			struct gomp_coalesce_buf *cbuf,
544			htab_t *refcount_set)
545{
546  assert (kind != GOMP_MAP_ATTACH
547	  || kind != GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
548
549  tgt_var->key = oldn;
550  tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind);
551  tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind);
552  tgt_var->is_attach = false;
553  tgt_var->offset = newn->host_start - oldn->host_start;
554
555  /* For implicit maps, old contained in new is valid.  */
556  bool implicit_subset = (implicit
557			  && newn->host_start <= oldn->host_start
558			  && oldn->host_end <= newn->host_end);
559  if (implicit_subset)
560    tgt_var->length = oldn->host_end - oldn->host_start;
561  else
562    tgt_var->length = newn->host_end - newn->host_start;
563
564  if ((kind & GOMP_MAP_FLAG_FORCE)
565      /* For implicit maps, old contained in new is valid.  */
566      || !(implicit_subset
567	   /* Otherwise, new contained inside old is considered valid.  */
568	   || (oldn->host_start <= newn->host_start
569	       && newn->host_end <= oldn->host_end)))
570    {
571      gomp_mutex_unlock (&devicep->lock);
572      gomp_fatal ("Trying to map into device [%p..%p) object when "
573		  "[%p..%p) is already mapped",
574		  (void *) newn->host_start, (void *) newn->host_end,
575		  (void *) oldn->host_start, (void *) oldn->host_end);
576    }
577
578  if (GOMP_MAP_ALWAYS_TO_P (kind) || always_to_flag)
579    {
580      /* Implicit + always should not happen. If this does occur, below
581	 address/length adjustment is a TODO.  */
582      assert (!implicit_subset);
583
584      if (oldn->aux && oldn->aux->attach_count)
585	{
586	  /* We have to be careful not to overwrite still attached pointers
587	     during the copyback to host.  */
588	  uintptr_t addr = newn->host_start;
589	  while (addr < newn->host_end)
590	    {
591	      size_t i = (addr - oldn->host_start) / sizeof (void *);
592	      if (oldn->aux->attach_count[i] == 0)
593		gomp_copy_host2dev (devicep, aq,
594				    (void *) (oldn->tgt->tgt_start
595					      + oldn->tgt_offset
596					      + addr - oldn->host_start),
597				    (void *) addr,
598				    sizeof (void *), false, cbuf);
599	      addr += sizeof (void *);
600	    }
601	}
602      else
603	gomp_copy_host2dev (devicep, aq,
604			    (void *) (oldn->tgt->tgt_start + oldn->tgt_offset
605				      + newn->host_start - oldn->host_start),
606			    (void *) newn->host_start,
607			    newn->host_end - newn->host_start, false, cbuf);
608    }
609
610  gomp_increment_refcount (oldn, refcount_set);
611}
612
613static int
614get_kind (bool short_mapkind, void *kinds, int idx)
615{
616  if (!short_mapkind)
617    return ((unsigned char *) kinds)[idx];
618
619  int val = ((unsigned short *) kinds)[idx];
620  if (GOMP_MAP_IMPLICIT_P (val))
621    val &= ~GOMP_MAP_IMPLICIT;
622  return val;
623}
624
625
626static bool
627get_implicit (bool short_mapkind, void *kinds, int idx)
628{
629  if (!short_mapkind)
630    return false;
631
632  int val = ((unsigned short *) kinds)[idx];
633  return GOMP_MAP_IMPLICIT_P (val);
634}
635
636static void
637gomp_map_pointer (struct target_mem_desc *tgt, struct goacc_asyncqueue *aq,
638		  uintptr_t host_ptr, uintptr_t target_offset, uintptr_t bias,
639		  struct gomp_coalesce_buf *cbuf,
640		  bool allow_zero_length_array_sections)
641{
642  struct gomp_device_descr *devicep = tgt->device_descr;
643  struct splay_tree_s *mem_map = &devicep->mem_map;
644  struct splay_tree_key_s cur_node;
645
646  cur_node.host_start = host_ptr;
647  if (cur_node.host_start == (uintptr_t) NULL)
648    {
649      cur_node.tgt_offset = (uintptr_t) NULL;
650      gomp_copy_host2dev (devicep, aq,
651			  (void *) (tgt->tgt_start + target_offset),
652			  (void *) &cur_node.tgt_offset, sizeof (void *),
653			  true, cbuf);
654      return;
655    }
656  /* Add bias to the pointer value.  */
657  cur_node.host_start += bias;
658  cur_node.host_end = cur_node.host_start;
659  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
660  if (n == NULL)
661    {
662      if (allow_zero_length_array_sections)
663	cur_node.tgt_offset = 0;
664      else
665	{
666	  gomp_mutex_unlock (&devicep->lock);
667	  gomp_fatal ("Pointer target of array section wasn't mapped");
668	}
669    }
670  else
671    {
672      cur_node.host_start -= n->host_start;
673      cur_node.tgt_offset
674	= n->tgt->tgt_start + n->tgt_offset + cur_node.host_start;
675      /* At this point tgt_offset is target address of the
676	 array section.  Now subtract bias to get what we want
677	 to initialize the pointer with.  */
678      cur_node.tgt_offset -= bias;
679    }
680  gomp_copy_host2dev (devicep, aq, (void *) (tgt->tgt_start + target_offset),
681		      (void *) &cur_node.tgt_offset, sizeof (void *),
682		      true, cbuf);
683}
684
685static void
686gomp_map_fields_existing (struct target_mem_desc *tgt,
687			  struct goacc_asyncqueue *aq, splay_tree_key n,
688			  size_t first, size_t i, void **hostaddrs,
689			  size_t *sizes, void *kinds,
690			  struct gomp_coalesce_buf *cbuf, htab_t *refcount_set)
691{
692  struct gomp_device_descr *devicep = tgt->device_descr;
693  struct splay_tree_s *mem_map = &devicep->mem_map;
694  struct splay_tree_key_s cur_node;
695  int kind;
696  bool implicit;
697  const bool short_mapkind = true;
698  const int typemask = short_mapkind ? 0xff : 0x7;
699
700  cur_node.host_start = (uintptr_t) hostaddrs[i];
701  cur_node.host_end = cur_node.host_start + sizes[i];
702  splay_tree_key n2 = splay_tree_lookup (mem_map, &cur_node);
703  kind = get_kind (short_mapkind, kinds, i);
704  implicit = get_implicit (short_mapkind, kinds, i);
705  if (n2
706      && n2->tgt == n->tgt
707      && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
708    {
709      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
710			      kind & typemask, false, implicit, cbuf,
711			      refcount_set);
712      return;
713    }
714  if (sizes[i] == 0)
715    {
716      if (cur_node.host_start > (uintptr_t) hostaddrs[first - 1])
717	{
718	  cur_node.host_start--;
719	  n2 = splay_tree_lookup (mem_map, &cur_node);
720	  cur_node.host_start++;
721	  if (n2
722	      && n2->tgt == n->tgt
723	      && n2->host_start - n->host_start
724		 == n2->tgt_offset - n->tgt_offset)
725	    {
726	      gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
727				      kind & typemask, false, implicit, cbuf,
728				      refcount_set);
729	      return;
730	    }
731	}
732      cur_node.host_end++;
733      n2 = splay_tree_lookup (mem_map, &cur_node);
734      cur_node.host_end--;
735      if (n2
736	  && n2->tgt == n->tgt
737	  && n2->host_start - n->host_start == n2->tgt_offset - n->tgt_offset)
738	{
739	  gomp_map_vars_existing (devicep, aq, n2, &cur_node, &tgt->list[i],
740				  kind & typemask, false, implicit, cbuf,
741				  refcount_set);
742	  return;
743	}
744    }
745  gomp_mutex_unlock (&devicep->lock);
746  gomp_fatal ("Trying to map into device [%p..%p) structure element when "
747	      "other mapped elements from the same structure weren't mapped "
748	      "together with it", (void *) cur_node.host_start,
749	      (void *) cur_node.host_end);
750}
751
752attribute_hidden void
753gomp_attach_pointer (struct gomp_device_descr *devicep,
754		     struct goacc_asyncqueue *aq, splay_tree mem_map,
755		     splay_tree_key n, uintptr_t attach_to, size_t bias,
756		     struct gomp_coalesce_buf *cbufp,
757		     bool allow_zero_length_array_sections)
758{
759  struct splay_tree_key_s s;
760  size_t size, idx;
761
762  if (n == NULL)
763    {
764      gomp_mutex_unlock (&devicep->lock);
765      gomp_fatal ("enclosing struct not mapped for attach");
766    }
767
768  size = (n->host_end - n->host_start + sizeof (void *) - 1) / sizeof (void *);
769  /* We might have a pointer in a packed struct: however we cannot have more
770     than one such pointer in each pointer-sized portion of the struct, so
771     this is safe.  */
772  idx = (attach_to - n->host_start) / sizeof (void *);
773
774  if (!n->aux)
775    n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
776
777  if (!n->aux->attach_count)
778    n->aux->attach_count
779      = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size);
780
781  if (n->aux->attach_count[idx] < UINTPTR_MAX)
782    n->aux->attach_count[idx]++;
783  else
784    {
785      gomp_mutex_unlock (&devicep->lock);
786      gomp_fatal ("attach count overflow");
787    }
788
789  if (n->aux->attach_count[idx] == 1)
790    {
791      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to
792			 - n->host_start;
793      uintptr_t target = (uintptr_t) *(void **) attach_to;
794      splay_tree_key tn;
795      uintptr_t data;
796
797      if ((void *) target == NULL)
798	{
799	  gomp_mutex_unlock (&devicep->lock);
800	  gomp_fatal ("attempt to attach null pointer");
801	}
802
803      s.host_start = target + bias;
804      s.host_end = s.host_start + 1;
805      tn = splay_tree_lookup (mem_map, &s);
806
807      if (!tn)
808	{
809	  if (allow_zero_length_array_sections)
810	    /* When allowing attachment to zero-length array sections, we
811	       allow attaching to NULL pointers when the target region is not
812	       mapped.  */
813	    data = 0;
814	  else
815	    {
816	      gomp_mutex_unlock (&devicep->lock);
817	      gomp_fatal ("pointer target not mapped for attach");
818	    }
819	}
820      else
821	data = tn->tgt->tgt_start + tn->tgt_offset + target - tn->host_start;
822
823      gomp_debug (1,
824		  "%s: attaching host %p, target %p (struct base %p) to %p\n",
825		  __FUNCTION__, (void *) attach_to, (void *) devptr,
826		  (void *) (n->tgt->tgt_start + n->tgt_offset), (void *) data);
827
828      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &data,
829			  sizeof (void *), true, cbufp);
830    }
831  else
832    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
833		(void *) attach_to, (int) n->aux->attach_count[idx]);
834}
835
836attribute_hidden void
837gomp_detach_pointer (struct gomp_device_descr *devicep,
838		     struct goacc_asyncqueue *aq, splay_tree_key n,
839		     uintptr_t detach_from, bool finalize,
840		     struct gomp_coalesce_buf *cbufp)
841{
842  size_t idx;
843
844  if (n == NULL)
845    {
846      gomp_mutex_unlock (&devicep->lock);
847      gomp_fatal ("enclosing struct not mapped for detach");
848    }
849
850  idx = (detach_from - n->host_start) / sizeof (void *);
851
852  if (!n->aux || !n->aux->attach_count)
853    {
854      gomp_mutex_unlock (&devicep->lock);
855      gomp_fatal ("no attachment counters for struct");
856    }
857
858  if (finalize)
859    n->aux->attach_count[idx] = 1;
860
861  if (n->aux->attach_count[idx] == 0)
862    {
863      gomp_mutex_unlock (&devicep->lock);
864      gomp_fatal ("attach count underflow");
865    }
866  else
867    n->aux->attach_count[idx]--;
868
869  if (n->aux->attach_count[idx] == 0)
870    {
871      uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + detach_from
872			 - n->host_start;
873      uintptr_t target = (uintptr_t) *(void **) detach_from;
874
875      gomp_debug (1,
876		  "%s: detaching host %p, target %p (struct base %p) to %p\n",
877		  __FUNCTION__, (void *) detach_from, (void *) devptr,
878		  (void *) (n->tgt->tgt_start + n->tgt_offset),
879		  (void *) target);
880
881      gomp_copy_host2dev (devicep, aq, (void *) devptr, (void *) &target,
882			  sizeof (void *), true, cbufp);
883    }
884  else
885    gomp_debug (1, "%s: attach count for %p -> %u\n", __FUNCTION__,
886		(void *) detach_from, (int) n->aux->attach_count[idx]);
887}
888
889attribute_hidden uintptr_t
890gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
891{
892  if (tgt->list[i].key != NULL)
893    return tgt->list[i].key->tgt->tgt_start
894	   + tgt->list[i].key->tgt_offset
895	   + tgt->list[i].offset;
896
897  switch (tgt->list[i].offset)
898    {
899    case OFFSET_INLINED:
900      return (uintptr_t) hostaddrs[i];
901
902    case OFFSET_POINTER:
903      return 0;
904
905    case OFFSET_STRUCT:
906      return tgt->list[i + 1].key->tgt->tgt_start
907	     + tgt->list[i + 1].key->tgt_offset
908	     + tgt->list[i + 1].offset
909	     + (uintptr_t) hostaddrs[i]
910	     - (uintptr_t) hostaddrs[i + 1];
911
912    default:
913      return tgt->tgt_start + tgt->list[i].offset;
914    }
915}
916
917static inline __attribute__((always_inline)) struct target_mem_desc *
918gomp_map_vars_internal (struct gomp_device_descr *devicep,
919			struct goacc_asyncqueue *aq, size_t mapnum,
920			void **hostaddrs, void **devaddrs, size_t *sizes,
921			void *kinds, bool short_mapkind,
922			htab_t *refcount_set,
923			enum gomp_map_vars_kind pragma_kind)
924{
925  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
926  bool has_firstprivate = false;
927  bool has_always_ptrset = false;
928  bool openmp_p = (pragma_kind & GOMP_MAP_VARS_OPENACC) == 0;
929  const int rshift = short_mapkind ? 8 : 3;
930  const int typemask = short_mapkind ? 0xff : 0x7;
931  struct splay_tree_s *mem_map = &devicep->mem_map;
932  struct splay_tree_key_s cur_node;
933  struct target_mem_desc *tgt
934    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
935  tgt->list_count = mapnum;
936  tgt->refcount = (pragma_kind & GOMP_MAP_VARS_ENTER_DATA) ? 0 : 1;
937  tgt->device_descr = devicep;
938  tgt->prev = NULL;
939  struct gomp_coalesce_buf cbuf, *cbufp = NULL;
940
941  if (mapnum == 0)
942    {
943      tgt->tgt_start = 0;
944      tgt->tgt_end = 0;
945      return tgt;
946    }
947
948  tgt_align = sizeof (void *);
949  tgt_size = 0;
950  cbuf.chunks = NULL;
951  cbuf.chunk_cnt = -1;
952  cbuf.use_cnt = 0;
953  cbuf.buf = NULL;
954  if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
955    {
956      size_t chunks_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
957      cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunks_size);
958      cbuf.chunk_cnt = 0;
959    }
960  if (pragma_kind == GOMP_MAP_VARS_TARGET)
961    {
962      size_t align = 4 * sizeof (void *);
963      tgt_align = align;
964      tgt_size = mapnum * sizeof (void *);
965      cbuf.chunk_cnt = 1;
966      cbuf.use_cnt = 1 + (mapnum > 1);
967      cbuf.chunks[0].start = 0;
968      cbuf.chunks[0].end = tgt_size;
969    }
970
971  gomp_mutex_lock (&devicep->lock);
972  if (devicep->state == GOMP_DEVICE_FINALIZED)
973    {
974      gomp_mutex_unlock (&devicep->lock);
975      free (tgt);
976      return NULL;
977    }
978
979  for (i = 0; i < mapnum; i++)
980    {
981      int kind = get_kind (short_mapkind, kinds, i);
982      bool implicit = get_implicit (short_mapkind, kinds, i);
983      if (hostaddrs[i] == NULL
984	  || (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
985	{
986	  tgt->list[i].key = NULL;
987	  tgt->list[i].offset = OFFSET_INLINED;
988	  continue;
989	}
990      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR
991	       || (kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
992	{
993	  tgt->list[i].key = NULL;
994	  if (!not_found_cnt)
995	    {
996	      /* In OpenMP < 5.0 and OpenACC the mapping has to be done
997		 on a separate construct prior to using use_device_{addr,ptr}.
998		 In OpenMP 5.0, map directives need to be ordered by the
999		 middle-end before the use_device_* clauses.  If
1000		 !not_found_cnt, all mappings requested (if any) are already
1001		 mapped, so use_device_{addr,ptr} can be resolved right away.
1002		 Otherwise, if not_found_cnt, gomp_map_lookup might fail
1003		 now but would succeed after performing the mappings in the
1004		 following loop.  We can't defer this always to the second
1005		 loop, because it is not even invoked when !not_found_cnt
1006		 after the first loop.  */
1007	      cur_node.host_start = (uintptr_t) hostaddrs[i];
1008	      cur_node.host_end = cur_node.host_start;
1009	      splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
1010	      if (n != NULL)
1011		{
1012		  cur_node.host_start -= n->host_start;
1013		  hostaddrs[i]
1014		    = (void *) (n->tgt->tgt_start + n->tgt_offset
1015				+ cur_node.host_start);
1016		}
1017	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1018		{
1019		  gomp_mutex_unlock (&devicep->lock);
1020		  gomp_fatal ("use_device_ptr pointer wasn't mapped");
1021		}
1022	      else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1023		/* If not present, continue using the host address.  */
1024		;
1025	      else
1026		__builtin_unreachable ();
1027	      tgt->list[i].offset = OFFSET_INLINED;
1028	    }
1029	  else
1030	    tgt->list[i].offset = 0;
1031	  continue;
1032	}
1033      else if ((kind & typemask) == GOMP_MAP_STRUCT)
1034	{
1035	  size_t first = i + 1;
1036	  size_t last = i + sizes[i];
1037	  cur_node.host_start = (uintptr_t) hostaddrs[i];
1038	  cur_node.host_end = (uintptr_t) hostaddrs[last]
1039			      + sizes[last];
1040	  tgt->list[i].key = NULL;
1041	  tgt->list[i].offset = OFFSET_STRUCT;
1042	  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1043	  if (n == NULL)
1044	    {
1045	      size_t align = (size_t) 1 << (kind >> rshift);
1046	      if (tgt_align < align)
1047		tgt_align = align;
1048	      tgt_size -= (uintptr_t) hostaddrs[first] - cur_node.host_start;
1049	      tgt_size = (tgt_size + align - 1) & ~(align - 1);
1050	      tgt_size += cur_node.host_end - cur_node.host_start;
1051	      not_found_cnt += last - i;
1052	      for (i = first; i <= last; i++)
1053		{
1054		  tgt->list[i].key = NULL;
1055		  if (!aq
1056		      && gomp_to_device_kind_p (get_kind (short_mapkind, kinds, i)
1057						& typemask))
1058		    gomp_coalesce_buf_add (&cbuf,
1059					   tgt_size - cur_node.host_end
1060					   + (uintptr_t) hostaddrs[i],
1061					   sizes[i]);
1062		}
1063	      i--;
1064	      continue;
1065	    }
1066	  for (i = first; i <= last; i++)
1067	    gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1068				      sizes, kinds, NULL, refcount_set);
1069	  i--;
1070	  continue;
1071	}
1072      else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
1073	{
1074	  tgt->list[i].key = NULL;
1075	  tgt->list[i].offset = OFFSET_POINTER;
1076	  has_firstprivate = true;
1077	  continue;
1078	}
1079      else if ((kind & typemask) == GOMP_MAP_ATTACH
1080	       || ((kind & typemask)
1081		   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION))
1082	{
1083	  tgt->list[i].key = NULL;
1084	  has_firstprivate = true;
1085	  continue;
1086	}
1087      cur_node.host_start = (uintptr_t) hostaddrs[i];
1088      if (!GOMP_MAP_POINTER_P (kind & typemask))
1089	cur_node.host_end = cur_node.host_start + sizes[i];
1090      else
1091	cur_node.host_end = cur_node.host_start + sizeof (void *);
1092      if ((kind & typemask) == GOMP_MAP_FIRSTPRIVATE)
1093	{
1094	  tgt->list[i].key = NULL;
1095
1096	  size_t align = (size_t) 1 << (kind >> rshift);
1097	  if (tgt_align < align)
1098	    tgt_align = align;
1099	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
1100	  if (!aq)
1101	    gomp_coalesce_buf_add (&cbuf, tgt_size,
1102				   cur_node.host_end - cur_node.host_start);
1103	  tgt_size += cur_node.host_end - cur_node.host_start;
1104	  has_firstprivate = true;
1105	  continue;
1106	}
1107      splay_tree_key n;
1108      if ((kind & typemask) == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
1109	{
1110	  n = gomp_map_0len_lookup (mem_map, &cur_node);
1111	  if (!n)
1112	    {
1113	      tgt->list[i].key = NULL;
1114	      tgt->list[i].offset = OFFSET_POINTER;
1115	      continue;
1116	    }
1117	}
1118      else
1119	n = splay_tree_lookup (mem_map, &cur_node);
1120      if (n && n->refcount != REFCOUNT_LINK)
1121	{
1122	  int always_to_cnt = 0;
1123	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
1124	    {
1125	      bool has_nullptr = false;
1126	      size_t j;
1127	      for (j = 0; j < n->tgt->list_count; j++)
1128		if (n->tgt->list[j].key == n)
1129		  {
1130		    has_nullptr = n->tgt->list[j].has_null_ptr_assoc;
1131		    break;
1132		  }
1133	      if (n->tgt->list_count == 0)
1134		{
1135		  /* 'declare target'; assume has_nullptr; it could also be
1136		     statically assigned pointer, but that it should be to
1137		     the equivalent variable on the host.  */
1138		  assert (n->refcount == REFCOUNT_INFINITY);
1139		  has_nullptr = true;
1140		}
1141	      else
1142		assert (j < n->tgt->list_count);
1143	      /* Re-map the data if there is an 'always' modifier or if it a
1144		 null pointer was there and non a nonnull has been found; that
1145		 permits transparent re-mapping for Fortran array descriptors
1146		 which were previously mapped unallocated.  */
1147	      for (j = i + 1; j < mapnum; j++)
1148		{
1149		  int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1150		  if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1151		      && (!has_nullptr
1152			  || !GOMP_MAP_POINTER_P (ptr_kind)
1153			  || *(void **) hostaddrs[j] == NULL))
1154		    break;
1155		  else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1156			   || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1157			       > cur_node.host_end))
1158		    break;
1159		  else
1160		    {
1161		      has_always_ptrset = true;
1162		      ++always_to_cnt;
1163		    }
1164		}
1165	    }
1166	  gomp_map_vars_existing (devicep, aq, n, &cur_node, &tgt->list[i],
1167				  kind & typemask, always_to_cnt > 0, implicit,
1168				  NULL, refcount_set);
1169	  i += always_to_cnt;
1170	}
1171      else
1172	{
1173	  tgt->list[i].key = NULL;
1174
1175	  if ((kind & typemask) == GOMP_MAP_IF_PRESENT)
1176	    {
1177	      /* Not present, hence, skip entry - including its MAP_POINTER,
1178		 when existing.  */
1179	      tgt->list[i].offset = OFFSET_POINTER;
1180	      if (i + 1 < mapnum
1181		  && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1182		      == GOMP_MAP_POINTER))
1183		{
1184		  ++i;
1185		  tgt->list[i].key = NULL;
1186		  tgt->list[i].offset = 0;
1187		}
1188	      continue;
1189	    }
1190	  size_t align = (size_t) 1 << (kind >> rshift);
1191	  not_found_cnt++;
1192	  if (tgt_align < align)
1193	    tgt_align = align;
1194	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
1195	  if (!aq
1196	      && gomp_to_device_kind_p (kind & typemask))
1197	    gomp_coalesce_buf_add (&cbuf, tgt_size,
1198				   cur_node.host_end - cur_node.host_start);
1199	  tgt_size += cur_node.host_end - cur_node.host_start;
1200	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
1201	    {
1202	      size_t j;
1203	      int kind;
1204	      for (j = i + 1; j < mapnum; j++)
1205		if (!GOMP_MAP_POINTER_P ((kind = (get_kind (short_mapkind,
1206						  kinds, j)) & typemask))
1207		    && !GOMP_MAP_ALWAYS_POINTER_P (kind))
1208		  break;
1209		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
1210			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1211			     > cur_node.host_end))
1212		  break;
1213		else
1214		  {
1215		    tgt->list[j].key = NULL;
1216		    i++;
1217		  }
1218	    }
1219	}
1220    }
1221
1222  if (devaddrs)
1223    {
1224      if (mapnum != 1)
1225	{
1226	  gomp_mutex_unlock (&devicep->lock);
1227	  gomp_fatal ("unexpected aggregation");
1228	}
1229      tgt->to_free = devaddrs[0];
1230      tgt->tgt_start = (uintptr_t) tgt->to_free;
1231      tgt->tgt_end = tgt->tgt_start + sizes[0];
1232    }
1233  else if (not_found_cnt || pragma_kind == GOMP_MAP_VARS_TARGET)
1234    {
1235      /* Allocate tgt_align aligned tgt_size block of memory.  */
1236      /* FIXME: Perhaps change interface to allocate properly aligned
1237	 memory.  */
1238      tgt->to_free = devicep->alloc_func (devicep->target_id,
1239					  tgt_size + tgt_align - 1);
1240      if (!tgt->to_free)
1241	{
1242	  gomp_mutex_unlock (&devicep->lock);
1243	  gomp_fatal ("device memory allocation fail");
1244	}
1245
1246      tgt->tgt_start = (uintptr_t) tgt->to_free;
1247      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
1248      tgt->tgt_end = tgt->tgt_start + tgt_size;
1249
1250      if (cbuf.use_cnt == 1)
1251	cbuf.chunk_cnt--;
1252      if (cbuf.chunk_cnt > 0)
1253	{
1254	  cbuf.buf
1255	    = malloc (cbuf.chunks[cbuf.chunk_cnt - 1].end - cbuf.chunks[0].start);
1256	  if (cbuf.buf)
1257	    {
1258	      cbuf.tgt = tgt;
1259	      cbufp = &cbuf;
1260	    }
1261	}
1262    }
1263  else
1264    {
1265      tgt->to_free = NULL;
1266      tgt->tgt_start = 0;
1267      tgt->tgt_end = 0;
1268    }
1269
1270  tgt_size = 0;
1271  if (pragma_kind == GOMP_MAP_VARS_TARGET)
1272    tgt_size = mapnum * sizeof (void *);
1273
1274  tgt->array = NULL;
1275  if (not_found_cnt || has_firstprivate || has_always_ptrset)
1276    {
1277      if (not_found_cnt)
1278	tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
1279      splay_tree_node array = tgt->array;
1280      size_t j, field_tgt_offset = 0, field_tgt_clear = FIELD_TGT_EMPTY;
1281      uintptr_t field_tgt_base = 0;
1282      splay_tree_key field_tgt_structelem_first = NULL;
1283
1284      for (i = 0; i < mapnum; i++)
1285	if (has_always_ptrset
1286	    && tgt->list[i].key
1287	    && (get_kind (short_mapkind, kinds, i) & typemask)
1288	       == GOMP_MAP_TO_PSET)
1289	  {
1290	    splay_tree_key k = tgt->list[i].key;
1291	    bool has_nullptr = false;
1292	    size_t j;
1293	    for (j = 0; j < k->tgt->list_count; j++)
1294	      if (k->tgt->list[j].key == k)
1295		{
1296		  has_nullptr = k->tgt->list[j].has_null_ptr_assoc;
1297		  break;
1298		}
1299	    if (k->tgt->list_count == 0)
1300	      has_nullptr = true;
1301	    else
1302	      assert (j < k->tgt->list_count);
1303
1304	    tgt->list[i].has_null_ptr_assoc = false;
1305	    for (j = i + 1; j < mapnum; j++)
1306	      {
1307		int ptr_kind = get_kind (short_mapkind, kinds, j) & typemask;
1308		if (!GOMP_MAP_ALWAYS_POINTER_P (ptr_kind)
1309		    && (!has_nullptr
1310			|| !GOMP_MAP_POINTER_P (ptr_kind)
1311			|| *(void **) hostaddrs[j] == NULL))
1312		  break;
1313		else if ((uintptr_t) hostaddrs[j] < k->host_start
1314			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1315			     > k->host_end))
1316		  break;
1317		else
1318		  {
1319		    if (*(void **) hostaddrs[j] == NULL)
1320		      tgt->list[i].has_null_ptr_assoc = true;
1321		    tgt->list[j].key = k;
1322		    tgt->list[j].copy_from = false;
1323		    tgt->list[j].always_copy_from = false;
1324		    tgt->list[j].is_attach = false;
1325		    gomp_increment_refcount (k, refcount_set);
1326		    gomp_map_pointer (k->tgt, aq,
1327				      (uintptr_t) *(void **) hostaddrs[j],
1328				      k->tgt_offset + ((uintptr_t) hostaddrs[j]
1329						       - k->host_start),
1330				      sizes[j], cbufp, false);
1331		  }
1332	      }
1333	    i = j - 1;
1334	  }
1335	else if (tgt->list[i].key == NULL)
1336	  {
1337	    int kind = get_kind (short_mapkind, kinds, i);
1338	    bool implicit = get_implicit (short_mapkind, kinds, i);
1339	    if (hostaddrs[i] == NULL)
1340	      continue;
1341	    switch (kind & typemask)
1342	      {
1343		size_t align, len, first, last;
1344		splay_tree_key n;
1345	      case GOMP_MAP_FIRSTPRIVATE:
1346		align = (size_t) 1 << (kind >> rshift);
1347		tgt_size = (tgt_size + align - 1) & ~(align - 1);
1348		tgt->list[i].offset = tgt_size;
1349		len = sizes[i];
1350		gomp_copy_host2dev (devicep, aq,
1351				    (void *) (tgt->tgt_start + tgt_size),
1352				    (void *) hostaddrs[i], len, false, cbufp);
1353		tgt_size += len;
1354		continue;
1355	      case GOMP_MAP_FIRSTPRIVATE_INT:
1356	      case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
1357		continue;
1358	      case GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT:
1359		/* The OpenACC 'host_data' construct only allows 'use_device'
1360		   "mapping" clauses, so in the first loop, 'not_found_cnt'
1361		   must always have been zero, so all OpenACC 'use_device'
1362		   clauses have already been handled.  (We can only easily test
1363		   'use_device' with 'if_present' clause here.)  */
1364		assert (tgt->list[i].offset == OFFSET_INLINED);
1365		/* Nevertheless, FALLTHRU to the normal handling, to keep the
1366		   code conceptually simple, similar to the first loop.  */
1367	      case GOMP_MAP_USE_DEVICE_PTR:
1368		if (tgt->list[i].offset == 0)
1369		  {
1370		    cur_node.host_start = (uintptr_t) hostaddrs[i];
1371		    cur_node.host_end = cur_node.host_start;
1372		    n = gomp_map_lookup (mem_map, &cur_node);
1373		    if (n != NULL)
1374		      {
1375			cur_node.host_start -= n->host_start;
1376			hostaddrs[i]
1377			  = (void *) (n->tgt->tgt_start + n->tgt_offset
1378				      + cur_node.host_start);
1379		      }
1380		    else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
1381		      {
1382			gomp_mutex_unlock (&devicep->lock);
1383			gomp_fatal ("use_device_ptr pointer wasn't mapped");
1384		      }
1385		    else if ((kind & typemask)
1386			     == GOMP_MAP_USE_DEVICE_PTR_IF_PRESENT)
1387		      /* If not present, continue using the host address.  */
1388		      ;
1389		    else
1390		      __builtin_unreachable ();
1391		    tgt->list[i].offset = OFFSET_INLINED;
1392		  }
1393		continue;
1394	      case GOMP_MAP_STRUCT:
1395		first = i + 1;
1396		last = i + sizes[i];
1397		cur_node.host_start = (uintptr_t) hostaddrs[i];
1398		cur_node.host_end = (uintptr_t) hostaddrs[last]
1399				    + sizes[last];
1400		if (tgt->list[first].key != NULL)
1401		  continue;
1402		n = splay_tree_lookup (mem_map, &cur_node);
1403		if (n == NULL)
1404		  {
1405		    size_t align = (size_t) 1 << (kind >> rshift);
1406		    tgt_size -= (uintptr_t) hostaddrs[first]
1407				- (uintptr_t) hostaddrs[i];
1408		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1409		    tgt_size += (uintptr_t) hostaddrs[first]
1410				- (uintptr_t) hostaddrs[i];
1411		    field_tgt_base = (uintptr_t) hostaddrs[first];
1412		    field_tgt_offset = tgt_size;
1413		    field_tgt_clear = last;
1414		    field_tgt_structelem_first = NULL;
1415		    tgt_size += cur_node.host_end
1416				- (uintptr_t) hostaddrs[first];
1417		    continue;
1418		  }
1419		for (i = first; i <= last; i++)
1420		  gomp_map_fields_existing (tgt, aq, n, first, i, hostaddrs,
1421					    sizes, kinds, cbufp, refcount_set);
1422		i--;
1423		continue;
1424	      case GOMP_MAP_ALWAYS_POINTER:
1425		cur_node.host_start = (uintptr_t) hostaddrs[i];
1426		cur_node.host_end = cur_node.host_start + sizeof (void *);
1427		n = splay_tree_lookup (mem_map, &cur_node);
1428		if (n == NULL
1429		    || n->host_start > cur_node.host_start
1430		    || n->host_end < cur_node.host_end)
1431		  {
1432		    gomp_mutex_unlock (&devicep->lock);
1433		    gomp_fatal ("always pointer not mapped");
1434		  }
1435		if ((get_kind (short_mapkind, kinds, i - 1) & typemask)
1436		    != GOMP_MAP_ALWAYS_POINTER)
1437		  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i - 1);
1438		if (cur_node.tgt_offset)
1439		  cur_node.tgt_offset -= sizes[i];
1440		gomp_copy_host2dev (devicep, aq,
1441				    (void *) (n->tgt->tgt_start
1442					      + n->tgt_offset
1443					      + cur_node.host_start
1444					      - n->host_start),
1445				    (void *) &cur_node.tgt_offset,
1446				    sizeof (void *), true, cbufp);
1447		cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
1448				      + cur_node.host_start - n->host_start;
1449		continue;
1450	      case GOMP_MAP_IF_PRESENT:
1451		/* Not present - otherwise handled above. Skip over its
1452		   MAP_POINTER as well.  */
1453		if (i + 1 < mapnum
1454		    && ((typemask & get_kind (short_mapkind, kinds, i + 1))
1455			== GOMP_MAP_POINTER))
1456		  ++i;
1457		continue;
1458	      case GOMP_MAP_ATTACH:
1459	      case GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION:
1460		{
1461		  cur_node.host_start = (uintptr_t) hostaddrs[i];
1462		  cur_node.host_end = cur_node.host_start + sizeof (void *);
1463		  splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
1464		  if (n != NULL)
1465		    {
1466		      tgt->list[i].key = n;
1467		      tgt->list[i].offset = cur_node.host_start - n->host_start;
1468		      tgt->list[i].length = n->host_end - n->host_start;
1469		      tgt->list[i].copy_from = false;
1470		      tgt->list[i].always_copy_from = false;
1471		      tgt->list[i].is_attach = true;
1472		      /* OpenACC 'attach'/'detach' doesn't affect
1473			 structured/dynamic reference counts ('n->refcount',
1474			 'n->dynamic_refcount').  */
1475
1476		      bool zlas
1477			= ((kind & typemask)
1478			   == GOMP_MAP_ATTACH_ZERO_LENGTH_ARRAY_SECTION);
1479		      gomp_attach_pointer (devicep, aq, mem_map, n,
1480					   (uintptr_t) hostaddrs[i], sizes[i],
1481					   cbufp, zlas);
1482		    }
1483		  else if ((pragma_kind & GOMP_MAP_VARS_OPENACC) != 0)
1484		    {
1485		      gomp_mutex_unlock (&devicep->lock);
1486		      gomp_fatal ("outer struct not mapped for attach");
1487		    }
1488		  continue;
1489		}
1490	      default:
1491		break;
1492	      }
1493	    splay_tree_key k = &array->key;
1494	    k->host_start = (uintptr_t) hostaddrs[i];
1495	    if (!GOMP_MAP_POINTER_P (kind & typemask))
1496	      k->host_end = k->host_start + sizes[i];
1497	    else
1498	      k->host_end = k->host_start + sizeof (void *);
1499	    splay_tree_key n = splay_tree_lookup (mem_map, k);
1500	    if (n && n->refcount != REFCOUNT_LINK)
1501	      gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
1502				      kind & typemask, false, implicit, cbufp,
1503				      refcount_set);
1504	    else
1505	      {
1506		k->aux = NULL;
1507		if (n && n->refcount == REFCOUNT_LINK)
1508		  {
1509		    /* Replace target address of the pointer with target address
1510		       of mapped object in the splay tree.  */
1511		    splay_tree_remove (mem_map, n);
1512		    k->aux
1513		      = gomp_malloc_cleared (sizeof (struct splay_tree_aux));
1514		    k->aux->link_key = n;
1515		  }
1516		size_t align = (size_t) 1 << (kind >> rshift);
1517		tgt->list[i].key = k;
1518		k->tgt = tgt;
1519		k->refcount = 0;
1520		k->dynamic_refcount = 0;
1521		if (field_tgt_clear != FIELD_TGT_EMPTY)
1522		  {
1523		    k->tgt_offset = k->host_start - field_tgt_base
1524				    + field_tgt_offset;
1525		    if (openmp_p)
1526		      {
1527			k->refcount = REFCOUNT_STRUCTELEM;
1528			if (field_tgt_structelem_first == NULL)
1529			  {
1530			    /* Set to first structure element of sequence.  */
1531			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_FIRST;
1532			    field_tgt_structelem_first = k;
1533			  }
1534			else
1535			  /* Point to refcount of leading element, but do not
1536			     increment again.  */
1537			  k->structelem_refcount_ptr
1538			    = &field_tgt_structelem_first->structelem_refcount;
1539
1540			if (i == field_tgt_clear)
1541			  {
1542			    k->refcount |= REFCOUNT_STRUCTELEM_FLAG_LAST;
1543			    field_tgt_structelem_first = NULL;
1544			  }
1545		      }
1546		    if (i == field_tgt_clear)
1547		      field_tgt_clear = FIELD_TGT_EMPTY;
1548		  }
1549		else
1550		  {
1551		    tgt_size = (tgt_size + align - 1) & ~(align - 1);
1552		    k->tgt_offset = tgt_size;
1553		    tgt_size += k->host_end - k->host_start;
1554		  }
1555		/* First increment, from 0 to 1. gomp_increment_refcount
1556		   encapsulates the different increment cases, so use this
1557		   instead of directly setting 1 during initialization.  */
1558		gomp_increment_refcount (k, refcount_set);
1559
1560		tgt->list[i].copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
1561		tgt->list[i].always_copy_from
1562		  = GOMP_MAP_ALWAYS_FROM_P (kind & typemask);
1563		tgt->list[i].is_attach = false;
1564		tgt->list[i].offset = 0;
1565		tgt->list[i].length = k->host_end - k->host_start;
1566		tgt->refcount++;
1567		array->left = NULL;
1568		array->right = NULL;
1569		splay_tree_insert (mem_map, array);
1570		switch (kind & typemask)
1571		  {
1572		  case GOMP_MAP_ALLOC:
1573		  case GOMP_MAP_FROM:
1574		  case GOMP_MAP_FORCE_ALLOC:
1575		  case GOMP_MAP_FORCE_FROM:
1576		  case GOMP_MAP_ALWAYS_FROM:
1577		    break;
1578		  case GOMP_MAP_TO:
1579		  case GOMP_MAP_TOFROM:
1580		  case GOMP_MAP_FORCE_TO:
1581		  case GOMP_MAP_FORCE_TOFROM:
1582		  case GOMP_MAP_ALWAYS_TO:
1583		  case GOMP_MAP_ALWAYS_TOFROM:
1584		    gomp_copy_host2dev (devicep, aq,
1585					(void *) (tgt->tgt_start
1586						  + k->tgt_offset),
1587					(void *) k->host_start,
1588					k->host_end - k->host_start,
1589					false, cbufp);
1590		    break;
1591		  case GOMP_MAP_POINTER:
1592		  case GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION:
1593		    gomp_map_pointer
1594		      (tgt, aq, (uintptr_t) *(void **) k->host_start,
1595		       k->tgt_offset, sizes[i], cbufp,
1596		       ((kind & typemask)
1597			== GOMP_MAP_POINTER_TO_ZERO_LENGTH_ARRAY_SECTION));
1598		    break;
1599		  case GOMP_MAP_TO_PSET:
1600		    gomp_copy_host2dev (devicep, aq,
1601					(void *) (tgt->tgt_start
1602						  + k->tgt_offset),
1603					(void *) k->host_start,
1604					k->host_end - k->host_start,
1605					false, cbufp);
1606		    tgt->list[i].has_null_ptr_assoc = false;
1607
1608		    for (j = i + 1; j < mapnum; j++)
1609		      {
1610			int ptr_kind = (get_kind (short_mapkind, kinds, j)
1611					& typemask);
1612			if (!GOMP_MAP_POINTER_P (ptr_kind)
1613			    && !GOMP_MAP_ALWAYS_POINTER_P (ptr_kind))
1614			  break;
1615			else if ((uintptr_t) hostaddrs[j] < k->host_start
1616				 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
1617				     > k->host_end))
1618			  break;
1619			else
1620			  {
1621			    tgt->list[j].key = k;
1622			    tgt->list[j].copy_from = false;
1623			    tgt->list[j].always_copy_from = false;
1624			    tgt->list[j].is_attach = false;
1625			    tgt->list[i].has_null_ptr_assoc |= !(*(void **) hostaddrs[j]);
1626			    /* For OpenMP, the use of refcount_sets causes
1627			       errors if we set k->refcount = 1 above but also
1628			       increment it again here, for decrementing will
1629			       not properly match, since we decrement only once
1630			       for each key's refcount. Therefore avoid this
1631			       increment for OpenMP constructs.  */
1632			    if (!openmp_p)
1633			      gomp_increment_refcount (k, refcount_set);
1634			    gomp_map_pointer (tgt, aq,
1635					      (uintptr_t) *(void **) hostaddrs[j],
1636					      k->tgt_offset
1637					      + ((uintptr_t) hostaddrs[j]
1638						 - k->host_start),
1639					      sizes[j], cbufp, false);
1640			  }
1641			}
1642		    i = j - 1;
1643		    break;
1644		  case GOMP_MAP_FORCE_PRESENT:
1645		    {
1646		      /* We already looked up the memory region above and it
1647			 was missing.  */
1648		      size_t size = k->host_end - k->host_start;
1649		      gomp_mutex_unlock (&devicep->lock);
1650#ifdef HAVE_INTTYPES_H
1651		      gomp_fatal ("present clause: !acc_is_present (%p, "
1652				  "%"PRIu64" (0x%"PRIx64"))",
1653				  (void *) k->host_start,
1654				  (uint64_t) size, (uint64_t) size);
1655#else
1656		      gomp_fatal ("present clause: !acc_is_present (%p, "
1657				  "%lu (0x%lx))", (void *) k->host_start,
1658				  (unsigned long) size, (unsigned long) size);
1659#endif
1660		    }
1661		    break;
1662		  case GOMP_MAP_FORCE_DEVICEPTR:
1663		    assert (k->host_end - k->host_start == sizeof (void *));
1664		    gomp_copy_host2dev (devicep, aq,
1665					(void *) (tgt->tgt_start
1666						  + k->tgt_offset),
1667					(void *) k->host_start,
1668					sizeof (void *), false, cbufp);
1669		    break;
1670		  default:
1671		    gomp_mutex_unlock (&devicep->lock);
1672		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
1673				kind);
1674		  }
1675
1676		if (k->aux && k->aux->link_key)
1677		  {
1678		    /* Set link pointer on target to the device address of the
1679		       mapped object.  */
1680		    void *tgt_addr = (void *) (tgt->tgt_start + k->tgt_offset);
1681		    /* We intentionally do not use coalescing here, as it's not
1682		       data allocated by the current call to this function.  */
1683		    gomp_copy_host2dev (devicep, aq, (void *) n->tgt_offset,
1684					&tgt_addr, sizeof (void *), true, NULL);
1685		  }
1686		array++;
1687	      }
1688	  }
1689    }
1690
1691  if (pragma_kind == GOMP_MAP_VARS_TARGET)
1692    {
1693      for (i = 0; i < mapnum; i++)
1694	{
1695	  cur_node.tgt_offset = gomp_map_val (tgt, hostaddrs, i);
1696	  gomp_copy_host2dev (devicep, aq,
1697			      (void *) (tgt->tgt_start + i * sizeof (void *)),
1698			      (void *) &cur_node.tgt_offset, sizeof (void *),
1699			      true, cbufp);
1700	}
1701    }
1702
1703  if (cbufp)
1704    {
1705      /* See 'gomp_coalesce_buf_add'.  */
1706      assert (!aq);
1707
1708      long c = 0;
1709      for (c = 0; c < cbuf.chunk_cnt; ++c)
1710	gomp_copy_host2dev (devicep, aq,
1711			    (void *) (tgt->tgt_start + cbuf.chunks[c].start),
1712			    (char *) cbuf.buf + (cbuf.chunks[c].start
1713						 - cbuf.chunks[0].start),
1714			    cbuf.chunks[c].end - cbuf.chunks[c].start,
1715			    true, NULL);
1716      free (cbuf.buf);
1717      cbuf.buf = NULL;
1718      cbufp = NULL;
1719    }
1720
1721  /* If the variable from "omp target enter data" map-list was already mapped,
1722     tgt is not needed.  Otherwise tgt will be freed by gomp_unmap_vars or
1723     gomp_exit_data.  */
1724  if ((pragma_kind & GOMP_MAP_VARS_ENTER_DATA) && tgt->refcount == 0)
1725    {
1726      free (tgt);
1727      tgt = NULL;
1728    }
1729
1730  gomp_mutex_unlock (&devicep->lock);
1731  return tgt;
1732}
1733
1734static struct target_mem_desc *
1735gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
1736	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
1737	       bool short_mapkind, htab_t *refcount_set,
1738	       enum gomp_map_vars_kind pragma_kind)
1739{
1740  /* This management of a local refcount_set is for convenience of callers
1741     who do not share a refcount_set over multiple map/unmap uses.  */
1742  htab_t local_refcount_set = NULL;
1743  if (refcount_set == NULL)
1744    {
1745      local_refcount_set = htab_create (mapnum);
1746      refcount_set = &local_refcount_set;
1747    }
1748
1749  struct target_mem_desc *tgt;
1750  tgt = gomp_map_vars_internal (devicep, NULL, mapnum, hostaddrs, devaddrs,
1751				sizes, kinds, short_mapkind, refcount_set,
1752				pragma_kind);
1753  if (local_refcount_set)
1754    htab_free (local_refcount_set);
1755
1756  return tgt;
1757}
1758
1759attribute_hidden struct target_mem_desc *
1760goacc_map_vars (struct gomp_device_descr *devicep,
1761		struct goacc_asyncqueue *aq, size_t mapnum,
1762		void **hostaddrs, void **devaddrs, size_t *sizes,
1763		void *kinds, bool short_mapkind,
1764		enum gomp_map_vars_kind pragma_kind)
1765{
1766  return gomp_map_vars_internal (devicep, aq, mapnum, hostaddrs, devaddrs,
1767				 sizes, kinds, short_mapkind, NULL,
1768				 GOMP_MAP_VARS_OPENACC | pragma_kind);
1769}
1770
1771static void
1772gomp_unmap_tgt (struct target_mem_desc *tgt)
1773{
1774  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
1775  if (tgt->tgt_end)
1776    gomp_free_device_memory (tgt->device_descr, tgt->to_free);
1777
1778  free (tgt->array);
1779  free (tgt);
1780}
1781
1782static bool
1783gomp_unref_tgt (void *ptr)
1784{
1785  bool is_tgt_unmapped = false;
1786
1787  struct target_mem_desc *tgt = (struct target_mem_desc *) ptr;
1788
1789  if (tgt->refcount > 1)
1790    tgt->refcount--;
1791  else
1792    {
1793      gomp_unmap_tgt (tgt);
1794      is_tgt_unmapped = true;
1795    }
1796
1797  return is_tgt_unmapped;
1798}
1799
1800static void
1801gomp_unref_tgt_void (void *ptr)
1802{
1803  (void) gomp_unref_tgt (ptr);
1804}
1805
1806static void
1807gomp_remove_splay_tree_key (splay_tree sp, splay_tree_key k)
1808{
1809  splay_tree_remove (sp, k);
1810  if (k->aux)
1811    {
1812      if (k->aux->link_key)
1813	splay_tree_insert (sp, (splay_tree_node) k->aux->link_key);
1814      if (k->aux->attach_count)
1815	free (k->aux->attach_count);
1816      free (k->aux);
1817      k->aux = NULL;
1818    }
1819}
1820
1821static inline __attribute__((always_inline)) bool
1822gomp_remove_var_internal (struct gomp_device_descr *devicep, splay_tree_key k,
1823			  struct goacc_asyncqueue *aq)
1824{
1825  bool is_tgt_unmapped = false;
1826
1827  if (REFCOUNT_STRUCTELEM_P (k->refcount))
1828    {
1829      if (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount) == false)
1830	/* Infer the splay_tree_key of the first structelem key using the
1831	   pointer to the first structleme_refcount.  */
1832	k = (splay_tree_key) ((char *) k->structelem_refcount_ptr
1833			      - offsetof (struct splay_tree_key_s,
1834					  structelem_refcount));
1835      assert (REFCOUNT_STRUCTELEM_FIRST_P (k->refcount));
1836
1837      /* The array created by gomp_map_vars is an array of splay_tree_nodes,
1838	 with the splay_tree_keys embedded inside.  */
1839      splay_tree_node node =
1840	(splay_tree_node) ((char *) k
1841			   - offsetof (struct splay_tree_node_s, key));
1842      while (true)
1843	{
1844	  /* Starting from the _FIRST key, and continue for all following
1845	     sibling keys.  */
1846	  gomp_remove_splay_tree_key (&devicep->mem_map, k);
1847	  if (REFCOUNT_STRUCTELEM_LAST_P (k->refcount))
1848	    break;
1849	  else
1850	    k = &(++node)->key;
1851	}
1852    }
1853  else
1854    gomp_remove_splay_tree_key (&devicep->mem_map, k);
1855
1856  if (aq)
1857    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1858						(void *) k->tgt);
1859  else
1860    is_tgt_unmapped = gomp_unref_tgt ((void *) k->tgt);
1861  return is_tgt_unmapped;
1862}
1863
1864attribute_hidden bool
1865gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k)
1866{
1867  return gomp_remove_var_internal (devicep, k, NULL);
1868}
1869
1870/* Remove a variable asynchronously.  This actually removes the variable
1871   mapping immediately, but retains the linked target_mem_desc until the
1872   asynchronous operation has completed (as it may still refer to target
1873   memory).  The device lock must be held before entry, and remains locked on
1874   exit.  */
1875
1876attribute_hidden void
1877gomp_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key k,
1878		       struct goacc_asyncqueue *aq)
1879{
1880  (void) gomp_remove_var_internal (devicep, k, aq);
1881}
1882
1883/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
1884   variables back from device to host: if it is false, it is assumed that this
1885   has been done already.  */
1886
1887static inline __attribute__((always_inline)) void
1888gomp_unmap_vars_internal (struct target_mem_desc *tgt, bool do_copyfrom,
1889			  htab_t *refcount_set, struct goacc_asyncqueue *aq)
1890{
1891  struct gomp_device_descr *devicep = tgt->device_descr;
1892
1893  if (tgt->list_count == 0)
1894    {
1895      free (tgt);
1896      return;
1897    }
1898
1899  gomp_mutex_lock (&devicep->lock);
1900  if (devicep->state == GOMP_DEVICE_FINALIZED)
1901    {
1902      gomp_mutex_unlock (&devicep->lock);
1903      free (tgt->array);
1904      free (tgt);
1905      return;
1906    }
1907
1908  size_t i;
1909
1910  /* We must perform detachments before any copies back to the host.  */
1911  for (i = 0; i < tgt->list_count; i++)
1912    {
1913      splay_tree_key k = tgt->list[i].key;
1914
1915      if (k != NULL && tgt->list[i].is_attach)
1916	gomp_detach_pointer (devicep, aq, k, tgt->list[i].key->host_start
1917					     + tgt->list[i].offset,
1918			     false, NULL);
1919    }
1920
1921  for (i = 0; i < tgt->list_count; i++)
1922    {
1923      splay_tree_key k = tgt->list[i].key;
1924      if (k == NULL)
1925	continue;
1926
1927      /* OpenACC 'attach'/'detach' doesn't affect structured/dynamic reference
1928	 counts ('n->refcount', 'n->dynamic_refcount').  */
1929      if (tgt->list[i].is_attach)
1930	continue;
1931
1932      bool do_copy, do_remove;
1933      gomp_decrement_refcount (k, refcount_set, false, &do_copy, &do_remove);
1934
1935      if ((do_copy && do_copyfrom && tgt->list[i].copy_from)
1936	  || tgt->list[i].always_copy_from)
1937	gomp_copy_dev2host (devicep, aq,
1938			    (void *) (k->host_start + tgt->list[i].offset),
1939			    (void *) (k->tgt->tgt_start + k->tgt_offset
1940				      + tgt->list[i].offset),
1941			    tgt->list[i].length);
1942      if (do_remove)
1943	{
1944	  struct target_mem_desc *k_tgt = k->tgt;
1945	  bool is_tgt_unmapped = gomp_remove_var (devicep, k);
1946	  /* It would be bad if TGT got unmapped while we're still iterating
1947	     over its LIST_COUNT, and also expect to use it in the following
1948	     code.  */
1949	  assert (!is_tgt_unmapped
1950		  || k_tgt != tgt);
1951	}
1952    }
1953
1954  if (aq)
1955    devicep->openacc.async.queue_callback_func (aq, gomp_unref_tgt_void,
1956						(void *) tgt);
1957  else
1958    gomp_unref_tgt ((void *) tgt);
1959
1960  gomp_mutex_unlock (&devicep->lock);
1961}
1962
1963static void
1964gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1965		 htab_t *refcount_set)
1966{
1967  /* This management of a local refcount_set is for convenience of callers
1968     who do not share a refcount_set over multiple map/unmap uses.  */
1969  htab_t local_refcount_set = NULL;
1970  if (refcount_set == NULL)
1971    {
1972      local_refcount_set = htab_create (tgt->list_count);
1973      refcount_set = &local_refcount_set;
1974    }
1975
1976  gomp_unmap_vars_internal (tgt, do_copyfrom, refcount_set, NULL);
1977
1978  if (local_refcount_set)
1979    htab_free (local_refcount_set);
1980}
1981
1982attribute_hidden void
1983goacc_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom,
1984		  struct goacc_asyncqueue *aq)
1985{
1986  gomp_unmap_vars_internal (tgt, do_copyfrom, NULL, aq);
1987}
1988
1989static void
1990gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
1991	     size_t *sizes, void *kinds, bool short_mapkind)
1992{
1993  size_t i;
1994  struct splay_tree_key_s cur_node;
1995  const int typemask = short_mapkind ? 0xff : 0x7;
1996
1997  if (!devicep)
1998    return;
1999
2000  if (mapnum == 0)
2001    return;
2002
2003  gomp_mutex_lock (&devicep->lock);
2004  if (devicep->state == GOMP_DEVICE_FINALIZED)
2005    {
2006      gomp_mutex_unlock (&devicep->lock);
2007      return;
2008    }
2009
2010  for (i = 0; i < mapnum; i++)
2011    if (sizes[i])
2012      {
2013	cur_node.host_start = (uintptr_t) hostaddrs[i];
2014	cur_node.host_end = cur_node.host_start + sizes[i];
2015	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2016	if (n)
2017	  {
2018	    int kind = get_kind (short_mapkind, kinds, i);
2019	    if (n->host_start > cur_node.host_start
2020		|| n->host_end < cur_node.host_end)
2021	      {
2022		gomp_mutex_unlock (&devicep->lock);
2023		gomp_fatal ("Trying to update [%p..%p) object when "
2024			    "only [%p..%p) is mapped",
2025			    (void *) cur_node.host_start,
2026			    (void *) cur_node.host_end,
2027			    (void *) n->host_start,
2028			    (void *) n->host_end);
2029	      }
2030
2031	    if (n->aux && n->aux->attach_count)
2032	      {
2033		uintptr_t addr = cur_node.host_start;
2034		while (addr < cur_node.host_end)
2035		  {
2036		    /* We have to be careful not to overwrite still attached
2037		       pointers during host<->device updates.  */
2038		    size_t i = (addr - cur_node.host_start) / sizeof (void *);
2039		    if (n->aux->attach_count[i] == 0)
2040		      {
2041			void *devaddr = (void *) (n->tgt->tgt_start
2042						  + n->tgt_offset
2043						  + addr - n->host_start);
2044			if (GOMP_MAP_COPY_TO_P (kind & typemask))
2045			  gomp_copy_host2dev (devicep, NULL,
2046					      devaddr, (void *) addr,
2047					      sizeof (void *), false, NULL);
2048			if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2049			  gomp_copy_dev2host (devicep, NULL,
2050					      (void *) addr, devaddr,
2051					      sizeof (void *));
2052		      }
2053		    addr += sizeof (void *);
2054		  }
2055	      }
2056	    else
2057	      {
2058		void *hostaddr = (void *) cur_node.host_start;
2059		void *devaddr = (void *) (n->tgt->tgt_start + n->tgt_offset
2060					  + cur_node.host_start
2061					  - n->host_start);
2062		size_t size = cur_node.host_end - cur_node.host_start;
2063
2064		if (GOMP_MAP_COPY_TO_P (kind & typemask))
2065		  gomp_copy_host2dev (devicep, NULL, devaddr, hostaddr, size,
2066				      false, NULL);
2067		if (GOMP_MAP_COPY_FROM_P (kind & typemask))
2068		  gomp_copy_dev2host (devicep, NULL, hostaddr, devaddr, size);
2069	      }
2070	  }
2071      }
2072  gomp_mutex_unlock (&devicep->lock);
2073}
2074
2075/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP.
2076   And insert to splay tree the mapping between addresses from HOST_TABLE and
2077   from loaded target image.  We rely in the host and device compiler
2078   emitting variable and functions in the same order.  */
2079
2080static void
2081gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version,
2082			   const void *host_table, const void *target_data,
2083			   bool is_register_lock)
2084{
2085  void **host_func_table = ((void ***) host_table)[0];
2086  void **host_funcs_end  = ((void ***) host_table)[1];
2087  void **host_var_table  = ((void ***) host_table)[2];
2088  void **host_vars_end   = ((void ***) host_table)[3];
2089
2090  /* The func table contains only addresses, the var table contains addresses
2091     and corresponding sizes.  */
2092  int num_funcs = host_funcs_end - host_func_table;
2093  int num_vars  = (host_vars_end - host_var_table) / 2;
2094
2095  /* Others currently is only 'device_num' */
2096  int num_others = 1;
2097
2098  /* Load image to device and get target addresses for the image.  */
2099  struct addr_pair *target_table = NULL;
2100  int i, num_target_entries;
2101
2102  num_target_entries
2103    = devicep->load_image_func (devicep->target_id, version,
2104				target_data, &target_table);
2105
2106  if (num_target_entries != num_funcs + num_vars
2107      /* Others (device_num) are included as trailing entries in pair list.  */
2108      && num_target_entries != num_funcs + num_vars + num_others)
2109    {
2110      gomp_mutex_unlock (&devicep->lock);
2111      if (is_register_lock)
2112	gomp_mutex_unlock (&register_lock);
2113      gomp_fatal ("Cannot map target functions or variables"
2114		  " (expected %u, have %u)", num_funcs + num_vars,
2115		  num_target_entries);
2116    }
2117
2118  /* Insert host-target address mapping into splay tree.  */
2119  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
2120  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
2121  tgt->refcount = REFCOUNT_INFINITY;
2122  tgt->tgt_start = 0;
2123  tgt->tgt_end = 0;
2124  tgt->to_free = NULL;
2125  tgt->prev = NULL;
2126  tgt->list_count = 0;
2127  tgt->device_descr = devicep;
2128  splay_tree_node array = tgt->array;
2129
2130  for (i = 0; i < num_funcs; i++)
2131    {
2132      splay_tree_key k = &array->key;
2133      k->host_start = (uintptr_t) host_func_table[i];
2134      k->host_end = k->host_start + 1;
2135      k->tgt = tgt;
2136      k->tgt_offset = target_table[i].start;
2137      k->refcount = REFCOUNT_INFINITY;
2138      k->dynamic_refcount = 0;
2139      k->aux = NULL;
2140      array->left = NULL;
2141      array->right = NULL;
2142      splay_tree_insert (&devicep->mem_map, array);
2143      array++;
2144    }
2145
2146  /* Most significant bit of the size in host and target tables marks
2147     "omp declare target link" variables.  */
2148  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2149  const uintptr_t size_mask = ~link_bit;
2150
2151  for (i = 0; i < num_vars; i++)
2152    {
2153      struct addr_pair *target_var = &target_table[num_funcs + i];
2154      uintptr_t target_size = target_var->end - target_var->start;
2155      bool is_link_var = link_bit & (uintptr_t) host_var_table[i * 2 + 1];
2156
2157      if (!is_link_var && (uintptr_t) host_var_table[i * 2 + 1] != target_size)
2158	{
2159	  gomp_mutex_unlock (&devicep->lock);
2160	  if (is_register_lock)
2161	    gomp_mutex_unlock (&register_lock);
2162	  gomp_fatal ("Cannot map target variables (size mismatch)");
2163	}
2164
2165      splay_tree_key k = &array->key;
2166      k->host_start = (uintptr_t) host_var_table[i * 2];
2167      k->host_end
2168	= k->host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2169      k->tgt = tgt;
2170      k->tgt_offset = target_var->start;
2171      k->refcount = is_link_var ? REFCOUNT_LINK : REFCOUNT_INFINITY;
2172      k->dynamic_refcount = 0;
2173      k->aux = NULL;
2174      array->left = NULL;
2175      array->right = NULL;
2176      splay_tree_insert (&devicep->mem_map, array);
2177      array++;
2178    }
2179
2180  /* Last entry is for the on-device 'device_num' variable. Tolerate case
2181     where plugin does not return this entry.  */
2182  if (num_funcs + num_vars < num_target_entries)
2183    {
2184      struct addr_pair *device_num_var = &target_table[num_funcs + num_vars];
2185      /* Start address will be non-zero for last entry if GOMP_DEVICE_NUM_VAR
2186	 was found in this image.  */
2187      if (device_num_var->start != 0)
2188	{
2189	  /* The index of the devicep within devices[] is regarded as its
2190	     'device number', which is different from the per-device type
2191	     devicep->target_id.  */
2192	  int device_num_val = (int) (devicep - &devices[0]);
2193	  if (device_num_var->end - device_num_var->start != sizeof (int))
2194	    {
2195	      gomp_mutex_unlock (&devicep->lock);
2196	      if (is_register_lock)
2197		gomp_mutex_unlock (&register_lock);
2198	      gomp_fatal ("offload plugin managed 'device_num' not of expected "
2199			  "format");
2200	    }
2201
2202	  /* Copy device_num value to place on device memory, hereby actually
2203	     designating its device number into effect.  */
2204	  gomp_copy_host2dev (devicep, NULL, (void *) device_num_var->start,
2205			      &device_num_val, sizeof (int), false, NULL);
2206	}
2207    }
2208
2209  free (target_table);
2210}
2211
2212/* Unload the mappings described by target_data from device DEVICE_P.
2213   The device must be locked.   */
2214
2215static void
2216gomp_unload_image_from_device (struct gomp_device_descr *devicep,
2217			       unsigned version,
2218			       const void *host_table, const void *target_data)
2219{
2220  void **host_func_table = ((void ***) host_table)[0];
2221  void **host_funcs_end  = ((void ***) host_table)[1];
2222  void **host_var_table  = ((void ***) host_table)[2];
2223  void **host_vars_end   = ((void ***) host_table)[3];
2224
2225  /* The func table contains only addresses, the var table contains addresses
2226     and corresponding sizes.  */
2227  int num_funcs = host_funcs_end - host_func_table;
2228  int num_vars  = (host_vars_end - host_var_table) / 2;
2229
2230  struct splay_tree_key_s k;
2231  splay_tree_key node = NULL;
2232
2233  /* Find mapping at start of node array */
2234  if (num_funcs || num_vars)
2235    {
2236      k.host_start = (num_funcs ? (uintptr_t) host_func_table[0]
2237		      : (uintptr_t) host_var_table[0]);
2238      k.host_end = k.host_start + 1;
2239      node = splay_tree_lookup (&devicep->mem_map, &k);
2240    }
2241
2242  if (!devicep->unload_image_func (devicep->target_id, version, target_data))
2243    {
2244      gomp_mutex_unlock (&devicep->lock);
2245      gomp_fatal ("image unload fail");
2246    }
2247
2248  /* Remove mappings from splay tree.  */
2249  int i;
2250  for (i = 0; i < num_funcs; i++)
2251    {
2252      k.host_start = (uintptr_t) host_func_table[i];
2253      k.host_end = k.host_start + 1;
2254      splay_tree_remove (&devicep->mem_map, &k);
2255    }
2256
2257  /* Most significant bit of the size in host and target tables marks
2258     "omp declare target link" variables.  */
2259  const uintptr_t link_bit = 1ULL << (sizeof (uintptr_t) * __CHAR_BIT__ - 1);
2260  const uintptr_t size_mask = ~link_bit;
2261  bool is_tgt_unmapped = false;
2262
2263  for (i = 0; i < num_vars; i++)
2264    {
2265      k.host_start = (uintptr_t) host_var_table[i * 2];
2266      k.host_end
2267	= k.host_start + (size_mask & (uintptr_t) host_var_table[i * 2 + 1]);
2268
2269      if (!(link_bit & (uintptr_t) host_var_table[i * 2 + 1]))
2270	splay_tree_remove (&devicep->mem_map, &k);
2271      else
2272	{
2273	  splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &k);
2274	  is_tgt_unmapped = gomp_remove_var (devicep, n);
2275	}
2276    }
2277
2278  if (node && !is_tgt_unmapped)
2279    {
2280      free (node->tgt);
2281      free (node);
2282    }
2283}
2284
2285/* This function should be called from every offload image while loading.
2286   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2287   the target, and TARGET_DATA needed by target plugin.  */
2288
2289void
2290GOMP_offload_register_ver (unsigned version, const void *host_table,
2291			   int target_type, const void *target_data)
2292{
2293  int i;
2294
2295  if (GOMP_VERSION_LIB (version) > GOMP_VERSION)
2296    gomp_fatal ("Library too old for offload (version %u < %u)",
2297		GOMP_VERSION, GOMP_VERSION_LIB (version));
2298
2299  gomp_mutex_lock (&register_lock);
2300
2301  /* Load image to all initialized devices.  */
2302  for (i = 0; i < num_devices; i++)
2303    {
2304      struct gomp_device_descr *devicep = &devices[i];
2305      gomp_mutex_lock (&devicep->lock);
2306      if (devicep->type == target_type
2307	  && devicep->state == GOMP_DEVICE_INITIALIZED)
2308	gomp_load_image_to_device (devicep, version,
2309				   host_table, target_data, true);
2310      gomp_mutex_unlock (&devicep->lock);
2311    }
2312
2313  /* Insert image to array of pending images.  */
2314  offload_images
2315    = gomp_realloc_unlock (offload_images,
2316			   (num_offload_images + 1)
2317			   * sizeof (struct offload_image_descr));
2318  offload_images[num_offload_images].version = version;
2319  offload_images[num_offload_images].type = target_type;
2320  offload_images[num_offload_images].host_table = host_table;
2321  offload_images[num_offload_images].target_data = target_data;
2322
2323  num_offload_images++;
2324  gomp_mutex_unlock (&register_lock);
2325}
2326
2327void
2328GOMP_offload_register (const void *host_table, int target_type,
2329		       const void *target_data)
2330{
2331  GOMP_offload_register_ver (0, host_table, target_type, target_data);
2332}
2333
2334/* This function should be called from every offload image while unloading.
2335   It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of
2336   the target, and TARGET_DATA needed by target plugin.  */
2337
2338void
2339GOMP_offload_unregister_ver (unsigned version, const void *host_table,
2340			     int target_type, const void *target_data)
2341{
2342  int i;
2343
2344  gomp_mutex_lock (&register_lock);
2345
2346  /* Unload image from all initialized devices.  */
2347  for (i = 0; i < num_devices; i++)
2348    {
2349      struct gomp_device_descr *devicep = &devices[i];
2350      gomp_mutex_lock (&devicep->lock);
2351      if (devicep->type == target_type
2352	  && devicep->state == GOMP_DEVICE_INITIALIZED)
2353	gomp_unload_image_from_device (devicep, version,
2354				       host_table, target_data);
2355      gomp_mutex_unlock (&devicep->lock);
2356    }
2357
2358  /* Remove image from array of pending images.  */
2359  for (i = 0; i < num_offload_images; i++)
2360    if (offload_images[i].target_data == target_data)
2361      {
2362	offload_images[i] = offload_images[--num_offload_images];
2363	break;
2364      }
2365
2366  gomp_mutex_unlock (&register_lock);
2367}
2368
2369void
2370GOMP_offload_unregister (const void *host_table, int target_type,
2371			 const void *target_data)
2372{
2373  GOMP_offload_unregister_ver (0, host_table, target_type, target_data);
2374}
2375
2376/* This function initializes the target device, specified by DEVICEP.  DEVICEP
2377   must be locked on entry, and remains locked on return.  */
2378
2379attribute_hidden void
2380gomp_init_device (struct gomp_device_descr *devicep)
2381{
2382  int i;
2383  if (!devicep->init_device_func (devicep->target_id))
2384    {
2385      gomp_mutex_unlock (&devicep->lock);
2386      gomp_fatal ("device initialization failed");
2387    }
2388
2389  /* Load to device all images registered by the moment.  */
2390  for (i = 0; i < num_offload_images; i++)
2391    {
2392      struct offload_image_descr *image = &offload_images[i];
2393      if (image->type == devicep->type)
2394	gomp_load_image_to_device (devicep, image->version,
2395				   image->host_table, image->target_data,
2396				   false);
2397    }
2398
2399  /* Initialize OpenACC asynchronous queues.  */
2400  goacc_init_asyncqueues (devicep);
2401
2402  devicep->state = GOMP_DEVICE_INITIALIZED;
2403}
2404
2405/* This function finalizes the target device, specified by DEVICEP.  DEVICEP
2406   must be locked on entry, and remains locked on return.  */
2407
2408attribute_hidden bool
2409gomp_fini_device (struct gomp_device_descr *devicep)
2410{
2411  bool ret = goacc_fini_asyncqueues (devicep);
2412  ret &= devicep->fini_device_func (devicep->target_id);
2413  devicep->state = GOMP_DEVICE_FINALIZED;
2414  return ret;
2415}
2416
2417attribute_hidden void
2418gomp_unload_device (struct gomp_device_descr *devicep)
2419{
2420  if (devicep->state == GOMP_DEVICE_INITIALIZED)
2421    {
2422      unsigned i;
2423
2424      /* Unload from device all images registered at the moment.  */
2425      for (i = 0; i < num_offload_images; i++)
2426	{
2427	  struct offload_image_descr *image = &offload_images[i];
2428	  if (image->type == devicep->type)
2429	    gomp_unload_image_from_device (devicep, image->version,
2430					   image->host_table,
2431					   image->target_data);
2432	}
2433    }
2434}
2435
2436/* Host fallback for GOMP_target{,_ext} routines.  */
2437
2438static void
2439gomp_target_fallback (void (*fn) (void *), void **hostaddrs,
2440		      struct gomp_device_descr *devicep, void **args)
2441{
2442  struct gomp_thread old_thr, *thr = gomp_thread ();
2443
2444  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2445      && devicep != NULL)
2446    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2447		"be used for offloading");
2448
2449  old_thr = *thr;
2450  memset (thr, '\0', sizeof (*thr));
2451  if (gomp_places_list)
2452    {
2453      thr->place = old_thr.place;
2454      thr->ts.place_partition_len = gomp_places_list_len;
2455    }
2456  if (args)
2457    while (*args)
2458      {
2459	intptr_t id = (intptr_t) *args++, val;
2460	if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM)
2461	  val = (intptr_t) *args++;
2462	else
2463	  val = id >> GOMP_TARGET_ARG_VALUE_SHIFT;
2464	if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL)
2465	  continue;
2466	id &= GOMP_TARGET_ARG_ID_MASK;
2467	if (id != GOMP_TARGET_ARG_THREAD_LIMIT)
2468	  continue;
2469	val = val > INT_MAX ? INT_MAX : val;
2470	if (val)
2471	  gomp_icv (true)->thread_limit_var = val;
2472	break;
2473      }
2474
2475  fn (hostaddrs);
2476  gomp_free_thread (thr);
2477  *thr = old_thr;
2478}
2479
2480/* Calculate alignment and size requirements of a private copy of data shared
2481   as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE.  */
2482
2483static inline void
2484calculate_firstprivate_requirements (size_t mapnum, size_t *sizes,
2485				     unsigned short *kinds, size_t *tgt_align,
2486				     size_t *tgt_size)
2487{
2488  size_t i;
2489  for (i = 0; i < mapnum; i++)
2490    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
2491      {
2492	size_t align = (size_t) 1 << (kinds[i] >> 8);
2493	if (*tgt_align < align)
2494	  *tgt_align = align;
2495	*tgt_size = (*tgt_size + align - 1) & ~(align - 1);
2496	*tgt_size += sizes[i];
2497      }
2498}
2499
2500/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to DST.  */
2501
2502static inline void
2503copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs,
2504			size_t *sizes, unsigned short *kinds, size_t tgt_align,
2505			size_t tgt_size)
2506{
2507  uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
2508  if (al)
2509    tgt += tgt_align - al;
2510  tgt_size = 0;
2511  size_t i;
2512  for (i = 0; i < mapnum; i++)
2513    if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE && hostaddrs[i] != NULL)
2514      {
2515	size_t align = (size_t) 1 << (kinds[i] >> 8);
2516	tgt_size = (tgt_size + align - 1) & ~(align - 1);
2517	memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
2518	hostaddrs[i] = tgt + tgt_size;
2519	tgt_size = tgt_size + sizes[i];
2520      }
2521}
2522
2523/* Helper function of GOMP_target{,_ext} routines.  */
2524
2525static void *
2526gomp_get_target_fn_addr (struct gomp_device_descr *devicep,
2527			 void (*host_fn) (void *))
2528{
2529  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
2530    return (void *) host_fn;
2531  else
2532    {
2533      gomp_mutex_lock (&devicep->lock);
2534      if (devicep->state == GOMP_DEVICE_FINALIZED)
2535	{
2536	  gomp_mutex_unlock (&devicep->lock);
2537	  return NULL;
2538	}
2539
2540      struct splay_tree_key_s k;
2541      k.host_start = (uintptr_t) host_fn;
2542      k.host_end = k.host_start + 1;
2543      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
2544      gomp_mutex_unlock (&devicep->lock);
2545      if (tgt_fn == NULL)
2546	return NULL;
2547
2548      return (void *) tgt_fn->tgt_offset;
2549    }
2550}
2551
2552/* Called when encountering a target directive.  If DEVICE
2553   is GOMP_DEVICE_ICV, it means use device-var ICV.  If it is
2554   GOMP_DEVICE_HOST_FALLBACK (or any value
2555   larger than last available hw device), use host fallback.
2556   FN is address of host code, UNUSED is part of the current ABI, but
2557   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
2558   with MAPNUM entries, with addresses of the host objects,
2559   sizes of the host objects (resp. for pointer kind pointer bias
2560   and assumed sizeof (void *) size) and kinds.  */
2561
2562void
2563GOMP_target (int device, void (*fn) (void *), const void *unused,
2564	     size_t mapnum, void **hostaddrs, size_t *sizes,
2565	     unsigned char *kinds)
2566{
2567  struct gomp_device_descr *devicep = resolve_device (device);
2568
2569  void *fn_addr;
2570  if (devicep == NULL
2571      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2572      /* All shared memory devices should use the GOMP_target_ext function.  */
2573      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM
2574      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)))
2575    return gomp_target_fallback (fn, hostaddrs, devicep, NULL);
2576
2577  htab_t refcount_set = htab_create (mapnum);
2578  struct target_mem_desc *tgt_vars
2579    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2580		     &refcount_set, GOMP_MAP_VARS_TARGET);
2581  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start,
2582		     NULL);
2583  htab_clear (refcount_set);
2584  gomp_unmap_vars (tgt_vars, true, &refcount_set);
2585  htab_free (refcount_set);
2586}
2587
2588static inline unsigned int
2589clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags)
2590{
2591  /* If we cannot run asynchronously, simply ignore nowait.  */
2592  if (devicep != NULL && devicep->async_run_func == NULL)
2593    flags &= ~GOMP_TARGET_FLAG_NOWAIT;
2594
2595  return flags;
2596}
2597
2598/* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present,
2599   and several arguments have been added:
2600   FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h.
2601   DEPEND is array of dependencies, see GOMP_task for details.
2602
2603   ARGS is a pointer to an array consisting of a variable number of both
2604   device-independent and device-specific arguments, which can take one two
2605   elements where the first specifies for which device it is intended, the type
2606   and optionally also the value.  If the value is not present in the first
2607   one, the whole second element the actual value.  The last element of the
2608   array is a single NULL.  Among the device independent can be for example
2609   NUM_TEAMS and THREAD_LIMIT.
2610
2611   NUM_TEAMS is positive if GOMP_teams will be called in the body with
2612   that value, or 1 if teams construct is not present, or 0, if
2613   teams construct does not have num_teams clause and so the choice is
2614   implementation defined, and -1 if it can't be determined on the host
2615   what value will GOMP_teams have on the device.
2616   THREAD_LIMIT similarly is positive if GOMP_teams will be called in the
2617   body with that value, or 0, if teams construct does not have thread_limit
2618   clause or the teams construct is not present, or -1 if it can't be
2619   determined on the host what value will GOMP_teams have on the device.  */
2620
2621void
2622GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum,
2623		 void **hostaddrs, size_t *sizes, unsigned short *kinds,
2624		 unsigned int flags, void **depend, void **args)
2625{
2626  struct gomp_device_descr *devicep = resolve_device (device);
2627  size_t tgt_align = 0, tgt_size = 0;
2628  bool fpc_done = false;
2629
2630  flags = clear_unsupported_flags (devicep, flags);
2631
2632  if (flags & GOMP_TARGET_FLAG_NOWAIT)
2633    {
2634      struct gomp_thread *thr = gomp_thread ();
2635      /* Create a team if we don't have any around, as nowait
2636	 target tasks make sense to run asynchronously even when
2637	 outside of any parallel.  */
2638      if (__builtin_expect (thr->ts.team == NULL, 0))
2639	{
2640	  struct gomp_team *team = gomp_new_team (1);
2641	  struct gomp_task *task = thr->task;
2642	  struct gomp_task **implicit_task = &task;
2643	  struct gomp_task_icv *icv = task ? &task->icv : &gomp_global_icv;
2644	  team->prev_ts = thr->ts;
2645	  thr->ts.team = team;
2646	  thr->ts.team_id = 0;
2647	  thr->ts.work_share = &team->work_shares[0];
2648	  thr->ts.last_work_share = NULL;
2649#ifdef HAVE_SYNC_BUILTINS
2650	  thr->ts.single_count = 0;
2651#endif
2652	  thr->ts.static_trip = 0;
2653	  thr->task = &team->implicit_task[0];
2654	  gomp_init_task (thr->task, NULL, icv);
2655	  while (*implicit_task
2656		 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT)
2657	    implicit_task = &(*implicit_task)->parent;
2658	  if (*implicit_task)
2659	    {
2660	      thr->task = *implicit_task;
2661	      gomp_end_task ();
2662	      free (*implicit_task);
2663	      thr->task = &team->implicit_task[0];
2664	    }
2665	  else
2666	    pthread_setspecific (gomp_thread_destructor, thr);
2667	  if (implicit_task != &task)
2668	    {
2669	      *implicit_task = thr->task;
2670	      thr->task = task;
2671	    }
2672	}
2673      if (thr->ts.team
2674	  && !thr->task->final_task)
2675	{
2676	  gomp_create_target_task (devicep, fn, mapnum, hostaddrs,
2677				   sizes, kinds, flags, depend, args,
2678				   GOMP_TARGET_TASK_BEFORE_MAP);
2679	  return;
2680	}
2681    }
2682
2683  /* If there are depend clauses, but nowait is not present
2684     (or we are in a final task), block the parent task until the
2685     dependencies are resolved and then just continue with the rest
2686     of the function as if it is a merged task.  */
2687  if (depend != NULL)
2688    {
2689      struct gomp_thread *thr = gomp_thread ();
2690      if (thr->task && thr->task->depend_hash)
2691	{
2692	  /* If we might need to wait, copy firstprivate now.  */
2693	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2694					       &tgt_align, &tgt_size);
2695	  if (tgt_align)
2696	    {
2697	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2698	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2699				      tgt_align, tgt_size);
2700	    }
2701	  fpc_done = true;
2702	  gomp_task_maybe_wait_for_dependencies (depend);
2703	}
2704    }
2705
2706  void *fn_addr;
2707  if (devicep == NULL
2708      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2709      || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))
2710      || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
2711    {
2712      if (!fpc_done)
2713	{
2714	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2715					       &tgt_align, &tgt_size);
2716	  if (tgt_align)
2717	    {
2718	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2719	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2720				      tgt_align, tgt_size);
2721	    }
2722	}
2723      gomp_target_fallback (fn, hostaddrs, devicep, args);
2724      return;
2725    }
2726
2727  struct target_mem_desc *tgt_vars;
2728  htab_t refcount_set = NULL;
2729
2730  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2731    {
2732      if (!fpc_done)
2733	{
2734	  calculate_firstprivate_requirements (mapnum, sizes, kinds,
2735					       &tgt_align, &tgt_size);
2736	  if (tgt_align)
2737	    {
2738	      char *tgt = gomp_alloca (tgt_size + tgt_align - 1);
2739	      copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds,
2740				      tgt_align, tgt_size);
2741	    }
2742	}
2743      tgt_vars = NULL;
2744    }
2745  else
2746    {
2747      refcount_set = htab_create (mapnum);
2748      tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds,
2749				true, &refcount_set, GOMP_MAP_VARS_TARGET);
2750    }
2751  devicep->run_func (devicep->target_id, fn_addr,
2752		     tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs,
2753		     args);
2754  if (tgt_vars)
2755    {
2756      htab_clear (refcount_set);
2757      gomp_unmap_vars (tgt_vars, true, &refcount_set);
2758    }
2759  if (refcount_set)
2760    htab_free (refcount_set);
2761}
2762
2763/* Host fallback for GOMP_target_data{,_ext} routines.  */
2764
2765static void
2766gomp_target_data_fallback (struct gomp_device_descr *devicep)
2767{
2768  struct gomp_task_icv *icv = gomp_icv (false);
2769
2770  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_MANDATORY
2771      && devicep != NULL)
2772    gomp_fatal ("OMP_TARGET_OFFLOAD is set to MANDATORY, but device cannot "
2773		"be used for offloading");
2774
2775  if (icv->target_data)
2776    {
2777      /* Even when doing a host fallback, if there are any active
2778         #pragma omp target data constructs, need to remember the
2779         new #pragma omp target data, otherwise GOMP_target_end_data
2780         would get out of sync.  */
2781      struct target_mem_desc *tgt
2782	= gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false,
2783			 NULL, GOMP_MAP_VARS_DATA);
2784      tgt->prev = icv->target_data;
2785      icv->target_data = tgt;
2786    }
2787}
2788
2789void
2790GOMP_target_data (int device, const void *unused, size_t mapnum,
2791		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
2792{
2793  struct gomp_device_descr *devicep = resolve_device (device);
2794
2795  if (devicep == NULL
2796      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2797      || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
2798    return gomp_target_data_fallback (devicep);
2799
2800  struct target_mem_desc *tgt
2801    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
2802		     NULL, GOMP_MAP_VARS_DATA);
2803  struct gomp_task_icv *icv = gomp_icv (true);
2804  tgt->prev = icv->target_data;
2805  icv->target_data = tgt;
2806}
2807
2808void
2809GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs,
2810		      size_t *sizes, unsigned short *kinds)
2811{
2812  struct gomp_device_descr *devicep = resolve_device (device);
2813
2814  if (devicep == NULL
2815      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2816      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2817    return gomp_target_data_fallback (devicep);
2818
2819  struct target_mem_desc *tgt
2820    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true,
2821		     NULL, GOMP_MAP_VARS_DATA);
2822  struct gomp_task_icv *icv = gomp_icv (true);
2823  tgt->prev = icv->target_data;
2824  icv->target_data = tgt;
2825}
2826
2827void
2828GOMP_target_end_data (void)
2829{
2830  struct gomp_task_icv *icv = gomp_icv (false);
2831  if (icv->target_data)
2832    {
2833      struct target_mem_desc *tgt = icv->target_data;
2834      icv->target_data = tgt->prev;
2835      gomp_unmap_vars (tgt, true, NULL);
2836    }
2837}
2838
2839void
2840GOMP_target_update (int device, const void *unused, size_t mapnum,
2841		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
2842{
2843  struct gomp_device_descr *devicep = resolve_device (device);
2844
2845  if (devicep == NULL
2846      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2847      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2848    return;
2849
2850  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
2851}
2852
2853void
2854GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs,
2855			size_t *sizes, unsigned short *kinds,
2856			unsigned int flags, void **depend)
2857{
2858  struct gomp_device_descr *devicep = resolve_device (device);
2859
2860  /* If there are depend clauses, but nowait is not present,
2861     block the parent task until the dependencies are resolved
2862     and then just continue with the rest of the function as if it
2863     is a merged task.  Until we are able to schedule task during
2864     variable mapping or unmapping, ignore nowait if depend clauses
2865     are not present.  */
2866  if (depend != NULL)
2867    {
2868      struct gomp_thread *thr = gomp_thread ();
2869      if (thr->task && thr->task->depend_hash)
2870	{
2871	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
2872	      && thr->ts.team
2873	      && !thr->task->final_task)
2874	    {
2875	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
2876					   mapnum, hostaddrs, sizes, kinds,
2877					   flags | GOMP_TARGET_FLAG_UPDATE,
2878					   depend, NULL, GOMP_TARGET_TASK_DATA))
2879		return;
2880	    }
2881	  else
2882	    {
2883	      struct gomp_team *team = thr->ts.team;
2884	      /* If parallel or taskgroup has been cancelled, don't start new
2885		 tasks.  */
2886	      if (__builtin_expect (gomp_cancel_var, 0) && team)
2887		{
2888		  if (gomp_team_barrier_cancelled (&team->barrier))
2889		    return;
2890		  if (thr->task->taskgroup)
2891		    {
2892		      if (thr->task->taskgroup->cancelled)
2893			return;
2894		      if (thr->task->taskgroup->workshare
2895			  && thr->task->taskgroup->prev
2896			  && thr->task->taskgroup->prev->cancelled)
2897			return;
2898		    }
2899		}
2900
2901	      gomp_task_maybe_wait_for_dependencies (depend);
2902	    }
2903	}
2904    }
2905
2906  if (devicep == NULL
2907      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
2908      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
2909    return;
2910
2911  struct gomp_thread *thr = gomp_thread ();
2912  struct gomp_team *team = thr->ts.team;
2913  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
2914  if (__builtin_expect (gomp_cancel_var, 0) && team)
2915    {
2916      if (gomp_team_barrier_cancelled (&team->barrier))
2917	return;
2918      if (thr->task->taskgroup)
2919	{
2920	  if (thr->task->taskgroup->cancelled)
2921	    return;
2922	  if (thr->task->taskgroup->workshare
2923	      && thr->task->taskgroup->prev
2924	      && thr->task->taskgroup->prev->cancelled)
2925	    return;
2926	}
2927    }
2928
2929  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, true);
2930}
2931
2932static void
2933gomp_exit_data (struct gomp_device_descr *devicep, size_t mapnum,
2934		void **hostaddrs, size_t *sizes, unsigned short *kinds,
2935		htab_t *refcount_set)
2936{
2937  const int typemask = 0xff;
2938  size_t i;
2939  gomp_mutex_lock (&devicep->lock);
2940  if (devicep->state == GOMP_DEVICE_FINALIZED)
2941    {
2942      gomp_mutex_unlock (&devicep->lock);
2943      return;
2944    }
2945
2946  for (i = 0; i < mapnum; i++)
2947    if ((kinds[i] & typemask) == GOMP_MAP_DETACH)
2948      {
2949	struct splay_tree_key_s cur_node;
2950	cur_node.host_start = (uintptr_t) hostaddrs[i];
2951	cur_node.host_end = cur_node.host_start + sizeof (void *);
2952	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
2953
2954	if (n)
2955	  gomp_detach_pointer (devicep, NULL, n, (uintptr_t) hostaddrs[i],
2956			       false, NULL);
2957      }
2958
2959  int nrmvars = 0;
2960  splay_tree_key remove_vars[mapnum];
2961
2962  for (i = 0; i < mapnum; i++)
2963    {
2964      struct splay_tree_key_s cur_node;
2965      unsigned char kind = kinds[i] & typemask;
2966      switch (kind)
2967	{
2968	case GOMP_MAP_FROM:
2969	case GOMP_MAP_ALWAYS_FROM:
2970	case GOMP_MAP_DELETE:
2971	case GOMP_MAP_RELEASE:
2972	case GOMP_MAP_ZERO_LEN_ARRAY_SECTION:
2973	case GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION:
2974	  cur_node.host_start = (uintptr_t) hostaddrs[i];
2975	  cur_node.host_end = cur_node.host_start + sizes[i];
2976	  splay_tree_key k = (kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION
2977			      || kind == GOMP_MAP_ZERO_LEN_ARRAY_SECTION)
2978	    ? gomp_map_0len_lookup (&devicep->mem_map, &cur_node)
2979	    : splay_tree_lookup (&devicep->mem_map, &cur_node);
2980	  if (!k)
2981	    continue;
2982
2983	  bool delete_p = (kind == GOMP_MAP_DELETE
2984			   || kind == GOMP_MAP_DELETE_ZERO_LEN_ARRAY_SECTION);
2985	  bool do_copy, do_remove;
2986	  gomp_decrement_refcount (k, refcount_set, delete_p, &do_copy,
2987				   &do_remove);
2988
2989	  if ((kind == GOMP_MAP_FROM && do_copy)
2990	      || kind == GOMP_MAP_ALWAYS_FROM)
2991	    {
2992	      if (k->aux && k->aux->attach_count)
2993		{
2994		  /* We have to be careful not to overwrite still attached
2995		     pointers during the copyback to host.  */
2996		  uintptr_t addr = k->host_start;
2997		  while (addr < k->host_end)
2998		    {
2999		      size_t i = (addr - k->host_start) / sizeof (void *);
3000		      if (k->aux->attach_count[i] == 0)
3001			gomp_copy_dev2host (devicep, NULL, (void *) addr,
3002					    (void *) (k->tgt->tgt_start
3003						      + k->tgt_offset
3004						      + addr - k->host_start),
3005					    sizeof (void *));
3006		      addr += sizeof (void *);
3007		    }
3008		}
3009	      else
3010		gomp_copy_dev2host (devicep, NULL, (void *) cur_node.host_start,
3011				    (void *) (k->tgt->tgt_start + k->tgt_offset
3012					      + cur_node.host_start
3013					      - k->host_start),
3014				    cur_node.host_end - cur_node.host_start);
3015	    }
3016
3017	  /* Structure elements lists are removed altogether at once, which
3018	     may cause immediate deallocation of the target_mem_desc, causing
3019	     errors if we still have following element siblings to copy back.
3020	     While we're at it, it also seems more disciplined to simply
3021	     queue all removals together for processing below.
3022
3023	     Structured block unmapping (i.e. gomp_unmap_vars_internal) should
3024	     not have this problem, since they maintain an additional
3025	     tgt->refcount = 1 reference to the target_mem_desc to start with.
3026	  */
3027	  if (do_remove)
3028	    remove_vars[nrmvars++] = k;
3029	  break;
3030
3031	case GOMP_MAP_DETACH:
3032	  break;
3033	default:
3034	  gomp_mutex_unlock (&devicep->lock);
3035	  gomp_fatal ("GOMP_target_enter_exit_data unhandled kind 0x%.2x",
3036		      kind);
3037	}
3038    }
3039
3040  for (int i = 0; i < nrmvars; i++)
3041    gomp_remove_var (devicep, remove_vars[i]);
3042
3043  gomp_mutex_unlock (&devicep->lock);
3044}
3045
3046void
3047GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs,
3048			     size_t *sizes, unsigned short *kinds,
3049			     unsigned int flags, void **depend)
3050{
3051  struct gomp_device_descr *devicep = resolve_device (device);
3052
3053  /* If there are depend clauses, but nowait is not present,
3054     block the parent task until the dependencies are resolved
3055     and then just continue with the rest of the function as if it
3056     is a merged task.  Until we are able to schedule task during
3057     variable mapping or unmapping, ignore nowait if depend clauses
3058     are not present.  */
3059  if (depend != NULL)
3060    {
3061      struct gomp_thread *thr = gomp_thread ();
3062      if (thr->task && thr->task->depend_hash)
3063	{
3064	  if ((flags & GOMP_TARGET_FLAG_NOWAIT)
3065	      && thr->ts.team
3066	      && !thr->task->final_task)
3067	    {
3068	      if (gomp_create_target_task (devicep, (void (*) (void *)) NULL,
3069					   mapnum, hostaddrs, sizes, kinds,
3070					   flags, depend, NULL,
3071					   GOMP_TARGET_TASK_DATA))
3072		return;
3073	    }
3074	  else
3075	    {
3076	      struct gomp_team *team = thr->ts.team;
3077	      /* If parallel or taskgroup has been cancelled, don't start new
3078		 tasks.  */
3079	      if (__builtin_expect (gomp_cancel_var, 0) && team)
3080		{
3081		  if (gomp_team_barrier_cancelled (&team->barrier))
3082		    return;
3083		  if (thr->task->taskgroup)
3084		    {
3085		      if (thr->task->taskgroup->cancelled)
3086			return;
3087		      if (thr->task->taskgroup->workshare
3088			  && thr->task->taskgroup->prev
3089			  && thr->task->taskgroup->prev->cancelled)
3090			return;
3091		    }
3092		}
3093
3094	      gomp_task_maybe_wait_for_dependencies (depend);
3095	    }
3096	}
3097    }
3098
3099  if (devicep == NULL
3100      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3101      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3102    return;
3103
3104  struct gomp_thread *thr = gomp_thread ();
3105  struct gomp_team *team = thr->ts.team;
3106  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
3107  if (__builtin_expect (gomp_cancel_var, 0) && team)
3108    {
3109      if (gomp_team_barrier_cancelled (&team->barrier))
3110	return;
3111      if (thr->task->taskgroup)
3112	{
3113	  if (thr->task->taskgroup->cancelled)
3114	    return;
3115	  if (thr->task->taskgroup->workshare
3116	      && thr->task->taskgroup->prev
3117	      && thr->task->taskgroup->prev->cancelled)
3118	    return;
3119	}
3120    }
3121
3122  htab_t refcount_set = htab_create (mapnum);
3123
3124  /* The variables are mapped separately such that they can be released
3125     independently.  */
3126  size_t i, j;
3127  if ((flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3128    for (i = 0; i < mapnum; i++)
3129      if ((kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3130	{
3131	  gomp_map_vars (devicep, sizes[i] + 1, &hostaddrs[i], NULL, &sizes[i],
3132			 &kinds[i], true, &refcount_set,
3133			 GOMP_MAP_VARS_ENTER_DATA);
3134	  i += sizes[i];
3135	}
3136      else if ((kinds[i] & 0xff) == GOMP_MAP_TO_PSET)
3137	{
3138	  for (j = i + 1; j < mapnum; j++)
3139	    if (!GOMP_MAP_POINTER_P (get_kind (true, kinds, j) & 0xff)
3140		&& !GOMP_MAP_ALWAYS_POINTER_P (get_kind (true, kinds, j) & 0xff))
3141	      break;
3142	  gomp_map_vars (devicep, j-i, &hostaddrs[i], NULL, &sizes[i],
3143			 &kinds[i], true, &refcount_set,
3144			 GOMP_MAP_VARS_ENTER_DATA);
3145	  i += j - i - 1;
3146	}
3147      else if (i + 1 < mapnum && (kinds[i + 1] & 0xff) == GOMP_MAP_ATTACH)
3148	{
3149	  /* An attach operation must be processed together with the mapped
3150	     base-pointer list item.  */
3151	  gomp_map_vars (devicep, 2, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3152			 true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3153	  i += 1;
3154	}
3155      else
3156	gomp_map_vars (devicep, 1, &hostaddrs[i], NULL, &sizes[i], &kinds[i],
3157		       true, &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3158  else
3159    gomp_exit_data (devicep, mapnum, hostaddrs, sizes, kinds, &refcount_set);
3160  htab_free (refcount_set);
3161}
3162
3163bool
3164gomp_target_task_fn (void *data)
3165{
3166  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
3167  struct gomp_device_descr *devicep = ttask->devicep;
3168
3169  if (ttask->fn != NULL)
3170    {
3171      void *fn_addr;
3172      if (devicep == NULL
3173	  || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3174	  || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))
3175	  || (devicep->can_run_func && !devicep->can_run_func (fn_addr)))
3176	{
3177	  ttask->state = GOMP_TARGET_TASK_FALLBACK;
3178	  gomp_target_fallback (ttask->fn, ttask->hostaddrs, devicep,
3179				ttask->args);
3180	  return false;
3181	}
3182
3183      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
3184	{
3185	  if (ttask->tgt)
3186	    gomp_unmap_vars (ttask->tgt, true, NULL);
3187	  return false;
3188	}
3189
3190      void *actual_arguments;
3191      if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3192	{
3193	  ttask->tgt = NULL;
3194	  actual_arguments = ttask->hostaddrs;
3195	}
3196      else
3197	{
3198	  ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs,
3199				      NULL, ttask->sizes, ttask->kinds, true,
3200				      NULL, GOMP_MAP_VARS_TARGET);
3201	  actual_arguments = (void *) ttask->tgt->tgt_start;
3202	}
3203      ttask->state = GOMP_TARGET_TASK_READY_TO_RUN;
3204
3205      assert (devicep->async_run_func);
3206      devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments,
3207			       ttask->args, (void *) ttask);
3208      return true;
3209    }
3210  else if (devicep == NULL
3211	   || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3212	   || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3213    return false;
3214
3215  size_t i;
3216  if (ttask->flags & GOMP_TARGET_FLAG_UPDATE)
3217    gomp_update (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3218		 ttask->kinds, true);
3219  else
3220    {
3221      htab_t refcount_set = htab_create (ttask->mapnum);
3222      if ((ttask->flags & GOMP_TARGET_FLAG_EXIT_DATA) == 0)
3223	for (i = 0; i < ttask->mapnum; i++)
3224	  if ((ttask->kinds[i] & 0xff) == GOMP_MAP_STRUCT)
3225	    {
3226	      gomp_map_vars (devicep, ttask->sizes[i] + 1, &ttask->hostaddrs[i],
3227			     NULL, &ttask->sizes[i], &ttask->kinds[i], true,
3228			     &refcount_set, GOMP_MAP_VARS_ENTER_DATA);
3229	      i += ttask->sizes[i];
3230	    }
3231	  else
3232	    gomp_map_vars (devicep, 1, &ttask->hostaddrs[i], NULL, &ttask->sizes[i],
3233			   &ttask->kinds[i], true, &refcount_set,
3234			   GOMP_MAP_VARS_ENTER_DATA);
3235      else
3236	gomp_exit_data (devicep, ttask->mapnum, ttask->hostaddrs, ttask->sizes,
3237			ttask->kinds, &refcount_set);
3238      htab_free (refcount_set);
3239    }
3240  return false;
3241}
3242
3243void
3244GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
3245{
3246  if (thread_limit)
3247    {
3248      struct gomp_task_icv *icv = gomp_icv (true);
3249      icv->thread_limit_var
3250	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3251    }
3252  (void) num_teams;
3253}
3254
3255bool
3256GOMP_teams4 (unsigned int num_teams_low, unsigned int num_teams_high,
3257	     unsigned int thread_limit, bool first)
3258{
3259  struct gomp_thread *thr = gomp_thread ();
3260  if (first)
3261    {
3262      if (thread_limit)
3263	{
3264	  struct gomp_task_icv *icv = gomp_icv (true);
3265	  icv->thread_limit_var
3266	    = thread_limit > INT_MAX ? UINT_MAX : thread_limit;
3267	}
3268      (void) num_teams_high;
3269      if (num_teams_low == 0)
3270	num_teams_low = 1;
3271      thr->num_teams = num_teams_low - 1;
3272      thr->team_num = 0;
3273    }
3274  else if (thr->team_num == thr->num_teams)
3275    return false;
3276  else
3277    ++thr->team_num;
3278  return true;
3279}
3280
3281void *
3282omp_target_alloc (size_t size, int device_num)
3283{
3284  if (device_num == gomp_get_num_devices ())
3285    return malloc (size);
3286
3287  if (device_num < 0)
3288    return NULL;
3289
3290  struct gomp_device_descr *devicep = resolve_device (device_num);
3291  if (devicep == NULL)
3292    return NULL;
3293
3294  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3295      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3296    return malloc (size);
3297
3298  gomp_mutex_lock (&devicep->lock);
3299  void *ret = devicep->alloc_func (devicep->target_id, size);
3300  gomp_mutex_unlock (&devicep->lock);
3301  return ret;
3302}
3303
3304void
3305omp_target_free (void *device_ptr, int device_num)
3306{
3307  if (device_ptr == NULL)
3308    return;
3309
3310  if (device_num == gomp_get_num_devices ())
3311    {
3312      free (device_ptr);
3313      return;
3314    }
3315
3316  if (device_num < 0)
3317    return;
3318
3319  struct gomp_device_descr *devicep = resolve_device (device_num);
3320  if (devicep == NULL)
3321    return;
3322
3323  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3324      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3325    {
3326      free (device_ptr);
3327      return;
3328    }
3329
3330  gomp_mutex_lock (&devicep->lock);
3331  gomp_free_device_memory (devicep, device_ptr);
3332  gomp_mutex_unlock (&devicep->lock);
3333}
3334
3335int
3336omp_target_is_present (const void *ptr, int device_num)
3337{
3338  if (ptr == NULL)
3339    return 1;
3340
3341  if (device_num == gomp_get_num_devices ())
3342    return 1;
3343
3344  if (device_num < 0)
3345    return 0;
3346
3347  struct gomp_device_descr *devicep = resolve_device (device_num);
3348  if (devicep == NULL)
3349    return 0;
3350
3351  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3352      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3353    return 1;
3354
3355  gomp_mutex_lock (&devicep->lock);
3356  struct splay_tree_s *mem_map = &devicep->mem_map;
3357  struct splay_tree_key_s cur_node;
3358
3359  cur_node.host_start = (uintptr_t) ptr;
3360  cur_node.host_end = cur_node.host_start;
3361  splay_tree_key n = gomp_map_0len_lookup (mem_map, &cur_node);
3362  int ret = n != NULL;
3363  gomp_mutex_unlock (&devicep->lock);
3364  return ret;
3365}
3366
3367int
3368omp_target_memcpy (void *dst, const void *src, size_t length,
3369		   size_t dst_offset, size_t src_offset, int dst_device_num,
3370		   int src_device_num)
3371{
3372  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3373  bool ret;
3374
3375  if (dst_device_num != gomp_get_num_devices ())
3376    {
3377      if (dst_device_num < 0)
3378	return EINVAL;
3379
3380      dst_devicep = resolve_device (dst_device_num);
3381      if (dst_devicep == NULL)
3382	return EINVAL;
3383
3384      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3385	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3386	dst_devicep = NULL;
3387    }
3388  if (src_device_num != num_devices_openmp)
3389    {
3390      if (src_device_num < 0)
3391	return EINVAL;
3392
3393      src_devicep = resolve_device (src_device_num);
3394      if (src_devicep == NULL)
3395	return EINVAL;
3396
3397      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3398	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3399	src_devicep = NULL;
3400    }
3401  if (src_devicep == NULL && dst_devicep == NULL)
3402    {
3403      memcpy ((char *) dst + dst_offset, (char *) src + src_offset, length);
3404      return 0;
3405    }
3406  if (src_devicep == NULL)
3407    {
3408      gomp_mutex_lock (&dst_devicep->lock);
3409      ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3410					(char *) dst + dst_offset,
3411					(char *) src + src_offset, length);
3412      gomp_mutex_unlock (&dst_devicep->lock);
3413      return (ret ? 0 : EINVAL);
3414    }
3415  if (dst_devicep == NULL)
3416    {
3417      gomp_mutex_lock (&src_devicep->lock);
3418      ret = src_devicep->dev2host_func (src_devicep->target_id,
3419					(char *) dst + dst_offset,
3420					(char *) src + src_offset, length);
3421      gomp_mutex_unlock (&src_devicep->lock);
3422      return (ret ? 0 : EINVAL);
3423    }
3424  if (src_devicep == dst_devicep)
3425    {
3426      gomp_mutex_lock (&src_devicep->lock);
3427      ret = src_devicep->dev2dev_func (src_devicep->target_id,
3428				       (char *) dst + dst_offset,
3429				       (char *) src + src_offset, length);
3430      gomp_mutex_unlock (&src_devicep->lock);
3431      return (ret ? 0 : EINVAL);
3432    }
3433  return EINVAL;
3434}
3435
3436static int
3437omp_target_memcpy_rect_worker (void *dst, const void *src, size_t element_size,
3438			       int num_dims, const size_t *volume,
3439			       const size_t *dst_offsets,
3440			       const size_t *src_offsets,
3441			       const size_t *dst_dimensions,
3442			       const size_t *src_dimensions,
3443			       struct gomp_device_descr *dst_devicep,
3444			       struct gomp_device_descr *src_devicep)
3445{
3446  size_t dst_slice = element_size;
3447  size_t src_slice = element_size;
3448  size_t j, dst_off, src_off, length;
3449  int i, ret;
3450
3451  if (num_dims == 1)
3452    {
3453      if (__builtin_mul_overflow (element_size, volume[0], &length)
3454	  || __builtin_mul_overflow (element_size, dst_offsets[0], &dst_off)
3455	  || __builtin_mul_overflow (element_size, src_offsets[0], &src_off))
3456	return EINVAL;
3457      if (dst_devicep == NULL && src_devicep == NULL)
3458	{
3459	  memcpy ((char *) dst + dst_off, (const char *) src + src_off,
3460		  length);
3461	  ret = 1;
3462	}
3463      else if (src_devicep == NULL)
3464	ret = dst_devicep->host2dev_func (dst_devicep->target_id,
3465					  (char *) dst + dst_off,
3466					  (const char *) src + src_off,
3467					  length);
3468      else if (dst_devicep == NULL)
3469	ret = src_devicep->dev2host_func (src_devicep->target_id,
3470					  (char *) dst + dst_off,
3471					  (const char *) src + src_off,
3472					  length);
3473      else if (src_devicep == dst_devicep)
3474	ret = src_devicep->dev2dev_func (src_devicep->target_id,
3475					 (char *) dst + dst_off,
3476					 (const char *) src + src_off,
3477					 length);
3478      else
3479	ret = 0;
3480      return ret ? 0 : EINVAL;
3481    }
3482
3483  /* FIXME: it would be nice to have some plugin function to handle
3484     num_dims == 2 and num_dims == 3 more efficiently.  Larger ones can
3485     be handled in the generic recursion below, and for host-host it
3486     should be used even for any num_dims >= 2.  */
3487
3488  for (i = 1; i < num_dims; i++)
3489    if (__builtin_mul_overflow (dst_slice, dst_dimensions[i], &dst_slice)
3490	|| __builtin_mul_overflow (src_slice, src_dimensions[i], &src_slice))
3491      return EINVAL;
3492  if (__builtin_mul_overflow (dst_slice, dst_offsets[0], &dst_off)
3493      || __builtin_mul_overflow (src_slice, src_offsets[0], &src_off))
3494    return EINVAL;
3495  for (j = 0; j < volume[0]; j++)
3496    {
3497      ret = omp_target_memcpy_rect_worker ((char *) dst + dst_off,
3498					   (const char *) src + src_off,
3499					   element_size, num_dims - 1,
3500					   volume + 1, dst_offsets + 1,
3501					   src_offsets + 1, dst_dimensions + 1,
3502					   src_dimensions + 1, dst_devicep,
3503					   src_devicep);
3504      if (ret)
3505	return ret;
3506      dst_off += dst_slice;
3507      src_off += src_slice;
3508    }
3509  return 0;
3510}
3511
3512int
3513omp_target_memcpy_rect (void *dst, const void *src, size_t element_size,
3514			int num_dims, const size_t *volume,
3515			const size_t *dst_offsets,
3516			const size_t *src_offsets,
3517			const size_t *dst_dimensions,
3518			const size_t *src_dimensions,
3519			int dst_device_num, int src_device_num)
3520{
3521  struct gomp_device_descr *dst_devicep = NULL, *src_devicep = NULL;
3522
3523  if (!dst && !src)
3524    return INT_MAX;
3525
3526  if (dst_device_num != gomp_get_num_devices ())
3527    {
3528      if (dst_device_num < 0)
3529	return EINVAL;
3530
3531      dst_devicep = resolve_device (dst_device_num);
3532      if (dst_devicep == NULL)
3533	return EINVAL;
3534
3535      if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3536	  || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3537	dst_devicep = NULL;
3538    }
3539  if (src_device_num != num_devices_openmp)
3540    {
3541      if (src_device_num < 0)
3542	return EINVAL;
3543
3544      src_devicep = resolve_device (src_device_num);
3545      if (src_devicep == NULL)
3546	return EINVAL;
3547
3548      if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3549	  || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3550	src_devicep = NULL;
3551    }
3552
3553  if (src_devicep != NULL && dst_devicep != NULL && src_devicep != dst_devicep)
3554    return EINVAL;
3555
3556  if (src_devicep)
3557    gomp_mutex_lock (&src_devicep->lock);
3558  else if (dst_devicep)
3559    gomp_mutex_lock (&dst_devicep->lock);
3560  int ret = omp_target_memcpy_rect_worker (dst, src, element_size, num_dims,
3561					   volume, dst_offsets, src_offsets,
3562					   dst_dimensions, src_dimensions,
3563					   dst_devicep, src_devicep);
3564  if (src_devicep)
3565    gomp_mutex_unlock (&src_devicep->lock);
3566  else if (dst_devicep)
3567    gomp_mutex_unlock (&dst_devicep->lock);
3568  return ret;
3569}
3570
3571int
3572omp_target_associate_ptr (const void *host_ptr, const void *device_ptr,
3573			  size_t size, size_t device_offset, int device_num)
3574{
3575  if (device_num == gomp_get_num_devices ())
3576    return EINVAL;
3577
3578  if (device_num < 0)
3579    return EINVAL;
3580
3581  struct gomp_device_descr *devicep = resolve_device (device_num);
3582  if (devicep == NULL)
3583    return EINVAL;
3584
3585  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3586      || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
3587    return EINVAL;
3588
3589  gomp_mutex_lock (&devicep->lock);
3590
3591  struct splay_tree_s *mem_map = &devicep->mem_map;
3592  struct splay_tree_key_s cur_node;
3593  int ret = EINVAL;
3594
3595  cur_node.host_start = (uintptr_t) host_ptr;
3596  cur_node.host_end = cur_node.host_start + size;
3597  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3598  if (n)
3599    {
3600      if (n->tgt->tgt_start + n->tgt_offset
3601	  == (uintptr_t) device_ptr + device_offset
3602	  && n->host_start <= cur_node.host_start
3603	  && n->host_end >= cur_node.host_end)
3604	ret = 0;
3605    }
3606  else
3607    {
3608      struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
3609      tgt->array = gomp_malloc (sizeof (*tgt->array));
3610      tgt->refcount = 1;
3611      tgt->tgt_start = 0;
3612      tgt->tgt_end = 0;
3613      tgt->to_free = NULL;
3614      tgt->prev = NULL;
3615      tgt->list_count = 0;
3616      tgt->device_descr = devicep;
3617      splay_tree_node array = tgt->array;
3618      splay_tree_key k = &array->key;
3619      k->host_start = cur_node.host_start;
3620      k->host_end = cur_node.host_end;
3621      k->tgt = tgt;
3622      k->tgt_offset = (uintptr_t) device_ptr + device_offset;
3623      k->refcount = REFCOUNT_INFINITY;
3624      k->dynamic_refcount = 0;
3625      k->aux = NULL;
3626      array->left = NULL;
3627      array->right = NULL;
3628      splay_tree_insert (&devicep->mem_map, array);
3629      ret = 0;
3630    }
3631  gomp_mutex_unlock (&devicep->lock);
3632  return ret;
3633}
3634
3635int
3636omp_target_disassociate_ptr (const void *ptr, int device_num)
3637{
3638  if (device_num == gomp_get_num_devices ())
3639    return EINVAL;
3640
3641  if (device_num < 0)
3642    return EINVAL;
3643
3644  struct gomp_device_descr *devicep = resolve_device (device_num);
3645  if (devicep == NULL)
3646    return EINVAL;
3647
3648  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3649    return EINVAL;
3650
3651  gomp_mutex_lock (&devicep->lock);
3652
3653  struct splay_tree_s *mem_map = &devicep->mem_map;
3654  struct splay_tree_key_s cur_node;
3655  int ret = EINVAL;
3656
3657  cur_node.host_start = (uintptr_t) ptr;
3658  cur_node.host_end = cur_node.host_start;
3659  splay_tree_key n = gomp_map_lookup (mem_map, &cur_node);
3660  if (n
3661      && n->host_start == cur_node.host_start
3662      && n->refcount == REFCOUNT_INFINITY
3663      && n->tgt->tgt_start == 0
3664      && n->tgt->to_free == NULL
3665      && n->tgt->refcount == 1
3666      && n->tgt->list_count == 0)
3667    {
3668      splay_tree_remove (&devicep->mem_map, n);
3669      gomp_unmap_tgt (n->tgt);
3670      ret = 0;
3671    }
3672
3673  gomp_mutex_unlock (&devicep->lock);
3674  return ret;
3675}
3676
3677int
3678omp_pause_resource (omp_pause_resource_t kind, int device_num)
3679{
3680  (void) kind;
3681  if (device_num == gomp_get_num_devices ())
3682    return gomp_pause_host ();
3683  if (device_num < 0 || device_num >= num_devices_openmp)
3684    return -1;
3685  /* Do nothing for target devices for now.  */
3686  return 0;
3687}
3688
3689int
3690omp_pause_resource_all (omp_pause_resource_t kind)
3691{
3692  (void) kind;
3693  if (gomp_pause_host ())
3694    return -1;
3695  /* Do nothing for target devices for now.  */
3696  return 0;
3697}
3698
3699ialias (omp_pause_resource)
3700ialias (omp_pause_resource_all)
3701
3702#ifdef PLUGIN_SUPPORT
3703
3704/* This function tries to load a plugin for DEVICE.  Name of plugin is passed
3705   in PLUGIN_NAME.
3706   The handles of the found functions are stored in the corresponding fields
3707   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */
3708
3709static bool
3710gomp_load_plugin_for_device (struct gomp_device_descr *device,
3711			     const char *plugin_name)
3712{
3713  const char *err = NULL, *last_missing = NULL;
3714
3715  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
3716  if (!plugin_handle)
3717#if OFFLOAD_DEFAULTED
3718    return 0;
3719#else
3720    goto dl_fail;
3721#endif
3722
3723  /* Check if all required functions are available in the plugin and store
3724     their handlers.  None of the symbols can legitimately be NULL,
3725     so we don't need to check dlerror all the time.  */
3726#define DLSYM(f)							\
3727  if (!(device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f)))	\
3728    goto dl_fail
3729  /* Similar, but missing functions are not an error.  Return false if
3730     failed, true otherwise.  */
3731#define DLSYM_OPT(f, n)							\
3732  ((device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n))	\
3733   || (last_missing = #n, 0))
3734
3735  DLSYM (version);
3736  if (device->version_func () != GOMP_VERSION)
3737    {
3738      err = "plugin version mismatch";
3739      goto fail;
3740    }
3741
3742  DLSYM (get_name);
3743  DLSYM (get_caps);
3744  DLSYM (get_type);
3745  DLSYM (get_num_devices);
3746  DLSYM (init_device);
3747  DLSYM (fini_device);
3748  DLSYM (load_image);
3749  DLSYM (unload_image);
3750  DLSYM (alloc);
3751  DLSYM (free);
3752  DLSYM (dev2host);
3753  DLSYM (host2dev);
3754  device->capabilities = device->get_caps_func ();
3755  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3756    {
3757      DLSYM (run);
3758      DLSYM_OPT (async_run, async_run);
3759      DLSYM_OPT (can_run, can_run);
3760      DLSYM (dev2dev);
3761    }
3762  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3763    {
3764      if (!DLSYM_OPT (openacc.exec, openacc_exec)
3765	  || !DLSYM_OPT (openacc.create_thread_data,
3766			 openacc_create_thread_data)
3767	  || !DLSYM_OPT (openacc.destroy_thread_data,
3768			 openacc_destroy_thread_data)
3769	  || !DLSYM_OPT (openacc.async.construct, openacc_async_construct)
3770	  || !DLSYM_OPT (openacc.async.destruct, openacc_async_destruct)
3771	  || !DLSYM_OPT (openacc.async.test, openacc_async_test)
3772	  || !DLSYM_OPT (openacc.async.synchronize, openacc_async_synchronize)
3773	  || !DLSYM_OPT (openacc.async.serialize, openacc_async_serialize)
3774	  || !DLSYM_OPT (openacc.async.queue_callback,
3775			 openacc_async_queue_callback)
3776	  || !DLSYM_OPT (openacc.async.exec, openacc_async_exec)
3777	  || !DLSYM_OPT (openacc.async.dev2host, openacc_async_dev2host)
3778	  || !DLSYM_OPT (openacc.async.host2dev, openacc_async_host2dev)
3779	  || !DLSYM_OPT (openacc.get_property, openacc_get_property))
3780	{
3781	  /* Require all the OpenACC handlers if we have
3782	     GOMP_OFFLOAD_CAP_OPENACC_200.  */
3783	  err = "plugin missing OpenACC handler function";
3784	  goto fail;
3785	}
3786
3787      unsigned cuda = 0;
3788      cuda += DLSYM_OPT (openacc.cuda.get_current_device,
3789			 openacc_cuda_get_current_device);
3790      cuda += DLSYM_OPT (openacc.cuda.get_current_context,
3791			 openacc_cuda_get_current_context);
3792      cuda += DLSYM_OPT (openacc.cuda.get_stream, openacc_cuda_get_stream);
3793      cuda += DLSYM_OPT (openacc.cuda.set_stream, openacc_cuda_set_stream);
3794      if (cuda && cuda != 4)
3795	{
3796	  /* Make sure all the CUDA functions are there if any of them are.  */
3797	  err = "plugin missing OpenACC CUDA handler function";
3798	  goto fail;
3799	}
3800    }
3801#undef DLSYM
3802#undef DLSYM_OPT
3803
3804  return 1;
3805
3806 dl_fail:
3807  err = dlerror ();
3808 fail:
3809  gomp_error ("while loading %s: %s", plugin_name, err);
3810  if (last_missing)
3811    gomp_error ("missing function was %s", last_missing);
3812  if (plugin_handle)
3813    dlclose (plugin_handle);
3814
3815  return 0;
3816}
3817
3818/* This function finalizes all initialized devices.  */
3819
3820static void
3821gomp_target_fini (void)
3822{
3823  int i;
3824  for (i = 0; i < num_devices; i++)
3825    {
3826      bool ret = true;
3827      struct gomp_device_descr *devicep = &devices[i];
3828      gomp_mutex_lock (&devicep->lock);
3829      if (devicep->state == GOMP_DEVICE_INITIALIZED)
3830	ret = gomp_fini_device (devicep);
3831      gomp_mutex_unlock (&devicep->lock);
3832      if (!ret)
3833	gomp_fatal ("device finalization failed");
3834    }
3835}
3836
3837/* This function initializes the runtime for offloading.
3838   It parses the list of offload plugins, and tries to load these.
3839   On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
3840   will be set, and the array DEVICES initialized, containing descriptors for
3841   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, follows
3842   by the others.  */
3843
3844static void
3845gomp_target_init (void)
3846{
3847  const char *prefix ="libgomp-plugin-";
3848  const char *suffix = SONAME_SUFFIX (1);
3849  const char *cur, *next;
3850  char *plugin_name;
3851  int i, new_num_devs;
3852  int num_devs = 0, num_devs_openmp;
3853  struct gomp_device_descr *devs = NULL;
3854
3855  if (gomp_target_offload_var == GOMP_TARGET_OFFLOAD_DISABLED)
3856    return;
3857
3858  cur = OFFLOAD_PLUGINS;
3859  if (*cur)
3860    do
3861      {
3862	struct gomp_device_descr current_device;
3863	size_t prefix_len, suffix_len, cur_len;
3864
3865	next = strchr (cur, ',');
3866
3867	prefix_len = strlen (prefix);
3868	cur_len = next ? next - cur : strlen (cur);
3869	suffix_len = strlen (suffix);
3870
3871	plugin_name = (char *) malloc (prefix_len + cur_len + suffix_len + 1);
3872	if (!plugin_name)
3873	  {
3874	    num_devs = 0;
3875	    break;
3876	  }
3877
3878	memcpy (plugin_name, prefix, prefix_len);
3879	memcpy (plugin_name + prefix_len, cur, cur_len);
3880	memcpy (plugin_name + prefix_len + cur_len, suffix, suffix_len + 1);
3881
3882	if (gomp_load_plugin_for_device (&current_device, plugin_name))
3883	  {
3884	    new_num_devs = current_device.get_num_devices_func ();
3885	    if (new_num_devs >= 1)
3886	      {
3887		/* Augment DEVICES and NUM_DEVICES.  */
3888
3889		devs = realloc (devs, (num_devs + new_num_devs)
3890				      * sizeof (struct gomp_device_descr));
3891		if (!devs)
3892		  {
3893		    num_devs = 0;
3894		    free (plugin_name);
3895		    break;
3896		  }
3897
3898		current_device.name = current_device.get_name_func ();
3899		/* current_device.capabilities has already been set.  */
3900		current_device.type = current_device.get_type_func ();
3901		current_device.mem_map.root = NULL;
3902		current_device.state = GOMP_DEVICE_UNINITIALIZED;
3903		for (i = 0; i < new_num_devs; i++)
3904		  {
3905		    current_device.target_id = i;
3906		    devs[num_devs] = current_device;
3907		    gomp_mutex_init (&devs[num_devs].lock);
3908		    num_devs++;
3909		  }
3910	      }
3911	  }
3912
3913	free (plugin_name);
3914	cur = next + 1;
3915      }
3916    while (next);
3917
3918  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
3919     NUM_DEVICES_OPENMP.  */
3920  struct gomp_device_descr *devs_s
3921    = malloc (num_devs * sizeof (struct gomp_device_descr));
3922  if (!devs_s)
3923    {
3924      num_devs = 0;
3925      free (devs);
3926      devs = NULL;
3927    }
3928  num_devs_openmp = 0;
3929  for (i = 0; i < num_devs; i++)
3930    if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
3931      devs_s[num_devs_openmp++] = devs[i];
3932  int num_devs_after_openmp = num_devs_openmp;
3933  for (i = 0; i < num_devs; i++)
3934    if (!(devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
3935      devs_s[num_devs_after_openmp++] = devs[i];
3936  free (devs);
3937  devs = devs_s;
3938
3939  for (i = 0; i < num_devs; i++)
3940    {
3941      /* The 'devices' array can be moved (by the realloc call) until we have
3942	 found all the plugins, so registering with the OpenACC runtime (which
3943	 takes a copy of the pointer argument) must be delayed until now.  */
3944      if (devs[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
3945	goacc_register (&devs[i]);
3946    }
3947
3948  num_devices = num_devs;
3949  num_devices_openmp = num_devs_openmp;
3950  devices = devs;
3951  if (atexit (gomp_target_fini) != 0)
3952    gomp_fatal ("atexit failed");
3953}
3954
3955#else /* PLUGIN_SUPPORT */
3956/* If dlfcn.h is unavailable we always fallback to host execution.
3957   GOMP_target* routines are just stubs for this case.  */
3958static void
3959gomp_target_init (void)
3960{
3961}
3962#endif /* PLUGIN_SUPPORT */
3963