/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains the support of offloading.  */

#include "config.h"
#include "libgomp.h"
#include "oacc-plugin.h"
#include "oacc-int.h"
#include "gomp-constants.h"
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#ifdef HAVE_INTTYPES_H
# include <inttypes.h>  /* For PRIu64.  */
#endif
#include <string.h>
#include <assert.h>

#ifdef PLUGIN_SUPPORT
#include <dlfcn.h>
#include "plugin-suffix.h"
#endif

static void gomp_target_init (void);

/* The whole initialization code for offloading plugins is only run once.  */
static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;

/* Mutex for offload image registration.  */
static gomp_mutex_t register_lock;

/* This structure describes an offload image.
   It contains the type of the target device, a pointer to the host table
   descriptor, and a pointer to the target data.  */
struct offload_image_descr {
  enum offload_target_type type;
  void *host_table;
  void *target_data;
};

/* Array of descriptors of offload images.  */
static struct offload_image_descr *offload_images;

/* Total number of offload images.  */
static int num_offload_images;

/* Array of descriptors for all available devices.  */
static struct gomp_device_descr *devices;

/* Total number of available devices.  */
static int num_devices;

/* Number of GOMP_OFFLOAD_CAP_OPENMP_400 devices.  */
static int num_devices_openmp;

/* Similar to gomp_realloc, but release register_lock before gomp_fatal.  */

static void *
gomp_realloc_unlock (void *old, size_t size)
{
  void *ret = realloc (old, size);
  if (ret == NULL)
    {
      gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Out of memory allocating %lu bytes", (unsigned long) size);
    }
  return ret;
}

/* The comparison function for splay-tree keys.  Keys are half-open address
   ranges [host_start, host_end); ranges that overlap compare equal.  */

attribute_hidden int
splay_compare (splay_tree_key x, splay_tree_key y)
{
  if (x->host_start == x->host_end
      && y->host_start == y->host_end)
    return 0;
  if (x->host_end <= y->host_start)
    return -1;
  if (x->host_start >= y->host_end)
    return 1;
  return 0;
}
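
/* Note that because overlapping ranges compare equal, looking up a key that
   covers as little as a single byte of an object (as done below for function
   addresses and for pointer targets) finds the enclosing mapping, if any.  */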

#include "splay-tree.h"

attribute_hidden void
gomp_init_targets_once (void)
{
  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
}

attribute_hidden int
gomp_get_num_devices (void)
{
  gomp_init_targets_once ();
  return num_devices_openmp;
}

static struct gomp_device_descr *
resolve_device (int device_id)
{
  if (device_id == GOMP_DEVICE_ICV)
    {
      struct gomp_task_icv *icv = gomp_icv (false);
      device_id = icv->default_device_var;
    }

  if (device_id < 0 || device_id >= gomp_get_num_devices ())
    return NULL;

  return &devices[device_id];
}


/* Handle the case where splay_tree_lookup found oldn for newn.
   Helper function of gomp_map_vars.  */

static inline void
gomp_map_vars_existing (struct gomp_device_descr *devicep, splay_tree_key oldn,
			splay_tree_key newn, unsigned char kind)
{
  if ((kind & GOMP_MAP_FLAG_FORCE)
      || oldn->host_start > newn->host_start
      || oldn->host_end < newn->host_end)
    {
      gomp_mutex_unlock (&devicep->lock);
      gomp_fatal ("Trying to map into device [%p..%p) object when "
		  "[%p..%p) is already mapped",
		  (void *) newn->host_start, (void *) newn->host_end,
		  (void *) oldn->host_start, (void *) oldn->host_end);
    }
  oldn->refcount++;
}

static int
get_kind (bool is_openacc, void *kinds, int idx)
{
  return is_openacc ? ((unsigned short *) kinds)[idx]
		    : ((unsigned char *) kinds)[idx];
}
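
/* Each element of the KINDS array encodes both the map kind and the required
   alignment of the corresponding object: the low bits (3 for OpenMP, 8 for
   OpenACC, see TYPEMASK below) hold the GOMP_MAP_* value, while the remaining
   high bits (obtained via RSHIFT) hold log2 of the alignment, as used in
   gomp_map_vars below.  */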

attribute_hidden struct target_mem_desc *
gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
	       void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
	       bool is_openacc, bool is_target)
{
  size_t i, tgt_align, tgt_size, not_found_cnt = 0;
  const int rshift = is_openacc ? 8 : 3;
  const int typemask = is_openacc ? 0xff : 0x7;
  struct splay_tree_s *mem_map = &devicep->mem_map;
  struct splay_tree_key_s cur_node;
  struct target_mem_desc *tgt
    = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum);
  tgt->list_count = mapnum;
  tgt->refcount = 1;
  tgt->device_descr = devicep;

  if (mapnum == 0)
    return tgt;

  tgt_align = sizeof (void *);
  tgt_size = 0;
  if (is_target)
    {
      size_t align = 4 * sizeof (void *);
      tgt_align = align;
      tgt_size = mapnum * sizeof (void *);
    }

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < mapnum; i++)
    {
      int kind = get_kind (is_openacc, kinds, i);
      if (hostaddrs[i] == NULL)
	{
	  tgt->list[i] = NULL;
	  continue;
	}
      cur_node.host_start = (uintptr_t) hostaddrs[i];
      if (!GOMP_MAP_POINTER_P (kind & typemask))
	cur_node.host_end = cur_node.host_start + sizes[i];
      else
	cur_node.host_end = cur_node.host_start + sizeof (void *);
      splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
      if (n)
	{
	  tgt->list[i] = n;
	  gomp_map_vars_existing (devicep, n, &cur_node, kind & typemask);
	}
      else
	{
	  tgt->list[i] = NULL;

	  size_t align = (size_t) 1 << (kind >> rshift);
	  not_found_cnt++;
	  if (tgt_align < align)
	    tgt_align = align;
	  tgt_size = (tgt_size + align - 1) & ~(align - 1);
	  tgt_size += cur_node.host_end - cur_node.host_start;
	  if ((kind & typemask) == GOMP_MAP_TO_PSET)
	    {
	      size_t j;
	      for (j = i + 1; j < mapnum; j++)
		if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					 & typemask))
		  break;
		else if ((uintptr_t) hostaddrs[j] < cur_node.host_start
			 || ((uintptr_t) hostaddrs[j] + sizeof (void *)
			     > cur_node.host_end))
		  break;
		else
		  {
		    tgt->list[j] = NULL;
		    i++;
		  }
	    }
	}
    }

  if (devaddrs)
    {
      if (mapnum != 1)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("unexpected aggregation");
	}
      tgt->to_free = devaddrs[0];
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_end = tgt->tgt_start + sizes[0];
    }
  else if (not_found_cnt || is_target)
    {
      /* Allocate a block of TGT_SIZE bytes of device memory, aligned to
	 TGT_ALIGN.  */
      /* FIXME: Perhaps change interface to allocate properly aligned
	 memory.  */
      tgt->to_free = devicep->alloc_func (devicep->target_id,
					  tgt_size + tgt_align - 1);
      tgt->tgt_start = (uintptr_t) tgt->to_free;
      tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
      tgt->tgt_end = tgt->tgt_start + tgt_size;
    }
  else
    {
      tgt->to_free = NULL;
      tgt->tgt_start = 0;
      tgt->tgt_end = 0;
    }

  tgt_size = 0;
  if (is_target)
    tgt_size = mapnum * sizeof (void *);

  tgt->array = NULL;
  if (not_found_cnt)
    {
      tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array));
      splay_tree_node array = tgt->array;
      size_t j;

      for (i = 0; i < mapnum; i++)
	if (tgt->list[i] == NULL)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (hostaddrs[i] == NULL)
	      continue;
	    splay_tree_key k = &array->key;
	    k->host_start = (uintptr_t) hostaddrs[i];
	    if (!GOMP_MAP_POINTER_P (kind & typemask))
	      k->host_end = k->host_start + sizes[i];
	    else
	      k->host_end = k->host_start + sizeof (void *);
	    splay_tree_key n = splay_tree_lookup (mem_map, k);
	    if (n)
	      {
		tgt->list[i] = n;
		gomp_map_vars_existing (devicep, n, k, kind & typemask);
	      }
	    else
	      {
		size_t align = (size_t) 1 << (kind >> rshift);
		tgt->list[i] = k;
		tgt_size = (tgt_size + align - 1) & ~(align - 1);
		k->tgt = tgt;
		k->tgt_offset = tgt_size;
		tgt_size += k->host_end - k->host_start;
		k->copy_from = GOMP_MAP_COPY_FROM_P (kind & typemask);
		k->refcount = 1;
		k->async_refcount = 0;
		tgt->refcount++;
		array->left = NULL;
		array->right = NULL;
		splay_tree_insert (mem_map, array);
		switch (kind & typemask)
		  {
		  case GOMP_MAP_ALLOC:
		  case GOMP_MAP_FROM:
		  case GOMP_MAP_FORCE_ALLOC:
		  case GOMP_MAP_FORCE_FROM:
		    break;
		  case GOMP_MAP_TO:
		  case GOMP_MAP_TOFROM:
		  case GOMP_MAP_FORCE_TO:
		  case GOMP_MAP_FORCE_TOFROM:
		    /* FIXME: Perhaps add some smarts, like if copying
		       several adjacent fields from host to target, use some
		       host buffer to avoid sending each var individually.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);
		    break;
		  case GOMP_MAP_POINTER:
		    cur_node.host_start
		      = (uintptr_t) *(void **) k->host_start;
		    if (cur_node.host_start == (uintptr_t) NULL)
		      {
			cur_node.tgt_offset = (uintptr_t) NULL;
			/* FIXME: see above FIXME comment.  */
			devicep->host2dev_func (devicep->target_id,
						(void *) (tgt->tgt_start
							  + k->tgt_offset),
						(void *) &cur_node.tgt_offset,
						sizeof (void *));
			break;
		      }
		    /* Add bias to the pointer value.  */
		    cur_node.host_start += sizes[i];
		    cur_node.host_end = cur_node.host_start + 1;
		    n = splay_tree_lookup (mem_map, &cur_node);
		    if (n == NULL)
		      {
			/* Could possibly be a zero-size array section.  */
			cur_node.host_end--;
			n = splay_tree_lookup (mem_map, &cur_node);
			if (n == NULL)
			  {
			    cur_node.host_start--;
			    n = splay_tree_lookup (mem_map, &cur_node);
			    cur_node.host_start++;
			  }
		      }
		    if (n == NULL)
		      {
			gomp_mutex_unlock (&devicep->lock);
			gomp_fatal ("Pointer target of array section "
				    "wasn't mapped");
		      }
		    cur_node.host_start -= n->host_start;
		    cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset
					  + cur_node.host_start;
		    /* At this point tgt_offset is the target address of the
		       array section.  Now subtract bias to get what we want
		       to initialize the pointer with.  */
		    cur_node.tgt_offset -= sizes[i];
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) &cur_node.tgt_offset,
					    sizeof (void *));
		    break;
		  case GOMP_MAP_TO_PSET:
		    /* FIXME: see above FIXME comment.  */
		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    k->host_end - k->host_start);

		    for (j = i + 1; j < mapnum; j++)
		      if (!GOMP_MAP_POINTER_P (get_kind (is_openacc, kinds, j)
					       & typemask))
			break;
		      else if ((uintptr_t) hostaddrs[j] < k->host_start
			       || ((uintptr_t) hostaddrs[j] + sizeof (void *)
				   > k->host_end))
			break;
		      else
			{
			  tgt->list[j] = k;
			  k->refcount++;
			  cur_node.host_start
			    = (uintptr_t) *(void **) hostaddrs[j];
			  if (cur_node.host_start == (uintptr_t) NULL)
			    {
			      cur_node.tgt_offset = (uintptr_t) NULL;
			      /* FIXME: see above FIXME comment.  */
			      devicep->host2dev_func (devicep->target_id,
				 (void *) (tgt->tgt_start + k->tgt_offset
					   + ((uintptr_t) hostaddrs[j]
					      - k->host_start)),
				 (void *) &cur_node.tgt_offset,
				 sizeof (void *));
			      i++;
			      continue;
			    }
			  /* Add bias to the pointer value.  */
			  cur_node.host_start += sizes[j];
			  cur_node.host_end = cur_node.host_start + 1;
			  n = splay_tree_lookup (mem_map, &cur_node);
			  if (n == NULL)
			    {
			      /* Could possibly be a zero-size array
				 section.  */
			      cur_node.host_end--;
			      n = splay_tree_lookup (mem_map, &cur_node);
			      if (n == NULL)
				{
				  cur_node.host_start--;
				  n = splay_tree_lookup (mem_map, &cur_node);
				  cur_node.host_start++;
				}
			    }
			  if (n == NULL)
			    {
			      gomp_mutex_unlock (&devicep->lock);
			      gomp_fatal ("Pointer target of array section "
					  "wasn't mapped");
			    }
			  cur_node.host_start -= n->host_start;
			  cur_node.tgt_offset = n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start;
			  /* At this point tgt_offset is the target address of
			     the array section.  Now subtract bias to get what
			     we want to initialize the pointer with.  */
			  cur_node.tgt_offset -= sizes[j];
			  /* FIXME: see above FIXME comment.  */
			  devicep->host2dev_func (devicep->target_id,
			     (void *) (tgt->tgt_start + k->tgt_offset
				       + ((uintptr_t) hostaddrs[j]
					  - k->host_start)),
			     (void *) &cur_node.tgt_offset,
			     sizeof (void *));
			  i++;
			}
		    break;
		  case GOMP_MAP_FORCE_PRESENT:
		    {
		      /* We already looked up the memory region above and it
			 was missing.  */
		      size_t size = k->host_end - k->host_start;
		      gomp_mutex_unlock (&devicep->lock);
#ifdef HAVE_INTTYPES_H
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%"PRIu64" (0x%"PRIx64"))",
				  (void *) k->host_start,
				  (uint64_t) size, (uint64_t) size);
#else
		      gomp_fatal ("present clause: !acc_is_present (%p, "
				  "%lu (0x%lx))", (void *) k->host_start,
				  (unsigned long) size, (unsigned long) size);
#endif
		    }
		    break;
		  case GOMP_MAP_FORCE_DEVICEPTR:
		    assert (k->host_end - k->host_start == sizeof (void *));

		    devicep->host2dev_func (devicep->target_id,
					    (void *) (tgt->tgt_start
						      + k->tgt_offset),
					    (void *) k->host_start,
					    sizeof (void *));
		    break;
		  default:
		    gomp_mutex_unlock (&devicep->lock);
		    gomp_fatal ("%s: unhandled kind 0x%.2x", __FUNCTION__,
				kind);
		  }
		array++;
	      }
	  }
    }

  if (is_target)
    {
      for (i = 0; i < mapnum; i++)
	{
	  if (tgt->list[i] == NULL)
	    cur_node.tgt_offset = (uintptr_t) NULL;
	  else
	    cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
				  + tgt->list[i]->tgt_offset;
	  /* FIXME: see above FIXME comment.  */
	  devicep->host2dev_func (devicep->target_id,
				  (void *) (tgt->tgt_start
					    + i * sizeof (void *)),
				  (void *) &cur_node.tgt_offset,
				  sizeof (void *));
	}
    }

  gomp_mutex_unlock (&devicep->lock);
  return tgt;
}
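
/* For example, GOMP_target below calls gomp_map_vars with IS_TARGET set, in
   which case the start of the allocated device block additionally holds a
   vector of MAPNUM device pointers (filled in by the loop just above) that is
   passed to the device kernel as its argument.  */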

static void
gomp_unmap_tgt (struct target_mem_desc *tgt)
{
  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
  if (tgt->tgt_end)
    tgt->device_descr->free_func (tgt->device_descr->target_id, tgt->to_free);

  free (tgt->array);
  free (tgt);
}

/* Decrease the refcount for a set of mapped variables, and queue asynchronous
   copies from the device back to the host after any work that has been issued.
   Because the regions are still "live", increment an asynchronous reference
   count to indicate that they should not be unmapped from host-side data
   structures until the asynchronous copy has completed.  */

attribute_hidden void
gomp_copy_from_async (struct target_mem_desc *tgt)
{
  struct gomp_device_descr *devicep = tgt->device_descr;
  size_t i;

  gomp_mutex_lock (&devicep->lock);

  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      {
	tgt->list[i]->refcount--;
	tgt->list[i]->async_refcount++;
      }
    else
      {
	splay_tree_key k = tgt->list[i];
	if (k->copy_from)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
      }

  gomp_mutex_unlock (&devicep->lock);
}

/* Unmap variables described by TGT.  If DO_COPYFROM is true, copy relevant
   variables back from the device to the host; if it is false, it is assumed
   that this has already been done, i.e. by gomp_copy_from_async above.  */

attribute_hidden void
gomp_unmap_vars (struct target_mem_desc *tgt, bool do_copyfrom)
{
  struct gomp_device_descr *devicep = tgt->device_descr;

  if (tgt->list_count == 0)
    {
      free (tgt);
      return;
    }

  gomp_mutex_lock (&devicep->lock);

  size_t i;
  for (i = 0; i < tgt->list_count; i++)
    if (tgt->list[i] == NULL)
      ;
    else if (tgt->list[i]->refcount > 1)
      tgt->list[i]->refcount--;
    else if (tgt->list[i]->async_refcount > 0)
      tgt->list[i]->async_refcount--;
    else
      {
	splay_tree_key k = tgt->list[i];
	if (k->copy_from && do_copyfrom)
	  devicep->dev2host_func (devicep->target_id, (void *) k->host_start,
				  (void *) (k->tgt->tgt_start + k->tgt_offset),
				  k->host_end - k->host_start);
	splay_tree_remove (&devicep->mem_map, k);
	if (k->tgt->refcount > 1)
	  k->tgt->refcount--;
	else
	  gomp_unmap_tgt (k->tgt);
      }

  if (tgt->refcount > 1)
    tgt->refcount--;
  else
    gomp_unmap_tgt (tgt);

  gomp_mutex_unlock (&devicep->lock);
}

static void
gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
	     size_t *sizes, void *kinds, bool is_openacc)
{
  size_t i;
  struct splay_tree_key_s cur_node;
  const int typemask = is_openacc ? 0xff : 0x7;

  if (!devicep)
    return;

  if (mapnum == 0)
    return;

  gomp_mutex_lock (&devicep->lock);
  for (i = 0; i < mapnum; i++)
    if (sizes[i])
      {
	cur_node.host_start = (uintptr_t) hostaddrs[i];
	cur_node.host_end = cur_node.host_start + sizes[i];
	splay_tree_key n = splay_tree_lookup (&devicep->mem_map, &cur_node);
	if (n)
	  {
	    int kind = get_kind (is_openacc, kinds, i);
	    if (n->host_start > cur_node.host_start
		|| n->host_end < cur_node.host_end)
	      {
		gomp_mutex_unlock (&devicep->lock);
		gomp_fatal ("Trying to update [%p..%p) object when "
			    "only [%p..%p) is mapped",
			    (void *) cur_node.host_start,
			    (void *) cur_node.host_end,
			    (void *) n->host_start,
			    (void *) n->host_end);
	      }
	    if (GOMP_MAP_COPY_TO_P (kind & typemask))
	      devicep->host2dev_func (devicep->target_id,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      (void *) cur_node.host_start,
				      cur_node.host_end - cur_node.host_start);
	    if (GOMP_MAP_COPY_FROM_P (kind & typemask))
	      devicep->dev2host_func (devicep->target_id,
				      (void *) cur_node.host_start,
				      (void *) (n->tgt->tgt_start
						+ n->tgt_offset
						+ cur_node.host_start
						- n->host_start),
				      cur_node.host_end - cur_node.host_start);
	  }
	else
	  {
	    gomp_mutex_unlock (&devicep->lock);
	    gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
			(void *) cur_node.host_start,
			(void *) cur_node.host_end);
	  }
      }
  gomp_mutex_unlock (&devicep->lock);
}

/* Load the image pointed to by TARGET_DATA to the device specified by
   DEVICEP, and insert into the splay tree the mappings between addresses
   from HOST_TABLE and addresses from the loaded target image.  */

static void
gomp_offload_image_to_device (struct gomp_device_descr *devicep,
			      void *host_table, void *target_data,
			      bool is_register_lock)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;
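
  /* In other words, HOST_TABLE points to four pointers laid out as
       [0]  start of the host function address table
       [1]  one past its last entry
       [2]  start of the host variable table (address, size pairs)
       [3]  one past its last entry
     as read into the four locals above.  */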

  /* Load image to device and get target addresses for the image.  */
  struct addr_pair *target_table = NULL;
  int i, num_target_entries
    = devicep->load_image_func (devicep->target_id, target_data, &target_table);

  if (num_target_entries != num_funcs + num_vars)
    {
      gomp_mutex_unlock (&devicep->lock);
      if (is_register_lock)
	gomp_mutex_unlock (&register_lock);
      gomp_fatal ("Can't map target functions or variables");
    }

  /* Insert host-target address mapping into splay tree.  */
  struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
  tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
  tgt->refcount = 1;
  tgt->tgt_start = 0;
  tgt->tgt_end = 0;
  tgt->to_free = NULL;
  tgt->prev = NULL;
  tgt->list_count = 0;
  tgt->device_descr = devicep;
  splay_tree_node array = tgt->array;

  for (i = 0; i < num_funcs; i++)
    {
      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_func_table[i];
      k->host_end = k->host_start + 1;
      k->tgt = tgt;
      k->tgt_offset = target_table[i].start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  for (i = 0; i < num_vars; i++)
    {
      struct addr_pair *target_var = &target_table[num_funcs + i];
      if (target_var->end - target_var->start
	  != (uintptr_t) host_var_table[i * 2 + 1])
	{
	  gomp_mutex_unlock (&devicep->lock);
	  if (is_register_lock)
	    gomp_mutex_unlock (&register_lock);
	  gomp_fatal ("Can't map target variables (size mismatch)");
	}

      splay_tree_key k = &array->key;
      k->host_start = (uintptr_t) host_var_table[i * 2];
      k->host_end = k->host_start + (uintptr_t) host_var_table[i * 2 + 1];
      k->tgt = tgt;
      k->tgt_offset = target_var->start;
      k->refcount = 1;
      k->async_refcount = 0;
      k->copy_from = false;
      array->left = NULL;
      array->right = NULL;
      splay_tree_insert (&devicep->mem_map, array);
      array++;
    }

  free (target_table);
}

/* This function should be called from every offload image while loading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_register (void *host_table, enum offload_target_type target_type,
		       void *target_data)
{
  int i;
  gomp_mutex_lock (&register_lock);

  /* Load image to all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type == target_type && devicep->is_initialized)
	gomp_offload_image_to_device (devicep, host_table, target_data, true);
      gomp_mutex_unlock (&devicep->lock);
    }

  /* Insert image into the array of pending images.  */
  offload_images
    = gomp_realloc_unlock (offload_images,
			   (num_offload_images + 1)
			   * sizeof (struct offload_image_descr));
  offload_images[num_offload_images].type = target_type;
  offload_images[num_offload_images].host_table = host_table;
  offload_images[num_offload_images].target_data = target_data;

  num_offload_images++;
  gomp_mutex_unlock (&register_lock);
}

/* This function should be called from every offload image while unloading.
   It gets the descriptor of the host func and var tables HOST_TABLE, the
   TARGET_TYPE of the target, and the TARGET_DATA needed by the target
   plugin.  */

void
GOMP_offload_unregister (void *host_table, enum offload_target_type target_type,
			 void *target_data)
{
  void **host_func_table = ((void ***) host_table)[0];
  void **host_funcs_end  = ((void ***) host_table)[1];
  void **host_var_table  = ((void ***) host_table)[2];
  void **host_vars_end   = ((void ***) host_table)[3];
  int i;

  /* The func table contains only addresses, the var table contains addresses
     and corresponding sizes.  */
  int num_funcs = host_funcs_end - host_func_table;
  int num_vars  = (host_vars_end - host_var_table) / 2;

  gomp_mutex_lock (&register_lock);

  /* Unload image from all initialized devices.  */
  for (i = 0; i < num_devices; i++)
    {
      int j;
      struct gomp_device_descr *devicep = &devices[i];
      gomp_mutex_lock (&devicep->lock);
      if (devicep->type != target_type || !devicep->is_initialized)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  continue;
	}

      devicep->unload_image_func (devicep->target_id, target_data);

      /* Remove mapping from splay tree.  */
      struct splay_tree_key_s k;
      splay_tree_key node = NULL;
      if (num_funcs > 0)
	{
	  k.host_start = (uintptr_t) host_func_table[0];
	  k.host_end = k.host_start + 1;
	  node = splay_tree_lookup (&devicep->mem_map, &k);
	}
      else if (num_vars > 0)
	{
	  k.host_start = (uintptr_t) host_var_table[0];
	  k.host_end = k.host_start + (uintptr_t) host_var_table[1];
	  node = splay_tree_lookup (&devicep->mem_map, &k);
	}

      for (j = 0; j < num_funcs; j++)
	{
	  k.host_start = (uintptr_t) host_func_table[j];
	  k.host_end = k.host_start + 1;
	  splay_tree_remove (&devicep->mem_map, &k);
	}

      for (j = 0; j < num_vars; j++)
	{
	  k.host_start = (uintptr_t) host_var_table[j * 2];
	  k.host_end = k.host_start + (uintptr_t) host_var_table[j * 2 + 1];
	  splay_tree_remove (&devicep->mem_map, &k);
	}

      if (node)
	{
	  free (node->tgt);
	  free (node);
	}

      gomp_mutex_unlock (&devicep->lock);
    }

  /* Remove image from array of pending images.  */
  for (i = 0; i < num_offload_images; i++)
    if (offload_images[i].target_data == target_data)
      {
	offload_images[i] = offload_images[--num_offload_images];
	break;
      }

  gomp_mutex_unlock (&register_lock);
}

/* This function initializes the target device, specified by DEVICEP.  DEVICEP
   must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_init_device (struct gomp_device_descr *devicep)
{
  int i;
  devicep->init_device_func (devicep->target_id);

  /* Load to the device all images registered so far.  */
  for (i = 0; i < num_offload_images; i++)
    {
      struct offload_image_descr *image = &offload_images[i];
      if (image->type == devicep->type)
	gomp_offload_image_to_device (devicep, image->host_table,
				      image->target_data, false);
    }

  devicep->is_initialized = true;
}

/* Free address mapping tables.  MEM_MAP must be locked on entry, and remains
   locked on return.  */

attribute_hidden void
gomp_free_memmap (struct splay_tree_s *mem_map)
{
  while (mem_map->root)
    {
      struct target_mem_desc *tgt = mem_map->root->key.tgt;

      splay_tree_remove (mem_map, &mem_map->root->key);
      free (tgt->array);
      free (tgt);
    }
}

/* This function de-initializes the target device, specified by DEVICEP.
   DEVICEP must be locked on entry, and remains locked on return.  */

attribute_hidden void
gomp_fini_device (struct gomp_device_descr *devicep)
{
  if (devicep->is_initialized)
    devicep->fini_device_func (devicep->target_id);

  devicep->is_initialized = false;
}

/* Called when encountering a target directive.  If DEVICE
   is GOMP_DEVICE_ICV, it means use the device-var ICV.  If it is
   GOMP_DEVICE_HOST_FALLBACK (or any value larger than the last available
   hw device), use the host fallback.
   FN is the address of the host code, UNUSED is part of the current ABI, but
   we're not actually using it.  HOSTADDRS, SIZES and KINDS are arrays
   with MAPNUM entries, holding the addresses of the host objects, the
   sizes of the host objects (for pointer kinds, instead the pointer bias,
   with an assumed size of sizeof (void *)) and the map kinds.  */
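
/* For illustration only (not taken from this file): the compiler lowers a
   construct such as

     #pragma omp target map(tofrom: a)
     { ... }

   into roughly a call like

     GOMP_target (GOMP_DEVICE_ICV, <outlined fn>, NULL, 1, <&a>, <sizeof a>,
		  <kinds>);

   where <kinds>[0] encodes GOMP_MAP_TOFROM in its low bits; the exact
   encoding is defined by the compiler and gomp-constants.h.  */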

void
GOMP_target (int device, void (*fn) (void *), const void *unused,
	     size_t mapnum, void **hostaddrs, size_t *sizes,
	     unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_thread old_thr, *thr = gomp_thread ();
      old_thr = *thr;
      memset (thr, '\0', sizeof (*thr));
      if (gomp_places_list)
	{
	  thr->place = old_thr.place;
	  thr->ts.place_partition_len = gomp_places_list_len;
	}
      fn (hostaddrs);
      gomp_free_thread (thr);
      *thr = old_thr;
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  void *fn_addr;

  if (devicep->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC)
    fn_addr = (void *) fn;
  else
    {
      gomp_mutex_lock (&devicep->lock);
      struct splay_tree_key_s k;
      k.host_start = (uintptr_t) fn;
      k.host_end = k.host_start + 1;
      splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k);
      if (tgt_fn == NULL)
	{
	  gomp_mutex_unlock (&devicep->lock);
	  gomp_fatal ("Target function wasn't mapped");
	}
      gomp_mutex_unlock (&devicep->lock);

      fn_addr = (void *) tgt_fn->tgt_offset;
    }

  struct target_mem_desc *tgt_vars
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     true);
  struct gomp_thread old_thr, *thr = gomp_thread ();
  old_thr = *thr;
  memset (thr, '\0', sizeof (*thr));
  if (gomp_places_list)
    {
      thr->place = old_thr.place;
      thr->ts.place_partition_len = gomp_places_list_len;
    }
  devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start);
  gomp_free_thread (thr);
  *thr = old_thr;
  gomp_unmap_vars (tgt_vars, true);
}

void
GOMP_target_data (int device, const void *unused, size_t mapnum,
		  void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    {
      /* Host fallback.  */
      struct gomp_task_icv *icv = gomp_icv (false);
      if (icv->target_data)
	{
	  /* Even when doing a host fallback, if there are any active
	     #pragma omp target data constructs, we need to remember the
	     new #pragma omp target data, otherwise GOMP_target_end_data
	     would get out of sync.  */
	  struct target_mem_desc *tgt
	    = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, false, false);
	  tgt->prev = icv->target_data;
	  icv->target_data = tgt;
	}
      return;
    }

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  struct target_mem_desc *tgt
    = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false,
		     false);
  struct gomp_task_icv *icv = gomp_icv (true);
  tgt->prev = icv->target_data;
  icv->target_data = tgt;
}

void
GOMP_target_end_data (void)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  if (icv->target_data)
    {
      struct target_mem_desc *tgt = icv->target_data;
      icv->target_data = tgt->prev;
      gomp_unmap_vars (tgt, true);
    }
}

void
GOMP_target_update (int device, const void *unused, size_t mapnum,
		    void **hostaddrs, size_t *sizes, unsigned char *kinds)
{
  struct gomp_device_descr *devicep = resolve_device (device);

  if (devicep == NULL
      || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
    return;

  gomp_mutex_lock (&devicep->lock);
  if (!devicep->is_initialized)
    gomp_init_device (devicep);
  gomp_mutex_unlock (&devicep->lock);

  gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false);
}

void
GOMP_teams (unsigned int num_teams, unsigned int thread_limit)
{
  if (thread_limit)
    {
      struct gomp_task_icv *icv = gomp_icv (true);
      icv->thread_limit_var
	= thread_limit > INT_MAX ? UINT_MAX : thread_limit;
    }
  (void) num_teams;
}

#ifdef PLUGIN_SUPPORT

/* This function tries to load a plugin for DEVICE.  The name of the plugin
   is passed in PLUGIN_NAME.
   The handles of the found functions are stored in the corresponding fields
   of DEVICE.  The function returns TRUE on success and FALSE otherwise.  */

static bool
gomp_load_plugin_for_device (struct gomp_device_descr *device,
			     const char *plugin_name)
{
  const char *err = NULL, *last_missing = NULL;
  int optional_present, optional_total;

  /* Clear any existing error.  */
  dlerror ();

  void *plugin_handle = dlopen (plugin_name, RTLD_LAZY);
  if (!plugin_handle)
    {
      err = dlerror ();
      goto out;
    }

  /* Check if all required functions are available in the plugin and store
     their handles.  */
#define DLSYM(f)							\
  do									\
    {									\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #f);	\
      err = dlerror ();							\
      if (err != NULL)							\
	goto out;							\
    }									\
  while (0)
  /* Similar, but missing functions are not an error.  */
#define DLSYM_OPT(f, n)							\
  do									\
    {									\
      const char *tmp_err;						\
      device->f##_func = dlsym (plugin_handle, "GOMP_OFFLOAD_" #n);	\
      tmp_err = dlerror ();						\
      if (tmp_err == NULL)						\
        optional_present++;						\
      else								\
        last_missing = #n;						\
      optional_total++;							\
    }									\
  while (0)
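
  /* As an example, DLSYM (alloc) expands (modulo whitespace) to

       device->alloc_func = dlsym (plugin_handle, "GOMP_OFFLOAD_alloc");
       err = dlerror ();
       if (err != NULL)
	 goto out;

     wrapped in a do/while (0) block.  */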

  DLSYM (get_name);
  DLSYM (get_caps);
  DLSYM (get_type);
  DLSYM (get_num_devices);
  DLSYM (init_device);
  DLSYM (fini_device);
  DLSYM (load_image);
  DLSYM (unload_image);
  DLSYM (alloc);
  DLSYM (free);
  DLSYM (dev2host);
  DLSYM (host2dev);
  device->capabilities = device->get_caps_func ();
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
    DLSYM (run);
  if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
    {
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.exec, openacc_parallel);
      DLSYM_OPT (openacc.register_async_cleanup,
		 openacc_register_async_cleanup);
      DLSYM_OPT (openacc.async_test, openacc_async_test);
      DLSYM_OPT (openacc.async_test_all, openacc_async_test_all);
      DLSYM_OPT (openacc.async_wait, openacc_async_wait);
      DLSYM_OPT (openacc.async_wait_async, openacc_async_wait_async);
      DLSYM_OPT (openacc.async_wait_all, openacc_async_wait_all);
      DLSYM_OPT (openacc.async_wait_all_async, openacc_async_wait_all_async);
      DLSYM_OPT (openacc.async_set_async, openacc_async_set_async);
      DLSYM_OPT (openacc.create_thread_data, openacc_create_thread_data);
      DLSYM_OPT (openacc.destroy_thread_data, openacc_destroy_thread_data);
      /* Require all the OpenACC handlers if we have
	 GOMP_OFFLOAD_CAP_OPENACC_200.  */
      if (optional_present != optional_total)
	{
	  err = "plugin missing OpenACC handler function";
	  goto out;
	}
      optional_present = optional_total = 0;
      DLSYM_OPT (openacc.cuda.get_current_device,
		 openacc_get_current_cuda_device);
      DLSYM_OPT (openacc.cuda.get_current_context,
		 openacc_get_current_cuda_context);
      DLSYM_OPT (openacc.cuda.get_stream, openacc_get_cuda_stream);
      DLSYM_OPT (openacc.cuda.set_stream, openacc_set_cuda_stream);
      /* Make sure all the CUDA functions are there if any of them are.  */
      if (optional_present && optional_present != optional_total)
	{
	  err = "plugin missing OpenACC CUDA handler function";
	  goto out;
	}
    }
#undef DLSYM
#undef DLSYM_OPT

 out:
  if (err != NULL)
    {
      gomp_error ("while loading %s: %s", plugin_name, err);
      if (last_missing)
        gomp_error ("missing function was %s", last_missing);
      if (plugin_handle)
	dlclose (plugin_handle);
    }
  return err == NULL;
}

/* This function initializes the runtime needed for offloading.
   It parses the list of offload targets and tries to load the plugins for
   these targets.  On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP
   will be set, and the array DEVICES initialized, containing descriptors for
   corresponding devices, first the GOMP_OFFLOAD_CAP_OPENMP_400 ones, followed
   by the others.  */
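
/* For example, if OFFLOAD_TARGETS were "foo,bar" (hypothetical target names),
   this would try to dlopen "libgomp-plugin-foo" and "libgomp-plugin-bar",
   each with the platform's shared-object suffix appended (SONAME_SUFFIX,
   e.g. ".so.1" on typical ELF systems).  */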

static void
gomp_target_init (void)
{
  const char *prefix = "libgomp-plugin-";
  const char *suffix = SONAME_SUFFIX (1);
  const char *cur, *next;
  char *plugin_name;
  int i, new_num_devices;

  num_devices = 0;
  devices = NULL;

  cur = OFFLOAD_TARGETS;
  if (*cur)
    do
      {
	struct gomp_device_descr current_device;

	next = strchr (cur, ',');

	plugin_name = (char *) malloc (1 + (next ? next - cur : strlen (cur))
				       + strlen (prefix) + strlen (suffix));
	if (!plugin_name)
	  {
	    num_devices = 0;
	    break;
	  }

	strcpy (plugin_name, prefix);
	strncat (plugin_name, cur, next ? next - cur : strlen (cur));
	strcat (plugin_name, suffix);

	if (gomp_load_plugin_for_device (&current_device, plugin_name))
	  {
	    new_num_devices = current_device.get_num_devices_func ();
	    if (new_num_devices >= 1)
	      {
		/* Augment DEVICES and NUM_DEVICES.  */

		devices = realloc (devices, (num_devices + new_num_devices)
				   * sizeof (struct gomp_device_descr));
		if (!devices)
		  {
		    num_devices = 0;
		    free (plugin_name);
		    break;
		  }

		current_device.name = current_device.get_name_func ();
		/* current_device.capabilities has already been set.  */
		current_device.type = current_device.get_type_func ();
		current_device.mem_map.root = NULL;
		current_device.is_initialized = false;
		current_device.openacc.data_environ = NULL;
		for (i = 0; i < new_num_devices; i++)
		  {
		    current_device.target_id = i;
		    devices[num_devices] = current_device;
		    gomp_mutex_init (&devices[num_devices].lock);
		    num_devices++;
		  }
	      }
	  }

	free (plugin_name);
	cur = next + 1;
      }
    while (next);

  /* In DEVICES, sort the GOMP_OFFLOAD_CAP_OPENMP_400 ones first, and set
     NUM_DEVICES_OPENMP.  */
  struct gomp_device_descr *devices_s
    = malloc (num_devices * sizeof (struct gomp_device_descr));
  if (!devices_s)
    {
      num_devices = 0;
      free (devices);
      devices = NULL;
    }
  num_devices_openmp = 0;
  for (i = 0; i < num_devices; i++)
    if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)
      devices_s[num_devices_openmp++] = devices[i];
  int num_devices_after_openmp = num_devices_openmp;
  for (i = 0; i < num_devices; i++)
    if (!(devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
      devices_s[num_devices_after_openmp++] = devices[i];
  free (devices);
  devices = devices_s;

  for (i = 0; i < num_devices; i++)
    {
      /* The 'devices' array can be moved (by the realloc call) until we have
	 found all the plugins, so registering with the OpenACC runtime (which
	 takes a copy of the pointer argument) must be delayed until now.  */
      if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200)
	goacc_register (&devices[i]);
    }
}

#else /* PLUGIN_SUPPORT */
/* If dlfcn.h is unavailable, we always fall back to host execution.
   The GOMP_target* routines are just stubs in this case.  */
static void
gomp_target_init (void)
{
}
#endif /* PLUGIN_SUPPORT */