oacc-mem.c revision 1.1.1.4
1/* OpenACC Runtime initialization routines
2
3   Copyright (C) 2013-2017 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "openacc.h"
30#include "config.h"
31#include "libgomp.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#include <stdint.h>
35#include <string.h>
36#include <assert.h>
37
38/* Return block containing [H->S), or NULL if not contained.  The device lock
39   for DEV must be locked on entry, and remains locked on exit.  */
40
41static splay_tree_key
42lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
43{
44  struct splay_tree_key_s node;
45  splay_tree_key key;
46
47  node.host_start = (uintptr_t) h;
48  node.host_end = (uintptr_t) h + s;
49
50  key = splay_tree_lookup (&dev->mem_map, &node);
51
52  return key;
53}
54
55/* Return block containing [D->S), or NULL if not contained.
56   The list isn't ordered by device address, so we have to iterate
57   over the whole array.  This is not expected to be a common
58   operation.  The device lock associated with TGT must be locked on entry, and
59   remains locked on exit.  */
60
61static splay_tree_key
62lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
63{
64  int i;
65  struct target_mem_desc *t;
66
67  if (!tgt)
68    return NULL;
69
70  for (t = tgt; t != NULL; t = t->prev)
71    {
72      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
73        break;
74    }
75
76  if (!t)
77    return NULL;
78
79  for (i = 0; i < t->list_count; i++)
80    {
81      void * offset;
82
83      splay_tree_key k = &t->array[i].key;
84      offset = d - t->tgt_start + k->tgt_offset;
85
86      if (k->host_start + offset <= (void *) k->host_end)
87        return k;
88    }
89
90  return NULL;
91}
92
93/* OpenACC is silent on how memory exhaustion is indicated.  We return
94   NULL.  */
95
96void *
97acc_malloc (size_t s)
98{
99  if (!s)
100    return NULL;
101
102  goacc_lazy_initialize ();
103
104  struct goacc_thread *thr = goacc_thread ();
105
106  assert (thr->dev);
107
108  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
109    return malloc (s);
110
111  return thr->dev->alloc_func (thr->dev->target_id, s);
112}
113
114/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
115   the device address is mapped. We choose to check if it mapped,
116   and if it is, to unmap it. */
117void
118acc_free (void *d)
119{
120  splay_tree_key k;
121
122  if (!d)
123    return;
124
125  struct goacc_thread *thr = goacc_thread ();
126
127  assert (thr && thr->dev);
128
129  struct gomp_device_descr *acc_dev = thr->dev;
130
131  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
132    return free (d);
133
134  gomp_mutex_lock (&acc_dev->lock);
135
136  /* We don't have to call lazy open here, as the ptr value must have
137     been returned by acc_malloc.  It's not permitted to pass NULL in
138     (unless you got that null from acc_malloc).  */
139  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
140    {
141      void *offset;
142
143      offset = d - k->tgt->tgt_start + k->tgt_offset;
144
145      gomp_mutex_unlock (&acc_dev->lock);
146
147      acc_unmap_data ((void *)(k->host_start + offset));
148    }
149  else
150    gomp_mutex_unlock (&acc_dev->lock);
151
152  if (!acc_dev->free_func (acc_dev->target_id, d))
153    gomp_fatal ("error in freeing device memory in %s", __FUNCTION__);
154}
155
156void
157acc_memcpy_to_device (void *d, void *h, size_t s)
158{
159  /* No need to call lazy open here, as the device pointer must have
160     been obtained from a routine that did that.  */
161  struct goacc_thread *thr = goacc_thread ();
162
163  assert (thr && thr->dev);
164
165  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
166    {
167      memmove (d, h, s);
168      return;
169    }
170
171  if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s))
172    gomp_fatal ("error in %s", __FUNCTION__);
173}
174
175void
176acc_memcpy_from_device (void *h, void *d, size_t s)
177{
178  /* No need to call lazy open here, as the device pointer must have
179     been obtained from a routine that did that.  */
180  struct goacc_thread *thr = goacc_thread ();
181
182  assert (thr && thr->dev);
183
184  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
185    {
186      memmove (h, d, s);
187      return;
188    }
189
190  if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s))
191    gomp_fatal ("error in %s", __FUNCTION__);
192}
193
194/* Return the device pointer that corresponds to host data H.  Or NULL
195   if no mapping.  */
196
197void *
198acc_deviceptr (void *h)
199{
200  splay_tree_key n;
201  void *d;
202  void *offset;
203
204  goacc_lazy_initialize ();
205
206  struct goacc_thread *thr = goacc_thread ();
207  struct gomp_device_descr *dev = thr->dev;
208
209  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
210    return h;
211
212  gomp_mutex_lock (&dev->lock);
213
214  n = lookup_host (dev, h, 1);
215
216  if (!n)
217    {
218      gomp_mutex_unlock (&dev->lock);
219      return NULL;
220    }
221
222  offset = h - n->host_start;
223
224  d = n->tgt->tgt_start + n->tgt_offset + offset;
225
226  gomp_mutex_unlock (&dev->lock);
227
228  return d;
229}
230
231/* Return the host pointer that corresponds to device data D.  Or NULL
232   if no mapping.  */
233
234void *
235acc_hostptr (void *d)
236{
237  splay_tree_key n;
238  void *h;
239  void *offset;
240
241  goacc_lazy_initialize ();
242
243  struct goacc_thread *thr = goacc_thread ();
244  struct gomp_device_descr *acc_dev = thr->dev;
245
246  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
247    return d;
248
249  gomp_mutex_lock (&acc_dev->lock);
250
251  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
252
253  if (!n)
254    {
255      gomp_mutex_unlock (&acc_dev->lock);
256      return NULL;
257    }
258
259  offset = d - n->tgt->tgt_start + n->tgt_offset;
260
261  h = n->host_start + offset;
262
263  gomp_mutex_unlock (&acc_dev->lock);
264
265  return h;
266}
267
268/* Return 1 if host data [H,+S] is present on the device.  */
269
270int
271acc_is_present (void *h, size_t s)
272{
273  splay_tree_key n;
274
275  if (!s || !h)
276    return 0;
277
278  goacc_lazy_initialize ();
279
280  struct goacc_thread *thr = goacc_thread ();
281  struct gomp_device_descr *acc_dev = thr->dev;
282
283  if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
284    return h != NULL;
285
286  gomp_mutex_lock (&acc_dev->lock);
287
288  n = lookup_host (acc_dev, h, s);
289
290  if (n && ((uintptr_t)h < n->host_start
291	    || (uintptr_t)h + s > n->host_end
292	    || s > n->host_end - n->host_start))
293    n = NULL;
294
295  gomp_mutex_unlock (&acc_dev->lock);
296
297  return n != NULL;
298}
299
300/* Create a mapping for host [H,+S] -> device [D,+S] */
301
302void
303acc_map_data (void *h, void *d, size_t s)
304{
305  struct target_mem_desc *tgt = NULL;
306  size_t mapnum = 1;
307  void *hostaddrs = h;
308  void *devaddrs = d;
309  size_t sizes = s;
310  unsigned short kinds = GOMP_MAP_ALLOC;
311
312  goacc_lazy_initialize ();
313
314  struct goacc_thread *thr = goacc_thread ();
315  struct gomp_device_descr *acc_dev = thr->dev;
316
317  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
318    {
319      if (d != h)
320        gomp_fatal ("cannot map data on shared-memory system");
321    }
322  else
323    {
324      struct goacc_thread *thr = goacc_thread ();
325
326      if (!d || !h || !s)
327	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
328                    (void *)h, (int)s, (void *)d, (int)s);
329
330      gomp_mutex_lock (&acc_dev->lock);
331
332      if (lookup_host (acc_dev, h, s))
333        {
334	  gomp_mutex_unlock (&acc_dev->lock);
335	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
336		      (int)s);
337	}
338
339      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
340        {
341	  gomp_mutex_unlock (&acc_dev->lock);
342	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
343		      (int)s);
344	}
345
346      gomp_mutex_unlock (&acc_dev->lock);
347
348      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
349			   &kinds, true, GOMP_MAP_VARS_OPENACC);
350    }
351
352  gomp_mutex_lock (&acc_dev->lock);
353  tgt->prev = acc_dev->openacc.data_environ;
354  acc_dev->openacc.data_environ = tgt;
355  gomp_mutex_unlock (&acc_dev->lock);
356}
357
358void
359acc_unmap_data (void *h)
360{
361  struct goacc_thread *thr = goacc_thread ();
362  struct gomp_device_descr *acc_dev = thr->dev;
363
364  /* No need to call lazy open, as the address must have been mapped.  */
365
366  /* This is a no-op on shared-memory targets.  */
367  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
368    return;
369
370  size_t host_size;
371
372  gomp_mutex_lock (&acc_dev->lock);
373
374  splay_tree_key n = lookup_host (acc_dev, h, 1);
375  struct target_mem_desc *t;
376
377  if (!n)
378    {
379      gomp_mutex_unlock (&acc_dev->lock);
380      gomp_fatal ("%p is not a mapped block", (void *)h);
381    }
382
383  host_size = n->host_end - n->host_start;
384
385  if (n->host_start != (uintptr_t) h)
386    {
387      gomp_mutex_unlock (&acc_dev->lock);
388      gomp_fatal ("[%p,%d] surrounds %p",
389		  (void *) n->host_start, (int) host_size, (void *) h);
390    }
391
392  t = n->tgt;
393
394  if (t->refcount == 2)
395    {
396      struct target_mem_desc *tp;
397
398      /* This is the last reference, so pull the descriptor off the
399         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
400         freeing the device memory. */
401      t->tgt_end = 0;
402      t->to_free = 0;
403
404      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
405	   tp = t, t = t->prev)
406	if (n->tgt == t)
407	  {
408	    if (tp)
409	      tp->prev = t->prev;
410	    else
411	      acc_dev->openacc.data_environ = t->prev;
412
413	    break;
414	  }
415    }
416
417  gomp_mutex_unlock (&acc_dev->lock);
418
419  gomp_unmap_vars (t, true);
420}
421
422#define FLAG_PRESENT (1 << 0)
423#define FLAG_CREATE (1 << 1)
424#define FLAG_COPY (1 << 2)
425
426static void *
427present_create_copy (unsigned f, void *h, size_t s)
428{
429  void *d;
430  splay_tree_key n;
431
432  if (!h || !s)
433    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
434
435  goacc_lazy_initialize ();
436
437  struct goacc_thread *thr = goacc_thread ();
438  struct gomp_device_descr *acc_dev = thr->dev;
439
440  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
441    return h;
442
443  gomp_mutex_lock (&acc_dev->lock);
444
445  n = lookup_host (acc_dev, h, s);
446  if (n)
447    {
448      /* Present. */
449      d = (void *) (n->tgt->tgt_start + n->tgt_offset);
450
451      if (!(f & FLAG_PRESENT))
452        {
453	  gomp_mutex_unlock (&acc_dev->lock);
454          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
455        	      (void *)h, (int)s, (void *)d, (int)s);
456	}
457      if ((h + s) > (void *)n->host_end)
458	{
459	  gomp_mutex_unlock (&acc_dev->lock);
460	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
461	}
462
463      gomp_mutex_unlock (&acc_dev->lock);
464    }
465  else if (!(f & FLAG_CREATE))
466    {
467      gomp_mutex_unlock (&acc_dev->lock);
468      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
469    }
470  else
471    {
472      struct target_mem_desc *tgt;
473      size_t mapnum = 1;
474      unsigned short kinds;
475      void *hostaddrs = h;
476
477      if (f & FLAG_COPY)
478	kinds = GOMP_MAP_TO;
479      else
480	kinds = GOMP_MAP_ALLOC;
481
482      gomp_mutex_unlock (&acc_dev->lock);
483
484      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
485			   GOMP_MAP_VARS_OPENACC);
486
487      gomp_mutex_lock (&acc_dev->lock);
488
489      d = tgt->to_free;
490      tgt->prev = acc_dev->openacc.data_environ;
491      acc_dev->openacc.data_environ = tgt;
492
493      gomp_mutex_unlock (&acc_dev->lock);
494    }
495
496  return d;
497}
498
499void *
500acc_create (void *h, size_t s)
501{
502  return present_create_copy (FLAG_CREATE, h, s);
503}
504
505void *
506acc_copyin (void *h, size_t s)
507{
508  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
509}
510
511void *
512acc_present_or_create (void *h, size_t s)
513{
514  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
515}
516
517void *
518acc_present_or_copyin (void *h, size_t s)
519{
520  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
521}
522
523#define FLAG_COPYOUT (1 << 0)
524
525static void
526delete_copyout (unsigned f, void *h, size_t s, const char *libfnname)
527{
528  size_t host_size;
529  splay_tree_key n;
530  void *d;
531  struct goacc_thread *thr = goacc_thread ();
532  struct gomp_device_descr *acc_dev = thr->dev;
533
534  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
535    return;
536
537  gomp_mutex_lock (&acc_dev->lock);
538
539  n = lookup_host (acc_dev, h, s);
540
541  /* No need to call lazy open, as the data must already have been
542     mapped.  */
543
544  if (!n)
545    {
546      gomp_mutex_unlock (&acc_dev->lock);
547      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
548    }
549
550  d = (void *) (n->tgt->tgt_start + n->tgt_offset
551		+ (uintptr_t) h - n->host_start);
552
553  host_size = n->host_end - n->host_start;
554
555  if (n->host_start != (uintptr_t) h || host_size != s)
556    {
557      gomp_mutex_unlock (&acc_dev->lock);
558      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
559		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
560    }
561
562  gomp_mutex_unlock (&acc_dev->lock);
563
564  if (f & FLAG_COPYOUT)
565    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
566
567  acc_unmap_data (h);
568
569  if (!acc_dev->free_func (acc_dev->target_id, d))
570    gomp_fatal ("error in freeing device memory in %s", libfnname);
571}
572
/* Unmap host range [H,+S) and free its device memory through
   delete_copyout, without FLAG_COPYOUT.  */
void
acc_delete (void *h , size_t s)
{
  const unsigned flags = 0;

  delete_copyout (flags, h, s, __FUNCTION__);
}
578
579void
580acc_copyout (void *h, size_t s)
581{
582  delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__);
583}
584
585static void
586update_dev_host (int is_dev, void *h, size_t s)
587{
588  splay_tree_key n;
589  void *d;
590
591  goacc_lazy_initialize ();
592
593  struct goacc_thread *thr = goacc_thread ();
594  struct gomp_device_descr *acc_dev = thr->dev;
595
596  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
597    return;
598
599  gomp_mutex_lock (&acc_dev->lock);
600
601  n = lookup_host (acc_dev, h, s);
602
603  if (!n)
604    {
605      gomp_mutex_unlock (&acc_dev->lock);
606      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
607    }
608
609  d = (void *) (n->tgt->tgt_start + n->tgt_offset
610		+ (uintptr_t) h - n->host_start);
611
612  if (is_dev)
613    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
614  else
615    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
616
617  gomp_mutex_unlock (&acc_dev->lock);
618}
619
/* Copy host range [H,+S) to its device copy (update_dev_host with a
   nonzero IS_DEV).  */
void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
625
/* Copy the device copy of host range [H,+S) back to the host
   (update_dev_host with a zero IS_DEV).  */
void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
631
632void
633gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
634			 void *kinds)
635{
636  struct target_mem_desc *tgt;
637  struct goacc_thread *thr = goacc_thread ();
638  struct gomp_device_descr *acc_dev = thr->dev;
639
640  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
641  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
642		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
643  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
644
645  gomp_mutex_lock (&acc_dev->lock);
646  tgt->prev = acc_dev->openacc.data_environ;
647  acc_dev->openacc.data_environ = tgt;
648  gomp_mutex_unlock (&acc_dev->lock);
649}
650
651void
652gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
653{
654  struct goacc_thread *thr = goacc_thread ();
655  struct gomp_device_descr *acc_dev = thr->dev;
656  splay_tree_key n;
657  struct target_mem_desc *t;
658  int minrefs = (mapnum == 1) ? 2 : 3;
659
660  gomp_mutex_lock (&acc_dev->lock);
661
662  n = lookup_host (acc_dev, h, 1);
663
664  if (!n)
665    {
666      gomp_mutex_unlock (&acc_dev->lock);
667      gomp_fatal ("%p is not a mapped block", (void *)h);
668    }
669
670  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
671
672  t = n->tgt;
673
674  struct target_mem_desc *tp;
675
676  if (t->refcount == minrefs)
677    {
678      /* This is the last reference, so pull the descriptor off the
679	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
680	 freeing the device memory. */
681      t->tgt_end = 0;
682      t->to_free = 0;
683
684      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
685	   tp = t, t = t->prev)
686	{
687	  if (n->tgt == t)
688	    {
689	      if (tp)
690		tp->prev = t->prev;
691	      else
692		acc_dev->openacc.data_environ = t->prev;
693	      break;
694	    }
695	}
696    }
697
698  if (force_copyfrom)
699    t->list[0].copy_from = 1;
700
701  gomp_mutex_unlock (&acc_dev->lock);
702
703  /* If running synchronously, unmap immediately.  */
704  if (async < acc_async_noval)
705    gomp_unmap_vars (t, true);
706  else
707    t->device_descr->openacc.register_async_cleanup_func (t, async);
708
709  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
710}
711