/* oacc-mem.c revision 1.1.1.3 */
1/* OpenACC Runtime initialization routines
2
3   Copyright (C) 2013-2016 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "openacc.h"
30#include "config.h"
31#include "libgomp.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#include <stdint.h>
35#include <assert.h>
36
37/* Return block containing [H->S), or NULL if not contained.  The device lock
38   for DEV must be locked on entry, and remains locked on exit.  */
39
40static splay_tree_key
41lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42{
43  struct splay_tree_key_s node;
44  splay_tree_key key;
45
46  node.host_start = (uintptr_t) h;
47  node.host_end = (uintptr_t) h + s;
48
49  key = splay_tree_lookup (&dev->mem_map, &node);
50
51  return key;
52}
53
54/* Return block containing [D->S), or NULL if not contained.
55   The list isn't ordered by device address, so we have to iterate
56   over the whole array.  This is not expected to be a common
57   operation.  The device lock associated with TGT must be locked on entry, and
58   remains locked on exit.  */
59
60static splay_tree_key
61lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
62{
63  int i;
64  struct target_mem_desc *t;
65
66  if (!tgt)
67    return NULL;
68
69  for (t = tgt; t != NULL; t = t->prev)
70    {
71      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
72        break;
73    }
74
75  if (!t)
76    return NULL;
77
78  for (i = 0; i < t->list_count; i++)
79    {
80      void * offset;
81
82      splay_tree_key k = &t->array[i].key;
83      offset = d - t->tgt_start + k->tgt_offset;
84
85      if (k->host_start + offset <= (void *) k->host_end)
86        return k;
87    }
88
89  return NULL;
90}
91
92/* OpenACC is silent on how memory exhaustion is indicated.  We return
93   NULL.  */
94
95void *
96acc_malloc (size_t s)
97{
98  if (!s)
99    return NULL;
100
101  goacc_lazy_initialize ();
102
103  struct goacc_thread *thr = goacc_thread ();
104
105  assert (thr->dev);
106
107  return thr->dev->alloc_func (thr->dev->target_id, s);
108}
109
110/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
111   the device address is mapped. We choose to check if it mapped,
112   and if it is, to unmap it. */
113void
114acc_free (void *d)
115{
116  splay_tree_key k;
117
118  if (!d)
119    return;
120
121  struct goacc_thread *thr = goacc_thread ();
122
123  assert (thr && thr->dev);
124
125  struct gomp_device_descr *acc_dev = thr->dev;
126
127  gomp_mutex_lock (&acc_dev->lock);
128
129  /* We don't have to call lazy open here, as the ptr value must have
130     been returned by acc_malloc.  It's not permitted to pass NULL in
131     (unless you got that null from acc_malloc).  */
132  if ((k = lookup_dev (acc_dev->openacc.data_environ, d, 1)))
133    {
134      void *offset;
135
136      offset = d - k->tgt->tgt_start + k->tgt_offset;
137
138      gomp_mutex_unlock (&acc_dev->lock);
139
140      acc_unmap_data ((void *)(k->host_start + offset));
141    }
142  else
143    gomp_mutex_unlock (&acc_dev->lock);
144
145  acc_dev->free_func (acc_dev->target_id, d);
146}
147
148void
149acc_memcpy_to_device (void *d, void *h, size_t s)
150{
151  /* No need to call lazy open here, as the device pointer must have
152     been obtained from a routine that did that.  */
153  struct goacc_thread *thr = goacc_thread ();
154
155  assert (thr && thr->dev);
156
157  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
158}
159
160void
161acc_memcpy_from_device (void *h, void *d, size_t s)
162{
163  /* No need to call lazy open here, as the device pointer must have
164     been obtained from a routine that did that.  */
165  struct goacc_thread *thr = goacc_thread ();
166
167  assert (thr && thr->dev);
168
169  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
170}
171
172/* Return the device pointer that corresponds to host data H.  Or NULL
173   if no mapping.  */
174
175void *
176acc_deviceptr (void *h)
177{
178  splay_tree_key n;
179  void *d;
180  void *offset;
181
182  goacc_lazy_initialize ();
183
184  struct goacc_thread *thr = goacc_thread ();
185  struct gomp_device_descr *dev = thr->dev;
186
187  gomp_mutex_lock (&dev->lock);
188
189  n = lookup_host (dev, h, 1);
190
191  if (!n)
192    {
193      gomp_mutex_unlock (&dev->lock);
194      return NULL;
195    }
196
197  offset = h - n->host_start;
198
199  d = n->tgt->tgt_start + n->tgt_offset + offset;
200
201  gomp_mutex_unlock (&dev->lock);
202
203  return d;
204}
205
206/* Return the host pointer that corresponds to device data D.  Or NULL
207   if no mapping.  */
208
209void *
210acc_hostptr (void *d)
211{
212  splay_tree_key n;
213  void *h;
214  void *offset;
215
216  goacc_lazy_initialize ();
217
218  struct goacc_thread *thr = goacc_thread ();
219  struct gomp_device_descr *acc_dev = thr->dev;
220
221  gomp_mutex_lock (&acc_dev->lock);
222
223  n = lookup_dev (acc_dev->openacc.data_environ, d, 1);
224
225  if (!n)
226    {
227      gomp_mutex_unlock (&acc_dev->lock);
228      return NULL;
229    }
230
231  offset = d - n->tgt->tgt_start + n->tgt_offset;
232
233  h = n->host_start + offset;
234
235  gomp_mutex_unlock (&acc_dev->lock);
236
237  return h;
238}
239
240/* Return 1 if host data [H,+S] is present on the device.  */
241
242int
243acc_is_present (void *h, size_t s)
244{
245  splay_tree_key n;
246
247  if (!s || !h)
248    return 0;
249
250  goacc_lazy_initialize ();
251
252  struct goacc_thread *thr = goacc_thread ();
253  struct gomp_device_descr *acc_dev = thr->dev;
254
255  gomp_mutex_lock (&acc_dev->lock);
256
257  n = lookup_host (acc_dev, h, s);
258
259  if (n && ((uintptr_t)h < n->host_start
260	    || (uintptr_t)h + s > n->host_end
261	    || s > n->host_end - n->host_start))
262    n = NULL;
263
264  gomp_mutex_unlock (&acc_dev->lock);
265
266  return n != NULL;
267}
268
269/* Create a mapping for host [H,+S] -> device [D,+S] */
270
271void
272acc_map_data (void *h, void *d, size_t s)
273{
274  struct target_mem_desc *tgt;
275  size_t mapnum = 1;
276  void *hostaddrs = h;
277  void *devaddrs = d;
278  size_t sizes = s;
279  unsigned short kinds = GOMP_MAP_ALLOC;
280
281  goacc_lazy_initialize ();
282
283  struct goacc_thread *thr = goacc_thread ();
284  struct gomp_device_descr *acc_dev = thr->dev;
285
286  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
287    {
288      if (d != h)
289        gomp_fatal ("cannot map data on shared-memory system");
290
291      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true,
292			   GOMP_MAP_VARS_OPENACC);
293    }
294  else
295    {
296      struct goacc_thread *thr = goacc_thread ();
297
298      if (!d || !h || !s)
299	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
300                    (void *)h, (int)s, (void *)d, (int)s);
301
302      gomp_mutex_lock (&acc_dev->lock);
303
304      if (lookup_host (acc_dev, h, s))
305        {
306	  gomp_mutex_unlock (&acc_dev->lock);
307	  gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
308		      (int)s);
309	}
310
311      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
312        {
313	  gomp_mutex_unlock (&acc_dev->lock);
314	  gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
315		      (int)s);
316	}
317
318      gomp_mutex_unlock (&acc_dev->lock);
319
320      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
321			   &kinds, true, GOMP_MAP_VARS_OPENACC);
322    }
323
324  gomp_mutex_lock (&acc_dev->lock);
325  tgt->prev = acc_dev->openacc.data_environ;
326  acc_dev->openacc.data_environ = tgt;
327  gomp_mutex_unlock (&acc_dev->lock);
328}
329
/* Remove the mapping of host address H created by acc_map_data (or by
   the create/copyin paths).  H must be exactly the start of a mapped
   block, otherwise we abort via gomp_fatal.  */

void
acc_unmap_data (void *h)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  /* No need to call lazy open, as the address must have been mapped.  */

  size_t host_size;

  gomp_mutex_lock (&acc_dev->lock);

  splay_tree_key n = lookup_host (acc_dev, h, 1);
  struct target_mem_desc *t;

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  host_size = n->host_end - n->host_start;

  /* An interior address is rejected; H must name the block start.  */
  if (n->host_start != (uintptr_t) h)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,%d] surrounds %p",
		  (void *) n->host_start, (int) host_size, (void *) h);
    }

  t = n->tgt;

  if (t->refcount == 2)
    {
      struct target_mem_desc *tp;

      /* This is the last reference, so pull the descriptor off the
         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
         freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink N's descriptor from the data_environ chain; TP trails
	 T by one node so the predecessor link can be patched.  Note
	 that T is reused as the loop cursor here and ends up either
	 back at N->tgt (on break) or NULL if it was not chained.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	if (n->tgt == t)
	  {
	    if (tp)
	      tp->prev = t->prev;
	    else
	      acc_dev->openacc.data_environ = t->prev;

	    break;
	  }
    }

  gomp_mutex_unlock (&acc_dev->lock);

  /* Release the mapping itself (device memory was preserved above by
     zeroing tgt_end/to_free when this was the last reference).  */
  gomp_unmap_vars (t, true);
}
389
/* Flag bits for present_create_copy.  */
#define FLAG_PRESENT (1 << 0)	/* An existing mapping satisfies the call.  */
#define FLAG_CREATE (1 << 1)	/* A new mapping may be created.  */
#define FLAG_COPY (1 << 2)	/* Copy host data to the device on creation.  */

/* Shared implementation of acc_create, acc_copyin, acc_present_or_create
   and acc_present_or_copyin.  Return the device address for the host
   block [H,+S], creating (and, with FLAG_COPY, populating) the mapping
   as permitted by the flag bits F.  Aborts via gomp_fatal on misuse.  */

static void *
present_create_copy (unsigned f, void *h, size_t s)
{
  void *d;
  splay_tree_key n;

  if (!h || !s)
    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);

  goacc_lazy_initialize ();

  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, s);
  if (n)
    {
      /* Present. */
      d = (void *) (n->tgt->tgt_start + n->tgt_offset);

      /* Without FLAG_PRESENT an already-present block is an error
	 (plain acc_create / acc_copyin semantics).  */
      if (!(f & FLAG_PRESENT))
        {
	  gomp_mutex_unlock (&acc_dev->lock);
          gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
        	      (void *)h, (int)s, (void *)d, (int)s);
	}
      /* The existing mapping must cover the whole of [H, H+S).  */
      if ((h + s) > (void *)n->host_end)
	{
	  gomp_mutex_unlock (&acc_dev->lock);
	  gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
	}

      gomp_mutex_unlock (&acc_dev->lock);
    }
  else if (!(f & FLAG_CREATE))
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
    }
  else
    {
      struct target_mem_desc *tgt;
      size_t mapnum = 1;
      unsigned short kinds;
      void *hostaddrs = h;

      if (f & FLAG_COPY)
	kinds = GOMP_MAP_TO;
      else
	kinds = GOMP_MAP_ALLOC;

      /* Drop the lock around gomp_map_vars, which takes it itself.  */
      gomp_mutex_unlock (&acc_dev->lock);

      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
			   GOMP_MAP_VARS_OPENACC);

      gomp_mutex_lock (&acc_dev->lock);

      /* to_free records the device allocation backing this descriptor;
	 it serves as the device address of H here — NOTE(review):
	 relies on gomp_map_vars setting to_free to the block start.  */
      d = tgt->to_free;
      tgt->prev = acc_dev->openacc.data_environ;
      acc_dev->openacc.data_environ = tgt;

      gomp_mutex_unlock (&acc_dev->lock);
    }

  return d;
}
463
464void *
465acc_create (void *h, size_t s)
466{
467  return present_create_copy (FLAG_CREATE, h, s);
468}
469
470void *
471acc_copyin (void *h, size_t s)
472{
473  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
474}
475
476void *
477acc_present_or_create (void *h, size_t s)
478{
479  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
480}
481
482void *
483acc_present_or_copyin (void *h, size_t s)
484{
485  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
486}
487
488#define FLAG_COPYOUT (1 << 0)
489
490static void
491delete_copyout (unsigned f, void *h, size_t s)
492{
493  size_t host_size;
494  splay_tree_key n;
495  void *d;
496  struct goacc_thread *thr = goacc_thread ();
497  struct gomp_device_descr *acc_dev = thr->dev;
498
499  gomp_mutex_lock (&acc_dev->lock);
500
501  n = lookup_host (acc_dev, h, s);
502
503  /* No need to call lazy open, as the data must already have been
504     mapped.  */
505
506  if (!n)
507    {
508      gomp_mutex_unlock (&acc_dev->lock);
509      gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
510    }
511
512  d = (void *) (n->tgt->tgt_start + n->tgt_offset
513		+ (uintptr_t) h - n->host_start);
514
515  host_size = n->host_end - n->host_start;
516
517  if (n->host_start != (uintptr_t) h || host_size != s)
518    {
519      gomp_mutex_unlock (&acc_dev->lock);
520      gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
521		  (void *) n->host_start, (int) host_size, (void *) h, (int) s);
522    }
523
524  gomp_mutex_unlock (&acc_dev->lock);
525
526  if (f & FLAG_COPYOUT)
527    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
528
529  acc_unmap_data (h);
530
531  acc_dev->free_func (acc_dev->target_id, d);
532}
533
/* Remove the device mapping of host block [H,+S] without copying the
   device data back.  */

void
acc_delete (void *h, size_t s)
{
  delete_copyout (0, h, s);
}
539
540void acc_copyout (void *h, size_t s)
541{
542  delete_copyout (FLAG_COPYOUT, h, s);
543}
544
545static void
546update_dev_host (int is_dev, void *h, size_t s)
547{
548  splay_tree_key n;
549  void *d;
550
551  goacc_lazy_initialize ();
552
553  struct goacc_thread *thr = goacc_thread ();
554  struct gomp_device_descr *acc_dev = thr->dev;
555
556  gomp_mutex_lock (&acc_dev->lock);
557
558  n = lookup_host (acc_dev, h, s);
559
560  if (!n)
561    {
562      gomp_mutex_unlock (&acc_dev->lock);
563      gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
564    }
565
566  d = (void *) (n->tgt->tgt_start + n->tgt_offset
567		+ (uintptr_t) h - n->host_start);
568
569  gomp_mutex_unlock (&acc_dev->lock);
570
571  if (is_dev)
572    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
573  else
574    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
575}
576
/* Refresh the device copy of host block [H,+S] from host memory.  */

void
acc_update_device (void *h, size_t s)
{
  update_dev_host (1, h, s);
}
582
/* Refresh the host copy of host block [H,+S] from device memory.  */

void
acc_update_self (void *h, size_t s)
{
  update_dev_host (0, h, s);
}
588
589void
590gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
591			 void *kinds)
592{
593  struct target_mem_desc *tgt;
594  struct goacc_thread *thr = goacc_thread ();
595  struct gomp_device_descr *acc_dev = thr->dev;
596
597  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
598  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
599		       NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
600  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
601
602  gomp_mutex_lock (&acc_dev->lock);
603  tgt->prev = acc_dev->openacc.data_environ;
604  acc_dev->openacc.data_environ = tgt;
605  gomp_mutex_unlock (&acc_dev->lock);
606}
607
/* Undo a pointer mapping previously set up by gomp_acc_insert_pointer.
   H is the host address of the mapped block; FORCE_COPYFROM forces a
   device-to-host copy of the first list entry on unmap; ASYNC selects
   synchronous vs asynchronous teardown; MAPNUM is the number of
   entries originally mapped (it determines the refcount threshold at
   which the descriptor is considered dead).  */

void
gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
{
  struct goacc_thread *thr = goacc_thread ();
  struct gomp_device_descr *acc_dev = thr->dev;
  splay_tree_key n;
  struct target_mem_desc *t;
  /* NOTE(review): threshold at which this is the last live reference —
     presumably 2 vs 3 reflects the extra reference a multi-entry
     pointer mapping holds; confirm against gomp_map_vars.  */
  int minrefs = (mapnum == 1) ? 2 : 3;

  gomp_mutex_lock (&acc_dev->lock);

  n = lookup_host (acc_dev, h, 1);

  if (!n)
    {
      gomp_mutex_unlock (&acc_dev->lock);
      gomp_fatal ("%p is not a mapped block", (void *)h);
    }

  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);

  t = n->tgt;

  struct target_mem_desc *tp;

  if (t->refcount == minrefs)
    {
      /* This is the last reference, so pull the descriptor off the
	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
	 freeing the device memory. */
      t->tgt_end = 0;
      t->to_free = 0;

      /* Unlink N's descriptor from the data_environ chain; TP trails
	 T by one node so the predecessor link can be patched.  */
      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
	   tp = t, t = t->prev)
	{
	  if (n->tgt == t)
	    {
	      if (tp)
		tp->prev = t->prev;
	      else
		acc_dev->openacc.data_environ = t->prev;
	      break;
	    }
	}
    }

  /* Ensure the first entry is copied back to the host on unmap.  */
  if (force_copyfrom)
    t->list[0].copy_from = 1;

  gomp_mutex_unlock (&acc_dev->lock);

  /* If running synchronously, unmap immediately.  */
  if (async < acc_async_noval)
    gomp_unmap_vars (t, true);
  else
    {
      /* Asynchronous: queue the copy-back and let the device's async
	 cleanup hook release the descriptor later.  */
      gomp_copy_from_async (t);
      acc_dev->openacc.register_async_cleanup_func (t);
    }

  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
}
671