oacc-mem.c revision 1.1.1.1
/* OpenACC Runtime memory management routines
2
3   Copyright (C) 2013-2015 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "openacc.h"
30#include "config.h"
31#include "libgomp.h"
32#include "gomp-constants.h"
33#include "oacc-int.h"
34#include "splay-tree.h"
35#include <stdint.h>
36#include <assert.h>
37
38/* Return block containing [H->S), or NULL if not contained.  */
39
40static splay_tree_key
41lookup_host (struct gomp_device_descr *dev, void *h, size_t s)
42{
43  struct splay_tree_key_s node;
44  splay_tree_key key;
45
46  node.host_start = (uintptr_t) h;
47  node.host_end = (uintptr_t) h + s;
48
49  gomp_mutex_lock (&dev->lock);
50  key = splay_tree_lookup (&dev->mem_map, &node);
51  gomp_mutex_unlock (&dev->lock);
52
53  return key;
54}
55
56/* Return block containing [D->S), or NULL if not contained.
57   The list isn't ordered by device address, so we have to iterate
58   over the whole array.  This is not expected to be a common
59   operation.  */
60
61static splay_tree_key
62lookup_dev (struct target_mem_desc *tgt, void *d, size_t s)
63{
64  int i;
65  struct target_mem_desc *t;
66
67  if (!tgt)
68    return NULL;
69
70  gomp_mutex_lock (&tgt->device_descr->lock);
71
72  for (t = tgt; t != NULL; t = t->prev)
73    {
74      if (t->tgt_start <= (uintptr_t) d && t->tgt_end >= (uintptr_t) d + s)
75        break;
76    }
77
78  gomp_mutex_unlock (&tgt->device_descr->lock);
79
80  if (!t)
81    return NULL;
82
83  for (i = 0; i < t->list_count; i++)
84    {
85      void * offset;
86
87      splay_tree_key k = &t->array[i].key;
88      offset = d - t->tgt_start + k->tgt_offset;
89
90      if (k->host_start + offset <= (void *) k->host_end)
91        return k;
92    }
93
94  return NULL;
95}
96
97/* OpenACC is silent on how memory exhaustion is indicated.  We return
98   NULL.  */
99
100void *
101acc_malloc (size_t s)
102{
103  if (!s)
104    return NULL;
105
106  goacc_lazy_initialize ();
107
108  struct goacc_thread *thr = goacc_thread ();
109
110  assert (thr->dev);
111
112  return thr->dev->alloc_func (thr->dev->target_id, s);
113}
114
115/* OpenACC 2.0a (3.2.16) doesn't specify what to do in the event
116   the device address is mapped. We choose to check if it mapped,
117   and if it is, to unmap it. */
118void
119acc_free (void *d)
120{
121  splay_tree_key k;
122  struct goacc_thread *thr = goacc_thread ();
123
124  if (!d)
125    return;
126
127  assert (thr && thr->dev);
128
129  /* We don't have to call lazy open here, as the ptr value must have
130     been returned by acc_malloc.  It's not permitted to pass NULL in
131     (unless you got that null from acc_malloc).  */
132  if ((k = lookup_dev (thr->dev->openacc.data_environ, d, 1)))
133   {
134     void *offset;
135
136     offset = d - k->tgt->tgt_start + k->tgt_offset;
137
138     acc_unmap_data ((void *)(k->host_start + offset));
139   }
140
141  thr->dev->free_func (thr->dev->target_id, d);
142}
143
144void
145acc_memcpy_to_device (void *d, void *h, size_t s)
146{
147  /* No need to call lazy open here, as the device pointer must have
148     been obtained from a routine that did that.  */
149  struct goacc_thread *thr = goacc_thread ();
150
151  assert (thr && thr->dev);
152
153  thr->dev->host2dev_func (thr->dev->target_id, d, h, s);
154}
155
156void
157acc_memcpy_from_device (void *h, void *d, size_t s)
158{
159  /* No need to call lazy open here, as the device pointer must have
160     been obtained from a routine that did that.  */
161  struct goacc_thread *thr = goacc_thread ();
162
163  assert (thr && thr->dev);
164
165  thr->dev->dev2host_func (thr->dev->target_id, h, d, s);
166}
167
168/* Return the device pointer that corresponds to host data H.  Or NULL
169   if no mapping.  */
170
171void *
172acc_deviceptr (void *h)
173{
174  splay_tree_key n;
175  void *d;
176  void *offset;
177
178  goacc_lazy_initialize ();
179
180  struct goacc_thread *thr = goacc_thread ();
181
182  n = lookup_host (thr->dev, h, 1);
183
184  if (!n)
185    return NULL;
186
187  offset = h - n->host_start;
188
189  d = n->tgt->tgt_start + n->tgt_offset + offset;
190
191  return d;
192}
193
194/* Return the host pointer that corresponds to device data D.  Or NULL
195   if no mapping.  */
196
197void *
198acc_hostptr (void *d)
199{
200  splay_tree_key n;
201  void *h;
202  void *offset;
203
204  goacc_lazy_initialize ();
205
206  struct goacc_thread *thr = goacc_thread ();
207
208  n = lookup_dev (thr->dev->openacc.data_environ, d, 1);
209
210  if (!n)
211    return NULL;
212
213  offset = d - n->tgt->tgt_start + n->tgt_offset;
214
215  h = n->host_start + offset;
216
217  return h;
218}
219
220/* Return 1 if host data [H,+S] is present on the device.  */
221
222int
223acc_is_present (void *h, size_t s)
224{
225  splay_tree_key n;
226
227  if (!s || !h)
228    return 0;
229
230  goacc_lazy_initialize ();
231
232  struct goacc_thread *thr = goacc_thread ();
233  struct gomp_device_descr *acc_dev = thr->dev;
234
235  n = lookup_host (acc_dev, h, s);
236
237  if (n && ((uintptr_t)h < n->host_start
238	    || (uintptr_t)h + s > n->host_end
239	    || s > n->host_end - n->host_start))
240    n = NULL;
241
242  return n != NULL;
243}
244
245/* Create a mapping for host [H,+S] -> device [D,+S] */
246
247void
248acc_map_data (void *h, void *d, size_t s)
249{
250  struct target_mem_desc *tgt;
251  size_t mapnum = 1;
252  void *hostaddrs = h;
253  void *devaddrs = d;
254  size_t sizes = s;
255  unsigned short kinds = GOMP_MAP_ALLOC;
256
257  goacc_lazy_initialize ();
258
259  struct goacc_thread *thr = goacc_thread ();
260  struct gomp_device_descr *acc_dev = thr->dev;
261
262  if (acc_dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
263    {
264      if (d != h)
265        gomp_fatal ("cannot map data on shared-memory system");
266
267      tgt = gomp_map_vars (NULL, 0, NULL, NULL, NULL, NULL, true, false);
268    }
269  else
270    {
271      struct goacc_thread *thr = goacc_thread ();
272
273      if (!d || !h || !s)
274	gomp_fatal ("[%p,+%d]->[%p,+%d] is a bad map",
275                    (void *)h, (int)s, (void *)d, (int)s);
276
277      if (lookup_host (acc_dev, h, s))
278	gomp_fatal ("host address [%p, +%d] is already mapped", (void *)h,
279		    (int)s);
280
281      if (lookup_dev (thr->dev->openacc.data_environ, d, s))
282	gomp_fatal ("device address [%p, +%d] is already mapped", (void *)d,
283		    (int)s);
284
285      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, &devaddrs, &sizes,
286			   &kinds, true, false);
287    }
288
289  tgt->prev = acc_dev->openacc.data_environ;
290  acc_dev->openacc.data_environ = tgt;
291}
292
293void
294acc_unmap_data (void *h)
295{
296  struct goacc_thread *thr = goacc_thread ();
297  struct gomp_device_descr *acc_dev = thr->dev;
298
299  /* No need to call lazy open, as the address must have been mapped.  */
300
301  size_t host_size;
302  splay_tree_key n = lookup_host (acc_dev, h, 1);
303  struct target_mem_desc *t;
304
305  if (!n)
306    gomp_fatal ("%p is not a mapped block", (void *)h);
307
308  host_size = n->host_end - n->host_start;
309
310  if (n->host_start != (uintptr_t) h)
311    gomp_fatal ("[%p,%d] surrounds1 %p",
312		(void *) n->host_start, (int) host_size, (void *) h);
313
314  t = n->tgt;
315
316  if (t->refcount == 2)
317    {
318      struct target_mem_desc *tp;
319
320      /* This is the last reference, so pull the descriptor off the
321         chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
322         freeing the device memory. */
323      t->tgt_end = 0;
324      t->to_free = 0;
325
326      gomp_mutex_lock (&acc_dev->lock);
327
328      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
329	   tp = t, t = t->prev)
330	if (n->tgt == t)
331	  {
332	    if (tp)
333	      tp->prev = t->prev;
334	    else
335	      acc_dev->openacc.data_environ = t->prev;
336
337	    break;
338	  }
339
340      gomp_mutex_unlock (&acc_dev->lock);
341    }
342
343  gomp_unmap_vars (t, true);
344}
345
346#define FLAG_PRESENT (1 << 0)
347#define FLAG_CREATE (1 << 1)
348#define FLAG_COPY (1 << 2)
349
350static void *
351present_create_copy (unsigned f, void *h, size_t s)
352{
353  void *d;
354  splay_tree_key n;
355
356  if (!h || !s)
357    gomp_fatal ("[%p,+%d] is a bad range", (void *)h, (int)s);
358
359  goacc_lazy_initialize ();
360
361  struct goacc_thread *thr = goacc_thread ();
362  struct gomp_device_descr *acc_dev = thr->dev;
363
364  n = lookup_host (acc_dev, h, s);
365  if (n)
366    {
367      /* Present. */
368      d = (void *) (n->tgt->tgt_start + n->tgt_offset);
369
370      if (!(f & FLAG_PRESENT))
371        gomp_fatal ("[%p,+%d] already mapped to [%p,+%d]",
372            (void *)h, (int)s, (void *)d, (int)s);
373      if ((h + s) > (void *)n->host_end)
374        gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
375    }
376  else if (!(f & FLAG_CREATE))
377    {
378      gomp_fatal ("[%p,+%d] not mapped", (void *)h, (int)s);
379    }
380  else
381    {
382      struct target_mem_desc *tgt;
383      size_t mapnum = 1;
384      unsigned short kinds;
385      void *hostaddrs = h;
386
387      if (f & FLAG_COPY)
388	kinds = GOMP_MAP_TO;
389      else
390	kinds = GOMP_MAP_ALLOC;
391
392      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
393			   false);
394
395      gomp_mutex_lock (&acc_dev->lock);
396
397      d = tgt->to_free;
398      tgt->prev = acc_dev->openacc.data_environ;
399      acc_dev->openacc.data_environ = tgt;
400
401      gomp_mutex_unlock (&acc_dev->lock);
402    }
403
404  return d;
405}
406
407void *
408acc_create (void *h, size_t s)
409{
410  return present_create_copy (FLAG_CREATE, h, s);
411}
412
413void *
414acc_copyin (void *h, size_t s)
415{
416  return present_create_copy (FLAG_CREATE | FLAG_COPY, h, s);
417}
418
419void *
420acc_present_or_create (void *h, size_t s)
421{
422  return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s);
423}
424
425void *
426acc_present_or_copyin (void *h, size_t s)
427{
428  return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s);
429}
430
431#define FLAG_COPYOUT (1 << 0)
432
433static void
434delete_copyout (unsigned f, void *h, size_t s)
435{
436  size_t host_size;
437  splay_tree_key n;
438  void *d;
439  struct goacc_thread *thr = goacc_thread ();
440  struct gomp_device_descr *acc_dev = thr->dev;
441
442  n = lookup_host (acc_dev, h, s);
443
444  /* No need to call lazy open, as the data must already have been
445     mapped.  */
446
447  if (!n)
448    gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s);
449
450  d = (void *) (n->tgt->tgt_start + n->tgt_offset);
451
452  host_size = n->host_end - n->host_start;
453
454  if (n->host_start != (uintptr_t) h || host_size != s)
455    gomp_fatal ("[%p,%d] surrounds2 [%p,+%d]",
456		(void *) n->host_start, (int) host_size, (void *) h, (int) s);
457
458  if (f & FLAG_COPYOUT)
459    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
460
461  acc_unmap_data (h);
462
463  acc_dev->free_func (acc_dev->target_id, d);
464}
465
/* Remove the mapping for host range [H,+S] and free its device memory,
   without copying anything back (delete_copyout with no flags).  */

void
acc_delete (void *h, size_t s)
{
  const unsigned flags = 0;

  delete_copyout (flags, h, s);
}
471
472void acc_copyout (void *h, size_t s)
473{
474  delete_copyout (FLAG_COPYOUT, h, s);
475}
476
477static void
478update_dev_host (int is_dev, void *h, size_t s)
479{
480  splay_tree_key n;
481  void *d;
482  struct goacc_thread *thr = goacc_thread ();
483  struct gomp_device_descr *acc_dev = thr->dev;
484
485  n = lookup_host (acc_dev, h, s);
486
487  /* No need to call lazy open, as the data must already have been
488     mapped.  */
489
490  if (!n)
491    gomp_fatal ("[%p,%d] is not mapped", h, (int)s);
492
493  d = (void *) (n->tgt->tgt_start + n->tgt_offset);
494
495  if (is_dev)
496    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
497  else
498    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
499}
500
/* Refresh the device copy of host range [H,+S] from host memory
   (update_dev_host in the host -> device direction).  */

void
acc_update_device (void *h, size_t s)
{
  const int to_device = 1;

  update_dev_host (to_device, h, s);
}
506
/* Refresh host range [H,+S] from its device copy (update_dev_host in
   the device -> host direction).  */

void
acc_update_self (void *h, size_t s)
{
  const int to_device = 0;

  update_dev_host (to_device, h, s);
}
512
513void
514gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
515			 void *kinds)
516{
517  struct target_mem_desc *tgt;
518  struct goacc_thread *thr = goacc_thread ();
519  struct gomp_device_descr *acc_dev = thr->dev;
520
521  gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
522  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
523		       NULL, sizes, kinds, true, false);
524  gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
525  tgt->prev = acc_dev->openacc.data_environ;
526  acc_dev->openacc.data_environ = tgt;
527}
528
529void
530gomp_acc_remove_pointer (void *h, bool force_copyfrom, int async, int mapnum)
531{
532  struct goacc_thread *thr = goacc_thread ();
533  struct gomp_device_descr *acc_dev = thr->dev;
534  splay_tree_key n;
535  struct target_mem_desc *t;
536  int minrefs = (mapnum == 1) ? 2 : 3;
537
538  n = lookup_host (acc_dev, h, 1);
539
540  if (!n)
541    gomp_fatal ("%p is not a mapped block", (void *)h);
542
543  gomp_debug (0, "  %s: restore mappings\n", __FUNCTION__);
544
545  t = n->tgt;
546
547  struct target_mem_desc *tp;
548
549  gomp_mutex_lock (&acc_dev->lock);
550
551  if (t->refcount == minrefs)
552    {
553      /* This is the last reference, so pull the descriptor off the
554	 chain. This avoids gomp_unmap_vars via gomp_unmap_tgt from
555	 freeing the device memory. */
556      t->tgt_end = 0;
557      t->to_free = 0;
558
559      for (tp = NULL, t = acc_dev->openacc.data_environ; t != NULL;
560	   tp = t, t = t->prev)
561	{
562	  if (n->tgt == t)
563	    {
564	      if (tp)
565		tp->prev = t->prev;
566	      else
567		acc_dev->openacc.data_environ = t->prev;
568	      break;
569	    }
570	}
571    }
572
573  if (force_copyfrom)
574    t->list[0]->copy_from = 1;
575
576  gomp_mutex_unlock (&acc_dev->lock);
577
578  /* If running synchronously, unmap immediately.  */
579  if (async < acc_async_noval)
580    gomp_unmap_vars (t, true);
581  else
582    {
583      gomp_copy_from_async (t);
584      acc_dev->openacc.register_async_cleanup_func (t);
585    }
586
587  gomp_debug (0, "  %s: mappings restored\n", __FUNCTION__);
588}
589