oacc-init.c revision 1.1.1.4
1/* OpenACC Runtime initialization routines
2
3   Copyright (C) 2013-2018 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include "libgomp.h"
30#include "oacc-int.h"
31#include "openacc.h"
32#include <assert.h>
33#include <stdlib.h>
34#include <strings.h>
35#include <stdbool.h>
36#include <string.h>
37
38/* This lock is used to protect access to cached_base_dev, dispatchers and
39   the (abstract) initialisation state of attached offloading devices.  */
40
41static gomp_mutex_t acc_device_lock;
42
43/* A cached version of the dispatcher for the global "current" accelerator type,
44   e.g. used as the default when creating new host threads.  This is the
45   device-type equivalent of goacc_device_num (which specifies which device to
46   use out of potentially several of the same type).  If there are several
47   devices of a given type, this points at the first one.  */
48
49static struct gomp_device_descr *cached_base_dev = NULL;
50
51#if defined HAVE_TLS || defined USE_EMUTLS
52__thread struct goacc_thread *goacc_tls_data;
53#else
54pthread_key_t goacc_tls_key;
55#endif
56static pthread_key_t goacc_cleanup_key;
57
58static struct goacc_thread *goacc_threads;
59static gomp_mutex_t goacc_thread_lock;
60
61/* An array of dispatchers for device types, indexed by the type.  This array
62   only references "base" devices, and other instances of the same type are
63   found by simply indexing from each such device (which are stored linearly,
64   grouped by device in target.c:devices).  */
65static struct gomp_device_descr *dispatchers[_ACC_device_hwm] = { 0 };
66
67attribute_hidden void
68goacc_register (struct gomp_device_descr *disp)
69{
70  /* Only register the 0th device here.  */
71  if (disp->target_id != 0)
72    return;
73
74  gomp_mutex_lock (&acc_device_lock);
75
76  assert (acc_device_type (disp->type) != acc_device_none
77	  && acc_device_type (disp->type) != acc_device_default
78	  && acc_device_type (disp->type) != acc_device_not_host);
79  assert (!dispatchers[disp->type]);
80  dispatchers[disp->type] = disp;
81
82  gomp_mutex_unlock (&acc_device_lock);
83}
84
/* Translate a device NAME into the spelling OpenACC uses for it: "nvptx"
   becomes "nvidia", every other name is returned unchanged (the very same
   pointer that was passed in).  */

static const char *
get_openacc_name (const char *name)
{
  return strcmp (name, "nvptx") != 0 ? name : "nvidia";
}
95
96static const char *
97name_of_acc_device_t (enum acc_device_t type)
98{
99  switch (type)
100    {
101    case acc_device_none: return "none";
102    case acc_device_default: return "default";
103    case acc_device_host: return "host";
104    case acc_device_not_host: return "not_host";
105    case acc_device_nvidia: return "nvidia";
106    default: gomp_fatal ("unknown device type %u", (unsigned) type);
107    }
108}
109
110/* ACC_DEVICE_LOCK must be held before calling this function.  If FAIL_IS_ERROR
111   is true, this function raises an error if there are no devices of type D,
112   otherwise it returns NULL in that case.  */
113
114static struct gomp_device_descr *
115resolve_device (acc_device_t d, bool fail_is_error)
116{
117  acc_device_t d_arg = d;
118
119  switch (d)
120    {
121    case acc_device_default:
122      {
123	if (goacc_device_type)
124	  {
125	    /* Lookup the named device.  */
126	    while (++d != _ACC_device_hwm)
127	      if (dispatchers[d]
128		  && !strcasecmp (goacc_device_type,
129				  get_openacc_name (dispatchers[d]->name))
130		  && dispatchers[d]->get_num_devices_func () > 0)
131		goto found;
132
133	    if (fail_is_error)
134	      {
135		gomp_mutex_unlock (&acc_device_lock);
136		gomp_fatal ("device type %s not supported", goacc_device_type);
137	      }
138	    else
139	      return NULL;
140	  }
141
142	/* No default device specified, so start scanning for any non-host
143	   device that is available.  */
144	d = acc_device_not_host;
145      }
146      /* FALLTHROUGH */
147
148    case acc_device_not_host:
149      /* Find the first available device after acc_device_not_host.  */
150      while (++d != _ACC_device_hwm)
151	if (dispatchers[d] && dispatchers[d]->get_num_devices_func () > 0)
152	  goto found;
153      if (d_arg == acc_device_default)
154	{
155	  d = acc_device_host;
156	  goto found;
157	}
158      if (fail_is_error)
159        {
160	  gomp_mutex_unlock (&acc_device_lock);
161	  gomp_fatal ("no device found");
162	}
163      else
164        return NULL;
165      break;
166
167    case acc_device_host:
168      break;
169
170    default:
171      if (d > _ACC_device_hwm)
172	{
173	  if (fail_is_error)
174	    goto unsupported_device;
175	  else
176	    return NULL;
177	}
178      break;
179    }
180 found:
181
182  assert (d != acc_device_none
183	  && d != acc_device_default
184	  && d != acc_device_not_host);
185
186  if (dispatchers[d] == NULL && fail_is_error)
187    {
188    unsupported_device:
189      gomp_mutex_unlock (&acc_device_lock);
190      gomp_fatal ("device type %s not supported", name_of_acc_device_t (d));
191    }
192
193  return dispatchers[d];
194}
195
196/* Emit a suitable error if no device of a particular type is available, or
197   the given device number is out-of-range.  */
198static void
199acc_dev_num_out_of_range (acc_device_t d, int ord, int ndevs)
200{
201  if (ndevs == 0)
202    gomp_fatal ("no devices of type %s available", name_of_acc_device_t (d));
203  else
204    gomp_fatal ("device %u out of range", ord);
205}
206
207/* This is called when plugins have been initialized, and serves to call
208   (indirectly) the target's device_init hook.  Calling multiple times without
209   an intervening acc_shutdown_1 call is an error.  ACC_DEVICE_LOCK must be
210   held before calling this function.  */
211
212static struct gomp_device_descr *
213acc_init_1 (acc_device_t d)
214{
215  struct gomp_device_descr *base_dev, *acc_dev;
216  int ndevs;
217
218  base_dev = resolve_device (d, true);
219
220  ndevs = base_dev->get_num_devices_func ();
221
222  if (ndevs <= 0 || goacc_device_num >= ndevs)
223    acc_dev_num_out_of_range (d, goacc_device_num, ndevs);
224
225  acc_dev = &base_dev[goacc_device_num];
226
227  gomp_mutex_lock (&acc_dev->lock);
228  if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
229    {
230      gomp_mutex_unlock (&acc_dev->lock);
231      gomp_fatal ("device already active");
232    }
233
234  gomp_init_device (acc_dev);
235  gomp_mutex_unlock (&acc_dev->lock);
236
237  return base_dev;
238}
239
240/* ACC_DEVICE_LOCK must be held before calling this function.  */
241
242static void
243acc_shutdown_1 (acc_device_t d)
244{
245  struct gomp_device_descr *base_dev;
246  struct goacc_thread *walk;
247  int ndevs, i;
248  bool devices_active = false;
249
250  /* Get the base device for this device type.  */
251  base_dev = resolve_device (d, true);
252
253  ndevs = base_dev->get_num_devices_func ();
254
255  /* Unload all the devices of this type that have been opened.  */
256  for (i = 0; i < ndevs; i++)
257    {
258      struct gomp_device_descr *acc_dev = &base_dev[i];
259
260      gomp_mutex_lock (&acc_dev->lock);
261      gomp_unload_device (acc_dev);
262      gomp_mutex_unlock (&acc_dev->lock);
263    }
264
265  gomp_mutex_lock (&goacc_thread_lock);
266
267  /* Free target-specific TLS data and close all devices.  */
268  for (walk = goacc_threads; walk != NULL; walk = walk->next)
269    {
270      if (walk->target_tls)
271	base_dev->openacc.destroy_thread_data_func (walk->target_tls);
272
273      walk->target_tls = NULL;
274
275      /* This would mean the user is shutting down OpenACC in the middle of an
276         "acc data" pragma.  Likely not intentional.  */
277      if (walk->mapped_data)
278	{
279	  gomp_mutex_unlock (&goacc_thread_lock);
280	  gomp_fatal ("shutdown in 'acc data' region");
281	}
282
283      /* Similarly, if this happens then user code has done something weird.  */
284      if (walk->saved_bound_dev)
285	{
286	  gomp_mutex_unlock (&goacc_thread_lock);
287	  gomp_fatal ("shutdown during host fallback");
288	}
289
290      if (walk->dev)
291	{
292	  gomp_mutex_lock (&walk->dev->lock);
293	  gomp_free_memmap (&walk->dev->mem_map);
294	  gomp_mutex_unlock (&walk->dev->lock);
295
296	  walk->dev = NULL;
297	  walk->base_dev = NULL;
298	}
299    }
300
301  gomp_mutex_unlock (&goacc_thread_lock);
302
303  /* Close all the devices of this type that have been opened.  */
304  bool ret = true;
305  for (i = 0; i < ndevs; i++)
306    {
307      struct gomp_device_descr *acc_dev = &base_dev[i];
308      gomp_mutex_lock (&acc_dev->lock);
309      if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
310        {
311	  devices_active = true;
312	  ret &= acc_dev->fini_device_func (acc_dev->target_id);
313	  acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
314	}
315      gomp_mutex_unlock (&acc_dev->lock);
316    }
317
318  if (!ret)
319    gomp_fatal ("device finalization failed");
320
321  if (!devices_active)
322    gomp_fatal ("no device initialized");
323}
324
325static struct goacc_thread *
326goacc_new_thread (void)
327{
328  struct goacc_thread *thr = gomp_malloc (sizeof (struct goacc_thread));
329
330#if defined HAVE_TLS || defined USE_EMUTLS
331  goacc_tls_data = thr;
332#else
333  pthread_setspecific (goacc_tls_key, thr);
334#endif
335
336  pthread_setspecific (goacc_cleanup_key, thr);
337
338  gomp_mutex_lock (&goacc_thread_lock);
339  thr->next = goacc_threads;
340  goacc_threads = thr;
341  gomp_mutex_unlock (&goacc_thread_lock);
342
343  return thr;
344}
345
346static void
347goacc_destroy_thread (void *data)
348{
349  struct goacc_thread *thr = data, *walk, *prev;
350
351  gomp_mutex_lock (&goacc_thread_lock);
352
353  if (thr)
354    {
355      struct gomp_device_descr *acc_dev = thr->dev;
356
357      if (acc_dev && thr->target_tls)
358	{
359	  acc_dev->openacc.destroy_thread_data_func (thr->target_tls);
360	  thr->target_tls = NULL;
361	}
362
363      assert (!thr->mapped_data);
364
365      /* Remove from thread list.  */
366      for (prev = NULL, walk = goacc_threads; walk;
367	   prev = walk, walk = walk->next)
368	if (walk == thr)
369	  {
370	    if (prev == NULL)
371	      goacc_threads = walk->next;
372	    else
373	      prev->next = walk->next;
374
375	    free (thr);
376
377	    break;
378	  }
379
380      assert (walk);
381    }
382
383  gomp_mutex_unlock (&goacc_thread_lock);
384}
385
386/* Use the ORD'th device instance for the current host thread (or -1 for the
387   current global default).  The device (and the runtime) must be initialised
388   before calling this function.  */
389
390void
391goacc_attach_host_thread_to_device (int ord)
392{
393  struct goacc_thread *thr = goacc_thread ();
394  struct gomp_device_descr *acc_dev = NULL, *base_dev = NULL;
395  int num_devices;
396
397  if (thr && thr->dev && (thr->dev->target_id == ord || ord < 0))
398    return;
399
400  if (ord < 0)
401    ord = goacc_device_num;
402
403  /* Decide which type of device to use.  If the current thread has a device
404     type already (e.g. set by acc_set_device_type), use that, else use the
405     global default.  */
406  if (thr && thr->base_dev)
407    base_dev = thr->base_dev;
408  else
409    {
410      assert (cached_base_dev);
411      base_dev = cached_base_dev;
412    }
413
414  num_devices = base_dev->get_num_devices_func ();
415  if (num_devices <= 0 || ord >= num_devices)
416    acc_dev_num_out_of_range (acc_device_type (base_dev->type), ord,
417			      num_devices);
418
419  if (!thr)
420    thr = goacc_new_thread ();
421
422  thr->base_dev = base_dev;
423  thr->dev = acc_dev = &base_dev[ord];
424  thr->saved_bound_dev = NULL;
425  thr->mapped_data = NULL;
426
427  thr->target_tls
428    = acc_dev->openacc.create_thread_data_func (ord);
429
430  acc_dev->openacc.async_set_async_func (acc_async_sync);
431}
432
433/* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
434   init/shutdown is per-process or per-thread.  We choose per-process.  */
435
436void
437acc_init (acc_device_t d)
438{
439  gomp_init_targets_once ();
440
441  gomp_mutex_lock (&acc_device_lock);
442
443  cached_base_dev = acc_init_1 (d);
444
445  gomp_mutex_unlock (&acc_device_lock);
446
447  goacc_attach_host_thread_to_device (-1);
448}
449
450ialias (acc_init)
451
452void
453acc_shutdown (acc_device_t d)
454{
455  gomp_init_targets_once ();
456
457  gomp_mutex_lock (&acc_device_lock);
458
459  acc_shutdown_1 (d);
460
461  gomp_mutex_unlock (&acc_device_lock);
462}
463
464ialias (acc_shutdown)
465
466int
467acc_get_num_devices (acc_device_t d)
468{
469  int n = 0;
470  struct gomp_device_descr *acc_dev;
471
472  if (d == acc_device_none)
473    return 0;
474
475  gomp_init_targets_once ();
476
477  gomp_mutex_lock (&acc_device_lock);
478  acc_dev = resolve_device (d, false);
479  gomp_mutex_unlock (&acc_device_lock);
480
481  if (!acc_dev)
482    return 0;
483
484  n = acc_dev->get_num_devices_func ();
485  if (n < 0)
486    n = 0;
487
488  return n;
489}
490
491ialias (acc_get_num_devices)
492
493/* Set the device type for the current thread only (using the current global
494   default device number), initialising that device if necessary.  Also set the
495   default device type for new threads to D.  */
496
497void
498acc_set_device_type (acc_device_t d)
499{
500  struct gomp_device_descr *base_dev, *acc_dev;
501  struct goacc_thread *thr = goacc_thread ();
502
503  gomp_init_targets_once ();
504
505  gomp_mutex_lock (&acc_device_lock);
506
507  cached_base_dev = base_dev = resolve_device (d, true);
508  acc_dev = &base_dev[goacc_device_num];
509
510  gomp_mutex_lock (&acc_dev->lock);
511  if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
512    gomp_init_device (acc_dev);
513  gomp_mutex_unlock (&acc_dev->lock);
514
515  gomp_mutex_unlock (&acc_device_lock);
516
517  /* We're changing device type: invalidate the current thread's dev and
518     base_dev pointers.  */
519  if (thr && thr->base_dev != base_dev)
520    {
521      thr->base_dev = thr->dev = NULL;
522      if (thr->mapped_data)
523        gomp_fatal ("acc_set_device_type in 'acc data' region");
524    }
525
526  goacc_attach_host_thread_to_device (-1);
527}
528
529ialias (acc_set_device_type)
530
531acc_device_t
532acc_get_device_type (void)
533{
534  acc_device_t res = acc_device_none;
535  struct gomp_device_descr *dev;
536  struct goacc_thread *thr = goacc_thread ();
537
538  if (thr && thr->base_dev)
539    res = acc_device_type (thr->base_dev->type);
540  else
541    {
542      gomp_init_targets_once ();
543
544      gomp_mutex_lock (&acc_device_lock);
545      dev = resolve_device (acc_device_default, true);
546      gomp_mutex_unlock (&acc_device_lock);
547      res = acc_device_type (dev->type);
548    }
549
550  assert (res != acc_device_default
551	  && res != acc_device_not_host);
552
553  return res;
554}
555
556ialias (acc_get_device_type)
557
558int
559acc_get_device_num (acc_device_t d)
560{
561  const struct gomp_device_descr *dev;
562  struct goacc_thread *thr = goacc_thread ();
563
564  if (d >= _ACC_device_hwm)
565    gomp_fatal ("unknown device type %u", (unsigned) d);
566
567  gomp_init_targets_once ();
568
569  gomp_mutex_lock (&acc_device_lock);
570  dev = resolve_device (d, true);
571  gomp_mutex_unlock (&acc_device_lock);
572
573  if (thr && thr->base_dev == dev && thr->dev)
574    return thr->dev->target_id;
575
576  return goacc_device_num;
577}
578
579ialias (acc_get_device_num)
580
581void
582acc_set_device_num (int ord, acc_device_t d)
583{
584  struct gomp_device_descr *base_dev, *acc_dev;
585  int num_devices;
586
587  gomp_init_targets_once ();
588
589  if (ord < 0)
590    ord = goacc_device_num;
591
592  if ((int) d == 0)
593    /* Set whatever device is being used by the current host thread to use
594       device instance ORD.  It's unclear if this is supposed to affect other
595       host threads too (OpenACC 2.0 (3.2.4) acc_set_device_num).  */
596    goacc_attach_host_thread_to_device (ord);
597  else
598    {
599      gomp_mutex_lock (&acc_device_lock);
600
601      cached_base_dev = base_dev = resolve_device (d, true);
602
603      num_devices = base_dev->get_num_devices_func ();
604
605      if (num_devices <= 0 || ord >= num_devices)
606        acc_dev_num_out_of_range (d, ord, num_devices);
607
608      acc_dev = &base_dev[ord];
609
610      gomp_mutex_lock (&acc_dev->lock);
611      if (acc_dev->state == GOMP_DEVICE_UNINITIALIZED)
612        gomp_init_device (acc_dev);
613      gomp_mutex_unlock (&acc_dev->lock);
614
615      gomp_mutex_unlock (&acc_device_lock);
616
617      goacc_attach_host_thread_to_device (ord);
618    }
619
620  goacc_device_num = ord;
621}
622
623ialias (acc_set_device_num)
624
625/* For -O and higher, the compiler always attempts to expand acc_on_device, but
626   if the user disables the builtin, or calls it via a pointer, we'll need this
627   version.
628
629   Compile this with optimization, so that the compiler expands
630   this, rather than generating infinitely recursive code.  */
631
632int __attribute__ ((__optimize__ ("O2")))
633acc_on_device (acc_device_t dev)
634{
635  return __builtin_acc_on_device (dev);
636}
637
638ialias (acc_on_device)
639
640attribute_hidden void
641goacc_runtime_initialize (void)
642{
643  gomp_mutex_init (&acc_device_lock);
644
645#if !(defined HAVE_TLS || defined USE_EMUTLS)
646  pthread_key_create (&goacc_tls_key, NULL);
647#endif
648
649  pthread_key_create (&goacc_cleanup_key, goacc_destroy_thread);
650
651  cached_base_dev = NULL;
652
653  goacc_threads = NULL;
654  gomp_mutex_init (&goacc_thread_lock);
655
656  /* Initialize and register the 'host' device type.  */
657  goacc_host_init ();
658}
659
660/* Compiler helper functions */
661
662attribute_hidden void
663goacc_save_and_set_bind (acc_device_t d)
664{
665  struct goacc_thread *thr = goacc_thread ();
666
667  assert (!thr->saved_bound_dev);
668
669  thr->saved_bound_dev = thr->dev;
670  thr->dev = dispatchers[d];
671}
672
673attribute_hidden void
674goacc_restore_bind (void)
675{
676  struct goacc_thread *thr = goacc_thread ();
677
678  thr->dev = thr->saved_bound_dev;
679  thr->saved_bound_dev = NULL;
680}
681
682/* This is called from any OpenACC support function that may need to implicitly
683   initialize the libgomp runtime, either globally or from a new host thread.
684   On exit "goacc_thread" will return a valid & populated thread block.  */
685
686attribute_hidden void
687goacc_lazy_initialize (void)
688{
689  struct goacc_thread *thr = goacc_thread ();
690
691  if (thr && thr->dev)
692    return;
693
694  if (!cached_base_dev)
695    acc_init (acc_device_default);
696  else
697    goacc_attach_host_thread_to_device (-1);
698}
699