1/* OpenACC Runtime Library Definitions.
2
3   Copyright (C) 2013-2020 Free Software Foundation, Inc.
4
5   Contributed by Mentor Embedded.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#include <assert.h>
30#include <string.h>
31#include "openacc.h"
32#include "libgomp.h"
33#include "oacc-int.h"
34
35static struct goacc_thread *
36get_goacc_thread (void)
37{
38  struct goacc_thread *thr = goacc_thread ();
39
40  if (!thr || !thr->dev)
41    gomp_fatal ("no device active");
42
43  return thr;
44}
45
46static int
47validate_async_val (int async)
48{
49  if (!async_valid_p (async))
50    gomp_fatal ("invalid async-argument: %d", async);
51
52  if (async == acc_async_sync)
53    return -1;
54
55  if (async == acc_async_noval)
56    return 0;
57
58  if (async >= 0)
59    /* TODO: we reserve 0 for acc_async_noval before we can clarify the
60       semantics of "default_async".  */
61    return 1 + async;
62  else
63    __builtin_unreachable ();
64}
65
66/* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
67   might return NULL if no asyncqueue is to be used.  Otherwise, if CREATE,
68   create the asyncqueue if it doesn't exist yet.
69
70   Unless CREATE, this will not generate any OpenACC Profiling Interface
71   events.  */
72
73attribute_hidden struct goacc_asyncqueue *
74lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
75{
76  async = validate_async_val (async);
77  if (async < 0)
78    return NULL;
79
80  struct goacc_asyncqueue *ret_aq = NULL;
81  struct gomp_device_descr *dev = thr->dev;
82
83  gomp_mutex_lock (&dev->openacc.async.lock);
84
85  if (!create
86      && (async >= dev->openacc.async.nasyncqueue
87	  || !dev->openacc.async.asyncqueue[async]))
88    goto end;
89
90  if (async >= dev->openacc.async.nasyncqueue)
91    {
92      int diff = async + 1 - dev->openacc.async.nasyncqueue;
93      dev->openacc.async.asyncqueue
94	= gomp_realloc (dev->openacc.async.asyncqueue,
95			sizeof (goacc_aq) * (async + 1));
96      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
97	      0, sizeof (goacc_aq) * diff);
98      dev->openacc.async.nasyncqueue = async + 1;
99    }
100
101  if (!dev->openacc.async.asyncqueue[async])
102    {
103      dev->openacc.async.asyncqueue[async]
104	= dev->openacc.async.construct_func (dev->target_id);
105
106      if (!dev->openacc.async.asyncqueue[async])
107	{
108	  gomp_mutex_unlock (&dev->openacc.async.lock);
109	  gomp_fatal ("async %d creation failed", async);
110	}
111
112      /* Link new async queue into active list.  */
113      goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
114      n->aq = dev->openacc.async.asyncqueue[async];
115      n->next = dev->openacc.async.active;
116      dev->openacc.async.active = n;
117    }
118
119  ret_aq = dev->openacc.async.asyncqueue[async];
120
121 end:
122  gomp_mutex_unlock (&dev->openacc.async.lock);
123  return ret_aq;
124}
125
126/* Return the asyncqueue to be used for OpenACC async-argument ASYNC.  This
127   might return NULL if no asyncqueue is to be used.  Otherwise, create the
128   asyncqueue if it doesn't exist yet.  */
129
130attribute_hidden struct goacc_asyncqueue *
131get_goacc_asyncqueue (int async)
132{
133  struct goacc_thread *thr = get_goacc_thread ();
134  return lookup_goacc_asyncqueue (thr, true, async);
135}
136
137int
138acc_async_test (int async)
139{
140  struct goacc_thread *thr = goacc_thread ();
141
142  if (!thr || !thr->dev)
143    gomp_fatal ("no device active");
144
145  goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
146  if (!aq)
147    return 1;
148
149  acc_prof_info prof_info;
150  acc_api_info api_info;
151  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
152  if (profiling_p)
153    {
154      prof_info.async = async;
155      prof_info.async_queue = prof_info.async;
156    }
157
158  int res = thr->dev->openacc.async.test_func (aq);
159
160  if (profiling_p)
161    {
162      thr->prof_info = NULL;
163      thr->api_info = NULL;
164    }
165
166  return res;
167}
168
169int
170acc_async_test_all (void)
171{
172  struct goacc_thread *thr = get_goacc_thread ();
173
174  acc_prof_info prof_info;
175  acc_api_info api_info;
176  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
177
178  int ret = 1;
179  gomp_mutex_lock (&thr->dev->openacc.async.lock);
180  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
181    if (!thr->dev->openacc.async.test_func (l->aq))
182      {
183	ret = 0;
184	break;
185      }
186  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
187
188  if (profiling_p)
189    {
190      thr->prof_info = NULL;
191      thr->api_info = NULL;
192    }
193
194  return ret;
195}
196
197void
198acc_wait (int async)
199{
200  struct goacc_thread *thr = get_goacc_thread ();
201
202  goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
203  if (!aq)
204    return;
205
206  acc_prof_info prof_info;
207  acc_api_info api_info;
208  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
209  if (profiling_p)
210    {
211      prof_info.async = async;
212      prof_info.async_queue = prof_info.async;
213    }
214
215  if (!thr->dev->openacc.async.synchronize_func (aq))
216    gomp_fatal ("wait on %d failed", async);
217
218  if (profiling_p)
219    {
220      thr->prof_info = NULL;
221      thr->api_info = NULL;
222    }
223}
224
/* OpenACC 1.0 compatibility: acc_async_wait is another name for acc_wait.
   Without HAVE_ATTRIBUTE_ALIAS, a forwarding function is defined; with it,
   strong_alias is used.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait (int async)
{
  acc_wait (async);
}
#else
strong_alias (acc_wait, acc_async_wait)
#endif
235
236void
237acc_wait_async (int async1, int async2)
238{
239  struct goacc_thread *thr = get_goacc_thread ();
240
241  goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
242  /* TODO: Is this also correct for acc_async_sync, assuming that in this case,
243     we'll always be synchronous anyways?  */
244  if (!aq1)
245    return;
246
247  acc_prof_info prof_info;
248  acc_api_info api_info;
249  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
250  if (profiling_p)
251    {
252      prof_info.async = async2;
253      prof_info.async_queue = prof_info.async;
254    }
255
256  goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
257  /* An async queue is always synchronized with itself.  */
258  if (aq1 == aq2)
259    goto out_prof;
260
261  if (aq2)
262    {
263      if (!thr->dev->openacc.async.serialize_func (aq1, aq2))
264	gomp_fatal ("ordering of async ids %d and %d failed", async1, async2);
265    }
266  else
267    {
268      /* TODO: Local thread synchronization.
269	 Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
270      if (!thr->dev->openacc.async.synchronize_func (aq1))
271	gomp_fatal ("wait on %d failed", async1);
272    }
273
274 out_prof:
275  if (profiling_p)
276    {
277      thr->prof_info = NULL;
278      thr->api_info = NULL;
279    }
280}
281
282void
283acc_wait_all (void)
284{
285  struct goacc_thread *thr = goacc_thread ();
286
287  acc_prof_info prof_info;
288  acc_api_info api_info;
289  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
290
291  bool ret = true;
292  gomp_mutex_lock (&thr->dev->openacc.async.lock);
293  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
294    ret &= thr->dev->openacc.async.synchronize_func (l->aq);
295  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
296
297  if (profiling_p)
298    {
299      thr->prof_info = NULL;
300      thr->api_info = NULL;
301    }
302
303  if (!ret)
304    gomp_fatal ("wait all failed");
305}
306
/* OpenACC 1.0 compatibility: acc_async_wait_all is another name for
   acc_wait_all.  Without HAVE_ATTRIBUTE_ALIAS, a forwarding function is
   defined; with it, strong_alias is used.  */
#ifndef HAVE_ATTRIBUTE_ALIAS
void
acc_async_wait_all (void)
{
  acc_wait_all ();
}
#else
strong_alias (acc_wait_all, acc_async_wait_all)
#endif
317
318void
319acc_wait_all_async (int async)
320{
321  struct goacc_thread *thr = get_goacc_thread ();
322
323  acc_prof_info prof_info;
324  acc_api_info api_info;
325  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
326  if (profiling_p)
327    {
328      prof_info.async = async;
329      prof_info.async_queue = prof_info.async;
330    }
331
332  goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
333
334  bool ret = true;
335  gomp_mutex_lock (&thr->dev->openacc.async.lock);
336  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
337    {
338      if (waiting_queue)
339	ret &= thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
340      else
341	/* TODO: Local thread synchronization.
342	   Necessary for the "async2 == acc_async_sync" case, or can just skip?  */
343	ret &= thr->dev->openacc.async.synchronize_func (l->aq);
344    }
345  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
346
347  if (profiling_p)
348    {
349      thr->prof_info = NULL;
350      thr->api_info = NULL;
351    }
352
353  if (!ret)
354    gomp_fatal ("wait all async(%d) failed", async);
355}
356
357void
358GOACC_wait (int async, int num_waits, ...)
359{
360  goacc_lazy_initialize ();
361
362  struct goacc_thread *thr = goacc_thread ();
363
364  /* No nesting.  */
365  assert (thr->prof_info == NULL);
366  assert (thr->api_info == NULL);
367  acc_prof_info prof_info;
368  acc_api_info api_info;
369  bool profiling_p = GOACC_PROFILING_SETUP_P (thr, &prof_info, &api_info);
370  if (profiling_p)
371    {
372      prof_info.async = async;
373      prof_info.async_queue = prof_info.async;
374    }
375
376  if (num_waits)
377    {
378      va_list ap;
379
380      va_start (ap, num_waits);
381      goacc_wait (async, num_waits, &ap);
382      va_end (ap);
383    }
384  else if (async == acc_async_sync)
385    acc_wait_all ();
386  else
387    acc_wait_all_async (async);
388
389  if (profiling_p)
390    {
391      thr->prof_info = NULL;
392      thr->api_info = NULL;
393    }
394}
395
396attribute_hidden void
397goacc_wait (int async, int num_waits, va_list *ap)
398{
399  while (num_waits--)
400    {
401      int qid = va_arg (*ap, int);
402
403      /* Waiting on ACC_ASYNC_NOVAL maps to 'wait all'.  */
404      if (qid == acc_async_noval)
405	{
406	  if (async == acc_async_sync)
407	    acc_wait_all ();
408	  else
409	    acc_wait_all_async (async);
410	  break;
411	}
412
413      if (acc_async_test (qid))
414	continue;
415
416      if (async == acc_async_sync)
417	acc_wait (qid);
418      else if (qid == async)
419	/* If we're waiting on the same asynchronous queue as we're
420	   launching on, the queue itself will order work as
421	   required, so there's no need to wait explicitly.  */
422	;
423      else
424	acc_wait_async (qid, async);
425    }
426}
427
428attribute_hidden void
429goacc_async_free (struct gomp_device_descr *devicep,
430		  struct goacc_asyncqueue *aq, void *ptr)
431{
432  if (!aq)
433    free (ptr);
434  else
435    devicep->openacc.async.queue_callback_func (aq, free, ptr);
436}
437
438/* This function initializes the asyncqueues for the device specified by
439   DEVICEP.  TODO DEVICEP must be locked on entry, and remains locked on
440   return.  */
441
442attribute_hidden void
443goacc_init_asyncqueues (struct gomp_device_descr *devicep)
444{
445  devicep->openacc.async.nasyncqueue = 0;
446  devicep->openacc.async.asyncqueue = NULL;
447  devicep->openacc.async.active = NULL;
448  gomp_mutex_init (&devicep->openacc.async.lock);
449}
450
451/* This function finalizes the asyncqueues for the device specified by DEVICEP.
452   TODO DEVICEP must be locked on entry, and remains locked on return.  */
453
454attribute_hidden bool
455goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
456{
457  bool ret = true;
458  gomp_mutex_lock (&devicep->openacc.async.lock);
459  if (devicep->openacc.async.nasyncqueue > 0)
460    {
461      goacc_aq_list next;
462      for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
463	{
464	  ret &= devicep->openacc.async.destruct_func (l->aq);
465	  next = l->next;
466	  free (l);
467	}
468      free (devicep->openacc.async.asyncqueue);
469      devicep->openacc.async.nasyncqueue = 0;
470      devicep->openacc.async.asyncqueue = NULL;
471      devicep->openacc.async.active = NULL;
472    }
473  gomp_mutex_unlock (&devicep->openacc.async.lock);
474  gomp_mutex_destroy (&devicep->openacc.async.lock);
475  return ret;
476}
477