1/* OpenACC Profiling Interface
2
3   Copyright (C) 2019-2020 Free Software Foundation, Inc.
4
5   Contributed by Mentor, a Siemens Business.
6
7   This file is part of the GNU Offloading and Multi Processing Library
8   (libgomp).
9
10   Libgomp is free software; you can redistribute it and/or modify it
11   under the terms of the GNU General Public License as published by
12   the Free Software Foundation; either version 3, or (at your option)
13   any later version.
14
15   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
18   more details.
19
20   Under Section 7 of GPL version 3, you are granted additional
21   permissions described in the GCC Runtime Library Exception, version
22   3.1, as published by the Free Software Foundation.
23
24   You should have received a copy of the GNU General Public License and
25   a copy of the GCC Runtime Library Exception along with this program;
26   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
27   <http://www.gnu.org/licenses/>.  */
28
29#define _GNU_SOURCE
30#include "libgomp.h"
31#include "oacc-int.h"
32#include "secure_getenv.h"
33#include "acc_prof.h"
34#include <assert.h>
35#ifdef HAVE_STRING_H
36# include <string.h>
37#endif
38#ifdef PLUGIN_SUPPORT
39# include <dlfcn.h>
40#endif
41
42#define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43
44/* Statically assert that the layout of the common fields in the
45   'acc_event_info' variants matches.  */
46/* 'event_type' */
47STATIC_ASSERT (offsetof (acc_event_info, event_type)
48	       == offsetof (acc_event_info, data_event.event_type));
49STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50	       == offsetof (acc_event_info, launch_event.event_type));
51STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52	       == offsetof (acc_event_info, other_event.event_type));
53/* 'valid_bytes' */
54STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55	       == offsetof (acc_event_info, launch_event.valid_bytes));
56STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57	       == offsetof (acc_event_info, other_event.valid_bytes));
58/* 'parent_construct' */
59STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60	       == offsetof (acc_event_info, launch_event.parent_construct));
61STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62	       == offsetof (acc_event_info, other_event.parent_construct));
63/* 'implicit' */
64STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65	       == offsetof (acc_event_info, launch_event.implicit));
66STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67	       == offsetof (acc_event_info, other_event.implicit));
68/* 'tool_info' */
69STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70	       == offsetof (acc_event_info, launch_event.tool_info));
71STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72	       == offsetof (acc_event_info, other_event.tool_info));
73
/* One callback registered for one event type.  The entries for an event type
   are chained into a singly linked list.  */
struct goacc_prof_callback_entry
{
  /* The callback function.  */
  acc_prof_callback cb;
  /* Number of 'acc_reg' registrations of 'cb' that have not been
     unregistered yet; the entry is freed once this drops to zero.  */
  int ref;
  /* Whether 'cb' gets invoked when its event is dispatched.  */
  bool enabled;
  /* Next entry for the same event type, or NULL at the end of the list.  */
  struct goacc_prof_callback_entry *next;
};
81
/* Use a separate flag to minimize run-time performance impact for the (very
   common) case that profiling is not enabled.

   The flag is set to true by 'acc_prof_register'.  Once enabled, we're not
   going to disable this anymore, anywhere.  We probably could, by adding
   appropriate logic to 'acc_prof_register', 'acc_prof_unregister'.  */
bool goacc_prof_enabled = false;

/* Global state for registered callbacks, both indexed by event type.

   'goacc_prof_callbacks_enabled' holds one "dispatch enabled" flag per event
   type; 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.
   'goacc_prof_callback_entries' holds, per event type, the head of the list
   of registered callbacks (NULL if there are none).  */
static bool goacc_prof_callbacks_enabled[acc_ev_last];
static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
/* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
   'goacc_prof_callback_entries'.  */
static gomp_mutex_t goacc_prof_lock;
97
98void
99goacc_profiling_initialize (void)
100{
101  gomp_mutex_init (&goacc_prof_lock);
102
103  /* Initially, all callbacks for all events are enabled.  */
104  for (int i = 0; i < acc_ev_last; ++i)
105    goacc_prof_callbacks_enabled[i] = true;
106
107
108#ifdef PLUGIN_SUPPORT
109  char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110  while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111    {
112      char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113      char *acc_proflib;
114      if (acc_proflibs_sep == acc_proflibs)
115	{
116	  /* Stray ';' separator: make sure we don't 'dlopen' the main
117	     program.  */
118	  acc_proflib = NULL;
119	}
120      else
121	{
122	  if (acc_proflibs_sep != NULL)
123	    {
124	      /* Single out the first library.  */
125	      acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126	      memcpy (acc_proflib, acc_proflibs,
127		      acc_proflibs_sep - acc_proflibs);
128	      acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129	    }
130	  else
131	    {
132	      /* No ';' separator, so only one library.  */
133	      acc_proflib = acc_proflibs;
134	    }
135
136	  gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137	  void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138	  if (dl_handle != NULL)
139	    {
140	      typeof (&acc_register_library) a_r_l
141		= dlsym (dl_handle, "acc_register_library");
142	      if (a_r_l == NULL)
143		goto dl_fail;
144	      gomp_debug (0, "  %s: calling %s:acc_register_library\n",
145			  __FUNCTION__, acc_proflib);
146	      a_r_l (acc_prof_register, acc_prof_unregister,
147		     acc_prof_lookup);
148	    }
149	  else
150	    {
151	    dl_fail:
152	      gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153			  acc_proflib, dlerror ());
154	      if (dl_handle != NULL)
155		{
156		  int err = dlclose (dl_handle);
157		  dl_handle = NULL;
158		  if (err != 0)
159		    goto dl_fail;
160		}
161	    }
162	}
163
164      if (acc_proflib != acc_proflibs)
165	{
166	  free (acc_proflib);
167
168	  acc_proflibs = acc_proflibs_sep + 1;
169	}
170      else
171	acc_proflibs = NULL;
172    }
173#endif /* PLUGIN_SUPPORT */
174}
175
176void
177acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178{
179  gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181
182
183  /* For any events to be dispatched, the user first has to register a
184     callback, which makes this here a good place for enabling the whole
185     machinery.  */
186  if (!GOACC_PROF_ENABLED)
187    __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188
189
190  enum
191  {
192    EVENT_KIND_BOGUS,
193    EVENT_KIND_NORMAL,
194    /* As end events invoke callbacks in the reverse order, we register these
195       in the reverse order here.  */
196    EVENT_KIND_END,
197  } event_kind = EVENT_KIND_BOGUS;
198  switch (ev)
199    {
200    case acc_ev_none:
201    case acc_ev_device_init_start:
202    case acc_ev_device_shutdown_start:
203    case acc_ev_runtime_shutdown:
204    case acc_ev_create:
205    case acc_ev_delete:
206    case acc_ev_alloc:
207    case acc_ev_free:
208    case acc_ev_enter_data_start:
209    case acc_ev_exit_data_start:
210    case acc_ev_update_start:
211    case acc_ev_compute_construct_start:
212    case acc_ev_enqueue_launch_start:
213    case acc_ev_enqueue_upload_start:
214    case acc_ev_enqueue_download_start:
215    case acc_ev_wait_start:
216      event_kind = EVENT_KIND_NORMAL;
217      break;
218    case acc_ev_device_init_end:
219    case acc_ev_device_shutdown_end:
220    case acc_ev_enter_data_end:
221    case acc_ev_exit_data_end:
222    case acc_ev_update_end:
223    case acc_ev_compute_construct_end:
224    case acc_ev_enqueue_launch_end:
225    case acc_ev_enqueue_upload_end:
226    case acc_ev_enqueue_download_end:
227    case acc_ev_wait_end:
228      event_kind = EVENT_KIND_END;
229      break;
230    case acc_ev_last:
231      break;
232    }
233  if (event_kind == EVENT_KIND_BOGUS)
234    {
235      /* Silently ignore.  */
236      gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
237      return;
238    }
239
240  bool bogus = true;
241  switch (reg)
242    {
243    case acc_reg:
244    case acc_toggle:
245    case acc_toggle_per_thread:
246      bogus = false;
247      break;
248    }
249  if (bogus)
250    {
251      /* Silently ignore.  */
252      gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
253      return;
254    }
255
256  /* Special cases.  */
257  if (reg == acc_toggle)
258    {
259      if (cb == NULL)
260	{
261	  gomp_debug (0, "  globally enabling callbacks\n");
262	  gomp_mutex_lock (&goacc_prof_lock);
263	  /* For 'acc_ev_none', this acts as a global toggle.  */
264	  goacc_prof_callbacks_enabled[ev] = true;
265	  gomp_mutex_unlock (&goacc_prof_lock);
266	  return;
267	}
268      else if (ev == acc_ev_none && cb != NULL)
269	{
270	  gomp_debug (0, "  ignoring request\n");
271	  return;
272	}
273    }
274  else if (reg == acc_toggle_per_thread)
275    {
276      if (ev == acc_ev_none && cb == NULL)
277	{
278	  gomp_debug (0, "  thread: enabling callbacks\n");
279	  goacc_lazy_initialize ();
280	  struct goacc_thread *thr = goacc_thread ();
281	  thr->prof_callbacks_enabled = true;
282	  return;
283	}
284      /* Silently ignore.  */
285      gomp_debug (0, "  ignoring bogus request\n");
286      return;
287    }
288
289  gomp_mutex_lock (&goacc_prof_lock);
290
291  struct goacc_prof_callback_entry *it, *it_p;
292  it = goacc_prof_callback_entries[ev];
293  it_p = NULL;
294  while (it)
295    {
296      if (it->cb == cb)
297	break;
298      it_p = it;
299      it = it->next;
300    }
301
302  switch (reg)
303    {
304    case acc_reg:
305      /* If we already have this callback registered, just increment its
306	 reference count.  */
307      if (it != NULL)
308	{
309	  it->ref++;
310	  gomp_debug (0, "  already registered;"
311		      " incrementing reference count to: %d\n", it->ref);
312	}
313      else
314	{
315	  struct goacc_prof_callback_entry *e
316	    = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317	  e->cb = cb;
318	  e->ref = 1;
319	  e->enabled = true;
320	  bool prepend = (event_kind == EVENT_KIND_END);
321	  /* If we don't have any callback registered yet, also use the
322	     'prepend' code path.  */
323	  if (it_p == NULL)
324	    prepend = true;
325	  if (prepend)
326	    {
327	      gomp_debug (0, "  prepending\n");
328	      e->next = goacc_prof_callback_entries[ev];
329	      goacc_prof_callback_entries[ev] = e;
330	    }
331	  else
332	    {
333	      gomp_debug (0, "  appending\n");
334	      e->next = NULL;
335	      it_p->next = e;
336	    }
337	}
338      break;
339
340    case acc_toggle:
341      if (it == NULL)
342	{
343	  gomp_debug (0, "  ignoring request: is not registered\n");
344	  break;
345	}
346      else
347	{
348	  gomp_debug (0, "  enabling\n");
349	  it->enabled = true;
350	}
351      break;
352
353    case acc_toggle_per_thread:
354      __builtin_unreachable ();
355    }
356
357  gomp_mutex_unlock (&goacc_prof_lock);
358}
359
360void
361acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362{
363  gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364	      __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365
366  /* If profiling is not enabled, there cannot be anything to unregister.  */
367  if (!GOACC_PROF_ENABLED)
368    return;
369
370  if (ev < acc_ev_none
371      || ev >= acc_ev_last)
372    {
373      /* Silently ignore.  */
374      gomp_debug (0, "  ignoring request for bogus 'acc_event_t'\n");
375      return;
376    }
377
378  bool bogus = true;
379  switch (reg)
380    {
381    case acc_reg:
382    case acc_toggle:
383    case acc_toggle_per_thread:
384      bogus = false;
385      break;
386    }
387  if (bogus)
388    {
389      /* Silently ignore.  */
390      gomp_debug (0, "  ignoring request with bogus 'acc_register_t'\n");
391      return;
392    }
393
394  /* Special cases.  */
395  if (reg == acc_toggle)
396    {
397      if (cb == NULL)
398	{
399	  gomp_debug (0, "  globally disabling callbacks\n");
400	  gomp_mutex_lock (&goacc_prof_lock);
401	  /* For 'acc_ev_none', this acts as a global toggle.  */
402	  goacc_prof_callbacks_enabled[ev] = false;
403	  gomp_mutex_unlock (&goacc_prof_lock);
404	  return;
405	}
406      else if (ev == acc_ev_none && cb != NULL)
407	{
408	  gomp_debug (0, "  ignoring request\n");
409	  return;
410	}
411    }
412  else if (reg == acc_toggle_per_thread)
413    {
414      if (ev == acc_ev_none && cb == NULL)
415	{
416	  gomp_debug (0, "  thread: disabling callbacks\n");
417	  goacc_lazy_initialize ();
418	  struct goacc_thread *thr = goacc_thread ();
419	  thr->prof_callbacks_enabled = false;
420	  return;
421	}
422      /* Silently ignore.  */
423      gomp_debug (0, "  ignoring bogus request\n");
424      return;
425    }
426
427  gomp_mutex_lock (&goacc_prof_lock);
428
429  struct goacc_prof_callback_entry *it, *it_p;
430  it = goacc_prof_callback_entries[ev];
431  it_p = NULL;
432  while (it)
433    {
434      if (it->cb == cb)
435	break;
436      it_p = it;
437      it = it->next;
438    }
439
440  switch (reg)
441    {
442    case acc_reg:
443      if (it == NULL)
444	{
445	  /* Silently ignore.  */
446	  gomp_debug (0, "  ignoring bogus request: is not registered\n");
447	  break;
448	}
449      it->ref--;
450      gomp_debug (0, "  decrementing reference count to: %d\n", it->ref);
451      if (it->ref == 0)
452	{
453	  if (it_p == NULL)
454	    goacc_prof_callback_entries[ev] = it->next;
455	  else
456	    it_p->next = it->next;
457	  free (it);
458	}
459      break;
460
461    case acc_toggle:
462      if (it == NULL)
463	{
464	  gomp_debug (0, "  ignoring request: is not registered\n");
465	  break;
466	}
467      else
468	{
469	  gomp_debug (0, "  disabling\n");
470	  it->enabled = false;
471	}
472      break;
473
474    case acc_toggle_per_thread:
475      __builtin_unreachable ();
476    }
477
478  gomp_mutex_unlock (&goacc_prof_lock);
479}
480
481acc_query_fn
482acc_prof_lookup (const char *name)
483{
484  gomp_debug (0, "%s (%s)\n",
485	      __FUNCTION__, name ?: "NULL");
486
487  return NULL;
488}
489
/* Placeholder definition of 'acc_register_library': all three arguments are
   ignored, and "TODO" is reported via 'gomp_fatal'.

   Note that 'goacc_profiling_initialize' does not call this definition; it
   calls the symbol of the same name that it looks up in each 'ACC_PROFLIB'
   library.  */

void
acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
		      acc_prof_lookup_func lookup)
{
  gomp_fatal ("TODO");
}
496
497/* Prepare to dispatch events?  */
498
499bool
500_goacc_profiling_dispatch_p (bool check_not_nested_p)
501{
502  gomp_debug (0, "%s\n", __FUNCTION__);
503
504  bool ret;
505
506  struct goacc_thread *thr = goacc_thread ();
507  if (__builtin_expect (thr == NULL, false))
508    {
509      /* If we don't have any per-thread state yet, that means that per-thread
510	 callback dispatch has not been explicitly disabled (which only a call
511	 to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512	 that would have allocated per-thread state via
513	 'goacc_lazy_initialize'); initially, all callbacks for all events are
514	 enabled.  */
515      gomp_debug (0, "  %s: don't have any per-thread state yet\n", __FUNCTION__);
516    }
517  else
518    {
519      if (check_not_nested_p)
520	{
521	  /* No nesting.  */
522	  assert (thr->prof_info == NULL);
523	  assert (thr->api_info == NULL);
524	}
525
526      if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527	{
528	  gomp_debug (0, "  %s: disabled for this thread\n", __FUNCTION__);
529	  ret = false;
530	  goto out;
531	}
532    }
533
534  gomp_mutex_lock (&goacc_prof_lock);
535
536  /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle.  */
537  if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538    {
539      gomp_debug (0, "  %s: disabled globally\n", __FUNCTION__);
540      ret = false;
541      goto out_unlock;
542    }
543  else
544    ret = true;
545
546 out_unlock:
547  gomp_mutex_unlock (&goacc_prof_lock);
548
549 out:
550  return ret;
551}
552
553/* Set up to dispatch events?  */
554
555bool
556_goacc_profiling_setup_p (struct goacc_thread *thr,
557			  acc_prof_info *prof_info, acc_api_info *api_info)
558{
559  gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560
561  /* If we don't have any per-thread state yet, we can't register 'prof_info'
562     and 'api_info'.  */
563  if (__builtin_expect (thr == NULL, false))
564    {
565      gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566		  " the current call, construct, or directive\n");
567      return false;
568    }
569
570  if (thr->prof_info != NULL)
571    {
572      /* Profiling has already been set up for an outer construct.  In this
573	 case, we continue to use the existing information, and thus return
574	 'false' here.
575
576	 This can happen, for example, for an 'enter data' directive, which
577	 sets up profiling, then calls into 'acc_copyin', which should not
578	 again set up profiling, should not overwrite the existing
579	 information.  */
580      return false;
581    }
582
583  thr->prof_info = prof_info;
584  thr->api_info = api_info;
585
586  /* Fill in some defaults.  */
587
588  prof_info->event_type = -1; /* Must be set later.  */
589  prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590  prof_info->version = _ACC_PROF_INFO_VERSION;
591  if (thr->dev)
592    {
593      prof_info->device_type = acc_device_type (thr->dev->type);
594      prof_info->device_number = thr->dev->target_id;
595    }
596  else
597    {
598      prof_info->device_type = -1;
599      prof_info->device_number = -1;
600    }
601  prof_info->thread_id = -1;
602  prof_info->async = acc_async_sync;
603  prof_info->async_queue = prof_info->async;
604  prof_info->src_file = NULL;
605  prof_info->func_name = NULL;
606  prof_info->line_no = -1;
607  prof_info->end_line_no = -1;
608  prof_info->func_line_no = -1;
609  prof_info->func_end_line_no = -1;
610
611  api_info->device_api = acc_device_api_none;
612  api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613  api_info->device_type = prof_info->device_type;
614  api_info->vendor = -1;
615  api_info->device_handle = NULL;
616  api_info->context_handle = NULL;
617  api_info->async_handle = NULL;
618
619  return true;
620}
621
622/* Dispatch events.
623
624   This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625   'GOACC_PROFILING_SETUP_P' returned a true result.  */
626
627void
628goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629			  acc_api_info *apt_info)
630{
631  acc_event_t event_type = event_info->event_type;
632  gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633  assert (event_type > acc_ev_none
634	  && event_type < acc_ev_last);
635
636  gomp_mutex_lock (&goacc_prof_lock);
637
638  if (!goacc_prof_callbacks_enabled[event_type])
639    {
640      gomp_debug (0, "  disabled for this event type\n");
641
642      goto out_unlock;
643    }
644
645  for (struct goacc_prof_callback_entry *e
646	 = goacc_prof_callback_entries[event_type];
647       e != NULL;
648       e = e->next)
649    {
650      if (!e->enabled)
651	{
652	  gomp_debug (0, "  disabled for callback %p\n", e->cb);
653	  continue;
654	}
655
656      gomp_debug (0, "  calling callback %p\n", e->cb);
657      e->cb (prof_info, event_info, apt_info);
658    }
659
660 out_unlock:
661  gomp_mutex_unlock (&goacc_prof_lock);
662}
663