1/* Copyright (C) 2017-2020 Free Software Foundation, Inc.
2   Contributed by Mentor Embedded.
3
4   This file is part of the GNU Offloading and Multi Processing Library
5   (libgomp).
6
7   Libgomp is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15   more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
26/* This file handles maintenance of threads on AMD GCN.  */
27
28#include "libgomp.h"
29#include <stdlib.h>
30#include <string.h>
31
32static void gomp_thread_start (struct gomp_thread_pool *);
33
34/* This externally visible function handles target region entry.  It
35   sets up a per-team thread pool and transfers control by returning to
36   the kernel in the master thread or gomp_thread_start in other threads.
37
38   The name of this function is part of the interface with the compiler: for
39   each OpenMP kernel the compiler configures the stack, then calls here.
40
41   Likewise, gomp_gcn_exit_kernel is called during the kernel epilogue.  */
42
void
gomp_gcn_enter_kernel (void)
{
  /* Position in launch dimension 1 identifies this thread within the team.  */
  int threadid = __builtin_gcn_dim_pos (1);

  if (threadid == 0)
    {
      /* Thread 0 is the team master: it initializes all shared team state
	 while the other threads wait at the matching s_barrier in the
	 else-branch below.  */
      int numthreads = __builtin_gcn_dim_size (1);
      int teamid = __builtin_gcn_dim_pos(0);

      /* Set up the global state.
	 Every team will do this, but that should be harmless.  */
      gomp_global_icv.nthreads_var = 16;	/* NOTE(review): magic
						   constant; presumably the
						   per-workgroup wavefront
						   maximum on GCN, clamped at
						   team start via dyn_var —
						   confirm against the host
						   plugin's launch geometry.  */
      gomp_global_icv.thread_limit_var = numthreads;
      /* Starting additional threads is not supported.  */
      gomp_global_icv.dyn_var = true;

      /* Initialize the team arena for optimized memory allocation.
         The arena has been allocated on the host side, and the address
         passed in via the kernargs.  Each team takes a small slice of it.  */
      register void **kernargs asm("s8");	/* Kernarg segment pointer;
						   assumes the ABI preloads it
						   into SGPR s8 — TODO
						   confirm.  */
      /* kernargs[4] is presumably the arena base agreed with the host-side
	 plugin's kernargs layout; verify against the loader.  */
      void *team_arena = (kernargs[4] + TEAM_ARENA_SIZE*teamid);
      /* The arena bookkeeping words live at fixed LDS addresses, so every
	 wavefront in the team sees the same allocator state.  */
      void * __lds *arena_start = (void * __lds *)TEAM_ARENA_START;
      void * __lds *arena_free = (void * __lds *)TEAM_ARENA_FREE;
      void * __lds *arena_end = (void * __lds *)TEAM_ARENA_END;
      *arena_start = team_arena;
      *arena_free = team_arena;
      *arena_end = team_arena + TEAM_ARENA_SIZE;

      /* Allocate and initialize the team-local-storage data.  */
      struct gomp_thread *thrs = team_malloc_cleared (sizeof (*thrs)
						      * numthreads);
      set_gcn_thrs (thrs);

      /* Allocate and initialize a pool of threads in the team.
         The threads are already running, of course, we just need to manage
         the communication between them.  */
      struct gomp_thread_pool *pool = team_malloc (sizeof (*pool));
      pool->threads = team_malloc (sizeof (void *) * numthreads);
      for (int tid = 0; tid < numthreads; tid++)
	pool->threads[tid] = &thrs[tid];
      pool->threads_size = numthreads;
      pool->threads_used = numthreads;
      pool->threads_busy = 1;
      pool->last_team = NULL;
      gomp_simple_barrier_init (&pool->threads_dock, numthreads);
      /* Only thrs[0] needs the pool pointer set here; the worker threads
	 fetch it through gcn_thrs ()[0] below, after the barrier.  */
      thrs->thread_pool = pool;

      /* Release the worker threads waiting in the else-branch.  */
      asm ("s_barrier" ::: "memory");
      return;  /* Return to kernel.  */
    }
  else
    {
      /* Wait for thread 0 to finish publishing the team state above.  */
      asm ("s_barrier" ::: "memory");
      gomp_thread_start (gcn_thrs ()[0].thread_pool);
      /* gomp_thread_start does not return.  */
    }
}
101
void
gomp_gcn_exit_kernel (void)
{
  /* Tear down the thread pool attached to the team-local-storage array,
     then release the array itself back to the team arena.  */
  struct gomp_thread *thrs = gcn_thrs ();

  gomp_free_thread (thrs);
  team_free (thrs);
}
108
109/* This function contains the idle loop in which a thread waits
110   to be called up to become part of a team.  */
111
112static void
113gomp_thread_start (struct gomp_thread_pool *pool)
114{
115  struct gomp_thread *thr = gomp_thread ();
116
117  gomp_sem_init (&thr->release, 0);
118  thr->thread_pool = pool;
119
120  /* The loop exits only when "fn" is assigned "gomp_free_pool_helper",
121     which contains "s_endpgm", or an infinite no-op loop is
122     suspected (this happens when the thread master crashes).  */
123  int nul_limit = 99;
124  do
125    {
126      gomp_simple_barrier_wait (&pool->threads_dock);
127      if (!thr->fn)
128	{
129	  if (nul_limit-- > 0)
130	    continue;
131	  else
132	    {
133	      const char msg[] = ("team master not responding;"
134				  " slave thread aborting");
135	      write (2, msg, sizeof (msg)-1);
136	      abort();
137	    }
138	}
139      thr->fn (thr->data);
140      thr->fn = NULL;
141
142      struct gomp_task *task = thr->task;
143      gomp_team_barrier_wait_final (&thr->ts.team->barrier);
144      gomp_finish_task (task);
145    }
146  while (1);
147}
148
149/* Launch a team.  */
150
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
		 unsigned flags, struct gomp_team *team,
		 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  struct gomp_thread_pool *pool;
  unsigned long nthreads_var;

  /* FLAGS is accepted for interface compatibility with the generic
     gomp_team_start; this implementation does not use it.  */

  thr = gomp_thread ();
  pool = thr->thread_pool;
  task = thr->task;
  /* Inherit ICVs from the current task if there is one, otherwise fall
     back to the global defaults set up in gomp_gcn_enter_kernel.  */
  icv = task ? &task->icv : &gomp_global_icv;

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  /* Install this (calling) thread as member 0 of the new team.  */
  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
  thr->ts.single_count = 0;
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
  /* Record the parent's nthreads_var so it can be installed in each new
     implicit task below (presumably gomp_init_task's ICV copy alone is
     not sufficient here — mirrors the generic team.c; confirm there).  */
  nthreads_var = icv->nthreads_var;
  gomp_init_task (thr->task, task, icv);
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].taskgroup = taskgroup;

  /* A team of one needs no workers released.  */
  if (nthreads == 1)
    return;

  /* Release existing idle threads.  The threads already exist and are
     parked in gomp_thread_start; assigning NTHR->fn is what gives each
     one work once the threads_dock barrier below releases them.  */
  for (unsigned i = 1; i < nthreads; ++i)
    {
      nthr = pool->threads[i];
      nthr->ts.team = team;
      nthr->ts.work_share = &team->work_shares[0];
      nthr->ts.last_work_share = NULL;
      nthr->ts.team_id = i;
      nthr->ts.level = team->prev_ts.level + 1;
      nthr->ts.active_level = thr->ts.active_level;
      nthr->ts.single_count = 0;
      nthr->ts.static_trip = 0;
      nthr->task = &team->implicit_task[i];
      gomp_init_task (nthr->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].taskgroup = taskgroup;
      nthr->fn = fn;
      nthr->data = data;
      team->ordered_release[i] = &nthr->release;
    }

  /* Wake the pool: each worker leaves gomp_simple_barrier_wait in
     gomp_thread_start and invokes FN (DATA).  */
  gomp_simple_barrier_wait (&pool->threads_dock);
}
213
214#include "../../team.c"
215