/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file contains routines to manage the work-share queue for a team
   of threads.  */

#include "libgomp.h"
#include <stddef.h>
#include <stdlib.h>
#include <string.h>


/* Allocate a new work share structure, preferably from the current team's
   free gomp_work_share cache.  */

static struct gomp_work_share *
alloc_work_share (struct gomp_team *team)
{
  struct gomp_work_share *ws;
  unsigned int i;

  /* This is called in a critical section.  */
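  /* Only one thread of the team runs here at a time, so
     work_share_list_alloc can be used without synchronization.
     work_share_list_free, by contrast, is refilled concurrently by
     free_work_share and needs more care below.  */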
  if (team->work_share_list_alloc != NULL)
    {
      ws = team->work_share_list_alloc;
      team->work_share_list_alloc = ws->next_free;
      return ws;
    }

#ifdef HAVE_SYNC_BUILTINS
  ws = team->work_share_list_free;
  /* We need an atomic read from work_share_list_free,
     as free_work_share can be called concurrently.  */
  __asm ("" : "+r" (ws));

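  /* free_work_share pushes entries onto the head of this list with a
     compare-and-swap on the list head pointer, and this function never
     writes that pointer.  The head entry therefore stays on the free
     list (with its next_free cleared); only the entries behind it are
     taken, the first satisfying this request and the rest going onto
     work_share_list_alloc.  */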
  if (ws && ws->next_free)
    {
      struct gomp_work_share *next = ws->next_free;
      ws->next_free = NULL;
      team->work_share_list_alloc = next->next_free;
      return next;
    }
#else
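  /* Without sync builtins the free list is protected by a mutex, so the
     whole list, head included, can be moved over: the head satisfies
     this request and the rest becomes the allocation list.  */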
  gomp_mutex_lock (&team->work_share_list_free_lock);
  ws = team->work_share_list_free;
  if (ws)
    {
      team->work_share_list_alloc = ws->next_free;
      team->work_share_list_free = NULL;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
      return ws;
    }
  gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif

  team->work_share_chunk *= 2;
  /* Allocating gomp_work_share structures aligned is just an
     optimization; don't do it when using the fallback method.  */
#ifdef GOMP_HAVE_EFFICIENT_ALIGNED_ALLOC
  ws = gomp_aligned_alloc (__alignof (struct gomp_work_share),
			   team->work_share_chunk
			   * sizeof (struct gomp_work_share));
#else
  ws = gomp_malloc (team->work_share_chunk * sizeof (struct gomp_work_share));
#endif
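  /* Link the new chunk onto the chain of allocated blocks rooted at
     team->work_shares[0].next_alloc so it can be released when the team
     is destroyed.  The first element of the chunk satisfies this
     request; the remaining elements are threaded onto the allocation
     free list.  */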
  ws->next_alloc = team->work_shares[0].next_alloc;
  team->work_shares[0].next_alloc = ws;
  team->work_share_list_alloc = &ws[1];
  for (i = 1; i < team->work_share_chunk - 1; i++)
    ws[i].next_free = &ws[i + 1];
  ws[i].next_free = NULL;
  return ws;
}

/* Initialize an already allocated struct gomp_work_share.
   This shouldn't touch the next_alloc field.  */

void
gomp_init_work_share (struct gomp_work_share *ws, size_t ordered,
		      unsigned nthreads)
{
  gomp_mutex_init (&ws->lock);
  if (__builtin_expect (ordered, 0))
    {
#define INLINE_ORDERED_TEAM_IDS_SIZE \
  (sizeof (struct gomp_work_share) \
   - offsetof (struct gomp_work_share, inline_ordered_team_ids))

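      /* The tail of struct gomp_work_share doubles as inline storage for
	 the ordered_team_ids array; only when the required size exceeds
	 INLINE_ORDERED_TEAM_IDS_SIZE is a separate allocation made.
	 ORDERED == 1 requests one id slot per thread, while other
	 non-zero values request ORDERED - 1 extra bytes beyond that
	 array, with the array size padded up to long long alignment so
	 the extra data is suitably aligned.  */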
      if (__builtin_expect (ordered != 1, 0))
	{
	  size_t o = nthreads * sizeof (*ws->ordered_team_ids);
	  o += __alignof__ (long long) - 1;
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    o &= ~(__alignof__ (long long) - 1);
	  ordered += o - 1;
	}
      else
	ordered = nthreads * sizeof (*ws->ordered_team_ids);
      if (ordered > INLINE_ORDERED_TEAM_IDS_SIZE)
	ws->ordered_team_ids = team_malloc (ordered);
      else
	ws->ordered_team_ids = ws->inline_ordered_team_ids;
      memset (ws->ordered_team_ids, '\0', ordered);
      ws->ordered_num_used = 0;
      ws->ordered_owner = -1;
      ws->ordered_cur = 0;
    }
  else
    ws->ordered_team_ids = ws->inline_ordered_team_ids;
  gomp_ptrlock_init (&ws->next_ws, NULL);
  ws->threads_completed = 0;
}

/* Do any needed destruction of gomp_work_share fields before it
   is put back into the free gomp_work_share cache or freed.  */

void
gomp_fini_work_share (struct gomp_work_share *ws)
{
  gomp_mutex_destroy (&ws->lock);
  if (ws->ordered_team_ids != ws->inline_ordered_team_ids)
    team_free (ws->ordered_team_ids);
  gomp_ptrlock_destroy (&ws->next_ws);
}

/* Free a work share struct; if not orphaned, put it back into the
   current team's free gomp_work_share cache rather than freeing it.  */

static inline void
free_work_share (struct gomp_team *team, struct gomp_work_share *ws)
{
  gomp_fini_work_share (ws);
  if (__builtin_expect (team == NULL, 0))
    free (ws);
  else
    {
      struct gomp_work_share *next_ws;
#ifdef HAVE_SYNC_BUILTINS
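      /* Lock-free push onto the head of the team's free list; retry the
	 compare-and-swap if another thread pushed concurrently.  */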
      do
	{
	  next_ws = team->work_share_list_free;
	  ws->next_free = next_ws;
	}
      while (!__sync_bool_compare_and_swap (&team->work_share_list_free,
					    next_ws, ws));
#else
      gomp_mutex_lock (&team->work_share_list_free_lock);
      next_ws = team->work_share_list_free;
      ws->next_free = next_ws;
      team->work_share_list_free = ws;
      gomp_mutex_unlock (&team->work_share_list_free_lock);
#endif
    }
}

/* The current thread is ready to begin the next work sharing construct.
   In all cases, thr->ts.work_share is updated to point to the new
   structure.  Return true if this was the first thread to reach this
   point.  */

bool
gomp_work_share_start (size_t ordered)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws;

  /* Work sharing constructs can be orphaned.  */
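  /* An orphaned construct is one encountered outside of a parallel
     region; it is then executed by a team of one, and its work share is
     heap-allocated here and freed directly by the matching
     gomp_work_share_end*.  */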
  if (team == NULL)
    {
      ws = gomp_malloc (sizeof (*ws));
      gomp_init_work_share (ws, ordered, 1);
      thr->ts.work_share = ws;
      return true;
    }

  ws = thr->ts.work_share;
  thr->ts.last_work_share = ws;
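  /* gomp_ptrlock_get returns NULL only to the first thread of the team
     to encounter the new work-sharing construct; that thread allocates
     and initializes the next work share and is expected to publish it
     afterwards (in libgomp this is done via gomp_work_share_init_done,
     which sets last_work_share->next_ws).  Every other thread waits
     here until the new work share is available.  */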
  ws = gomp_ptrlock_get (&ws->next_ws);
  if (ws == NULL)
    {
      /* This thread encountered a new ws first.  */
      struct gomp_work_share *ws = alloc_work_share (team);
      gomp_init_work_share (ws, ordered, team->nthreads);
      thr->ts.work_share = ws;
      return true;
    }
  else
    {
      thr->ts.work_share = ws;
      return false;
    }
}

/* The current thread is done with its current work sharing construct.
   This version does imply a barrier at the end of the work-share.  */

void
gomp_work_share_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, thr->ts.work_share);
      thr->ts.work_share = NULL;
      return;
    }

  bstate = gomp_barrier_wait_start (&team->barrier);

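  /* Only the last thread to arrive at the barrier frees the previous
     work share; by then every thread has moved on to the current one.
     The current work share is noted in work_shares_to_free (see its
     description in libgomp.h) as not yet finalized.  */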
  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }

  gomp_team_barrier_wait_end (&team->barrier, bstate);
  thr->ts.last_work_share = NULL;
}

/* The current thread is done with its current work sharing construct.
   This version implies a cancellable barrier at the end of the work-share.  */

bool
gomp_work_share_end_cancel (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  gomp_barrier_state_t bstate;

  /* Cancellable work sharing constructs cannot be orphaned.  */
  bstate = gomp_barrier_wait_cancel_start (&team->barrier);

  if (gomp_barrier_last_thread (bstate))
    {
      if (__builtin_expect (thr->ts.last_work_share != NULL, 1))
	{
	  team->work_shares_to_free = thr->ts.work_share;
	  free_work_share (team, thr->ts.last_work_share);
	}
    }
  thr->ts.last_work_share = NULL;

  return gomp_team_barrier_wait_cancel_end (&team->barrier, bstate);
}

/* The current thread is done with its current work sharing construct.
   This version does NOT imply a barrier at the end of the work-share.  */

void
gomp_work_share_end_nowait (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned completed;

  /* Work sharing constructs can be orphaned.  */
  if (team == NULL)
    {
      free_work_share (NULL, ws);
      thr->ts.work_share = NULL;
      return;
    }

  if (__builtin_expect (thr->ts.last_work_share == NULL, 0))
    return;

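  /* Count the threads that have finished the current work share; the
     last one to do so knows that every thread has moved past the
     previous work share, so that one can now be freed.  */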
#ifdef HAVE_SYNC_BUILTINS
  completed = __sync_add_and_fetch (&ws->threads_completed, 1);
#else
  gomp_mutex_lock (&ws->lock);
  completed = ++ws->threads_completed;
  gomp_mutex_unlock (&ws->lock);
#endif

  if (completed == team->nthreads)
    {
      team->work_shares_to_free = thr->ts.work_share;
      free_work_share (team, thr->ts.last_work_share);
    }
  thr->ts.last_work_share = NULL;
}
