/* Copyright (C) 2015-2022 Free Software Foundation, Inc.
   Contributed by Mentor Embedded.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This is an AMD GCN specific implementation of a barrier synchronization
   mechanism for libgomp.  This type is private to the library.  This
   implementation uses atomic instructions and the s_barrier instruction.  It
   uses MEMMODEL_RELAXED here because barriers are within workgroups and
   therefore don't need to flush caches.  */
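/* Roughly: gomp_barrier_wait_start (see bar.h) counts each arriving thread
   down in bar->awaited and marks the last arrival with BAR_WAS_LAST in the
   returned state.  That thread resets the counter to bar->total and advances
   bar->generation by BAR_INCR; every wave then meets at the hardware
   s_barrier and, for the team barriers, re-reads the generation until it
   has advanced.  */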

#include <limits.h>
#include "libgomp.h"

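/* Finish a barrier wait begun with gomp_barrier_wait_start.  If STATE shows
   this thread was the last to arrive, re-arm the barrier for the next round
   and advance the generation; the waves then rendezvous at the hardware
   s_barrier (when there is more than one).  */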
void
gomp_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      bar->awaited = bar->total;
      __atomic_store_n (&bar->generation, bar->generation + BAR_INCR,
			MEMMODEL_RELAXED);
    }
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

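/* Block until all threads in the current team have reached the barrier.  */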
void
gomp_barrier_wait (gomp_barrier_t *bar)
{
  gomp_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

/* Like gomp_barrier_wait, except that if the encountering thread
   is not the last one to hit the barrier, it returns immediately.
   The intended usage is that a thread which intends to gomp_barrier_destroy
   this barrier calls gomp_barrier_wait, while all other threads
   call gomp_barrier_wait_last.  When gomp_barrier_wait returns,
   the barrier can be safely destroyed.  */

void
gomp_barrier_wait_last (gomp_barrier_t *bar)
{
  /* Deferring to gomp_barrier_wait does not use the optimization opportunity
     allowed by the interface contract for all-but-last participants.  The
     original implementation in config/linux/bar.c handles this better.  */
  gomp_barrier_wait (bar);
}

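/* Release threads waiting at the team barrier.  On GCN waiters are parked
   at the hardware s_barrier rather than on a futex, so COUNT is unused;
   issuing another s_barrier here lets them proceed and re-check the
   generation.  */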
void
gomp_team_barrier_wake (gomp_barrier_t *bar, int count)
{
  if (bar->total > 1)
    asm ("s_barrier" ::: "memory");
}

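/* Finish a team barrier wait.  The last thread to arrive either runs any
   pending tasks or publishes the new generation; all other threads loop at
   the hardware barrier, handling tasks as they appear, until the generation
   has advanced to STATE + BAR_INCR.  */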
void
gomp_team_barrier_wait_end (gomp_barrier_t *bar, gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state &= ~BAR_CANCELLED;
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
	  if (bar->total > 1)
	    asm ("s_barrier" ::: "memory");
	  return;
	}
    }

  generation = state;
  state &= ~BAR_CANCELLED;
  int retry = 100;
  do
    {
      if (retry-- == 0)
	{
	  /* It really shouldn't happen that barriers get out of sync, but
	     if they do then this will loop until they realign, so we need
	     to avoid an infinite loop where the thread just isn't there.  */
	  const char msg[] = ("Barrier sync failed (another thread died?);"
			      " aborting.");
	  write (2, msg, sizeof (msg) - 1);
	  abort ();
	}

      asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_ACQUIRE);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);
}

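/* Block at the team barrier until every thread has arrived and all pending
   tasks have been handled.  */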
void
gomp_team_barrier_wait (gomp_barrier_t *bar)
{
  gomp_team_barrier_wait_end (bar, gomp_barrier_wait_start (bar));
}

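/* As gomp_team_barrier_wait, but for the team's final barrier: the last
   arrival also re-arms the separate awaited_final counter.  */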
void
gomp_team_barrier_wait_final (gomp_barrier_t *bar)
{
  gomp_barrier_state_t state = gomp_barrier_wait_final_start (bar);
  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    bar->awaited_final = bar->total;
  gomp_team_barrier_wait_end (bar, state);
}

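/* Cancellable counterpart of gomp_team_barrier_wait_end.  Returns true if
   the barrier was cancelled (BAR_CANCELLED observed) before it completed,
   false if every thread arrived normally.  */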
bool
gomp_team_barrier_wait_cancel_end (gomp_barrier_t *bar,
				   gomp_barrier_state_t state)
{
  unsigned int generation, gen;

  if (__builtin_expect (state & BAR_WAS_LAST, 0))
    {
      /* Next time we'll be awaiting TOTAL threads again.  */
      /* BAR_CANCELLED should never be set in state here, because
	 cancellation means that at least one of the threads has been
	 cancelled, thus on a cancellable barrier we should never see
	 all threads arrive.  */
      struct gomp_thread *thr = gomp_thread ();
      struct gomp_team *team = thr->ts.team;

      bar->awaited = bar->total;
      team->work_share_cancelled = 0;
      if (__builtin_expect (team->task_count, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  state &= ~BAR_WAS_LAST;
	}
      else
	{
	  state += BAR_INCR - BAR_WAS_LAST;
	  __atomic_store_n (&bar->generation, state, MEMMODEL_RELAXED);
	  if (bar->total > 1)
	    asm ("s_barrier" ::: "memory");
	  return false;
	}
    }

  if (__builtin_expect (state & BAR_CANCELLED, 0))
    return true;

  generation = state;
  int retry = 100;
  do
    {
      if (retry-- == 0)
	{
	  /* It really shouldn't happen that barriers get out of sync, but
	     if they do then this will loop until they realign, so we need
	     to avoid an infinite loop where the thread just isn't there.  */
	  const char msg[] = ("Barrier sync failed (another thread died?);"
			      " aborting.");
	  write (2, msg, sizeof (msg) - 1);
	  abort ();
	}

      if (bar->total > 1)
	asm ("s_barrier" ::: "memory");
      gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
      if (__builtin_expect (gen & BAR_CANCELLED, 0))
	return true;
      if (__builtin_expect (gen & BAR_TASK_PENDING, 0))
	{
	  gomp_barrier_handle_tasks (state);
	  gen = __atomic_load_n (&bar->generation, MEMMODEL_RELAXED);
	}
      generation |= gen & BAR_WAITING_FOR_TASK;
    }
  while (gen != state + BAR_INCR);

  return false;
}

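/* Cancellable team barrier; returns true if the barrier was cancelled
   rather than completed.  */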
bool
gomp_team_barrier_wait_cancel (gomp_barrier_t *bar)
{
  return gomp_team_barrier_wait_cancel_end (bar, gomp_barrier_wait_start (bar));
}

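/* Mark TEAM's barrier as cancelled and wake any threads waiting on it so
   that they can observe the cancellation.  */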
void
gomp_team_barrier_cancel (struct gomp_team *team)
{
  gomp_mutex_lock (&team->task_lock);
  if (team->barrier.generation & BAR_CANCELLED)
    {
      gomp_mutex_unlock (&team->task_lock);
      return;
    }
  team->barrier.generation |= BAR_CANCELLED;
  gomp_mutex_unlock (&team->task_lock);
  gomp_team_barrier_wake (&team->barrier, INT_MAX);
}