1169695Skan/* Copyright (C) 2005 Free Software Foundation, Inc.
2169695Skan   Contributed by Richard Henderson <rth@redhat.com>.
3169695Skan
4169695Skan   This file is part of the GNU OpenMP Library (libgomp).
5169695Skan
6169695Skan   Libgomp is free software; you can redistribute it and/or modify it
7169695Skan   under the terms of the GNU Lesser General Public License as published by
8169695Skan   the Free Software Foundation; either version 2.1 of the License, or
9169695Skan   (at your option) any later version.
10169695Skan
11169695Skan   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
12169695Skan   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13169695Skan   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
14169695Skan   more details.
15169695Skan
16169695Skan   You should have received a copy of the GNU Lesser General Public License
17169695Skan   along with libgomp; see the file COPYING.LIB.  If not, write to the
18169695Skan   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19169695Skan   MA 02110-1301, USA.  */
20169695Skan
21169695Skan/* As a special exception, if you link this library with other files, some
22169695Skan   of which are compiled with GCC, to produce an executable, this library
23169695Skan   does not by itself cause the resulting executable to be covered by the
24169695Skan   GNU General Public License.  This exception does not however invalidate
25169695Skan   any other reasons why the executable file might be covered by the GNU
26169695Skan   General Public License.  */
27169695Skan
28169695Skan/* This file contains routines for managing work-share iteration, both
29169695Skan   for loops and sections.  */
30169695Skan
31169695Skan#include "libgomp.h"
32169695Skan#include <stdlib.h>
33169695Skan
34169695Skan
35169695Skan/* This function implements the STATIC scheduling method.  The caller should
36169695Skan   iterate *pstart <= x < *pend.  Return zero if there are more iterations
37169695Skan   to perform; nonzero if not.  Return less than 0 if this thread had
38169695Skan   received the absolutely last iteration.  */
39169695Skan
int
gomp_iter_static_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;
  struct gomp_work_share *ws = thr->ts.work_share;
  unsigned long nthreads = team ? team->nthreads : 1;

  /* static_trip acts as a small per-thread state machine:
       0   this thread has not yet taken a chunk from this work share;
       >0  number of trips already made through the chunked loop
           (or simply "done" in the unchunked case);
       -1  this thread handed out the absolutely last iteration.  */
  if (thr->ts.static_trip == -1)
    return -1;

  /* Quick test for degenerate teams and orphaned constructs.  */
  if (nthreads == 1)
    {
      *pstart = ws->next;
      *pend = ws->end;
      thr->ts.static_trip = -1;
      /* Nonzero (i.e. "no more iterations") only when the range is
	 already empty; otherwise hand out the whole range at once.  */
      return ws->next == ws->end;
    }

  /* We interpret chunk_size zero as "unspecified", which means that we
     should break up the iterations such that each thread makes only one
     trip through the outer loop.  */
  if (ws->chunk_size == 0)
    {
      unsigned long n, q, i;
      unsigned long s0, e0;
      long s, e;

      /* Unchunked static scheduling hands out at most one block per
	 thread; a second call means we are done.  */
      if (thr->ts.static_trip > 0)
	return 1;

      /* Compute the total number of iterations.  The bias S makes the
	 division round the magnitude up (toward "one more iteration")
	 for either sign of the increment.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;

      /* Compute the "zero-based" start and end points.  That is, as
         if the loop began at zero and incremented by one.  */
      q = n / nthreads;
      q += (q * nthreads != n);	/* Ceiling division: q = ceil(n/nthreads).  */
      s0 = q * i;
      e0 = s0 + q;
      if (e0 > n)
        e0 = n;

      /* Notice when no iterations allocated for this thread.  */
      if (s0 >= e0)
	{
	  thr->ts.static_trip = 1;
	  return 1;
	}

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;
      /* If this block reaches the end of the range, this thread owns the
	 last iteration and must report -1 on its next call.  */
      thr->ts.static_trip = (e0 == n ? -1 : 1);
      return 0;
    }
  else
    {
      unsigned long n, s0, e0, i, c;
      long s, e;

      /* Otherwise, each thread gets exactly chunk_size iterations
	 (if available) each time through the loop.  */

      /* Total iteration count, computed as in the unchunked case.  */
      s = ws->incr + (ws->incr > 0 ? -1 : 1);
      n = (ws->end - ws->next + s) / ws->incr;
      i = thr->ts.team_id;
      c = ws->chunk_size;

      /* Initial guess is a C sized chunk positioned nthreads iterations
	 in, offset by our thread number.  */
      s0 = (thr->ts.static_trip * nthreads + i) * c;
      e0 = s0 + c;

      /* Detect overflow.  */
      if (s0 >= n)
	return 1;
      if (e0 > n)
	e0 = n;

      /* Transform these to the actual start and end numbers.  */
      s = (long)s0 * ws->incr + ws->next;
      e = (long)e0 * ws->incr + ws->next;

      *pstart = s;
      *pend = e;

      /* e0 == n means this chunk contains the final iteration; otherwise
	 advance the trip counter so the next call takes the next round-
	 robin chunk for this thread.  */
      if (e0 == n)
	thr->ts.static_trip = -1;
      else
	thr->ts.static_trip++;
      return 0;
    }
}
140169695Skan
141169695Skan
142169695Skan/* This function implements the DYNAMIC scheduling method.  Arguments are
143169695Skan   as for gomp_iter_static_next.  This function must be called with ws->lock
144169695Skan   held.  */
145169695Skan
146169695Skanbool
147169695Skangomp_iter_dynamic_next_locked (long *pstart, long *pend)
148169695Skan{
149169695Skan  struct gomp_thread *thr = gomp_thread ();
150169695Skan  struct gomp_work_share *ws = thr->ts.work_share;
151169695Skan  long start, end, chunk, left;
152169695Skan
153169695Skan  start = ws->next;
154169695Skan  if (start == ws->end)
155169695Skan    return false;
156169695Skan
157169695Skan  chunk = ws->chunk_size * ws->incr;
158169695Skan  left = ws->end - start;
159169695Skan  if (ws->incr < 0)
160169695Skan    {
161169695Skan      if (chunk < left)
162169695Skan	chunk = left;
163169695Skan    }
164169695Skan  else
165169695Skan    {
166169695Skan      if (chunk > left)
167169695Skan	chunk = left;
168169695Skan    }
169169695Skan  end = start + chunk;
170169695Skan
171169695Skan  ws->next = end;
172169695Skan  *pstart = start;
173169695Skan  *pend = end;
174169695Skan  return true;
175169695Skan}
176169695Skan
177169695Skan
178169695Skan#ifdef HAVE_SYNC_BUILTINS
179169695Skan/* Similar, but doesn't require the lock held, and uses compare-and-swap
180169695Skan   instead.  Note that the only memory value that changes is ws->next.  */
181169695Skan
bool
gomp_iter_dynamic_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  long start, end, nend, chunk, incr;

  /* end, incr and chunk are not modified concurrently (only ws->next
     changes), so they can be read once outside the retry loop.  */
  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk = ws->chunk_size * incr;

  while (1)
    {
      long left = end - start;
      long tmp;

      if (start == end)
	return false;		/* Nothing left to claim.  */

      /* Clamp the chunk to the remaining iterations; the comparison
	 direction depends on the sign of the increment.  */
      if (incr < 0)
	{
	  if (chunk < left)
	    chunk = left;
	}
      else
	{
	  if (chunk > left)
	    chunk = left;
	}
      nend = start + chunk;

      /* Try to claim [start, nend) by advancing ws->next atomically.
	 On success the CAS returns the value we expected (start).  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Another thread moved ws->next first; retry from the value the
	 CAS observed rather than re-reading the location.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
225169695Skan#endif /* HAVE_SYNC_BUILTINS */
226169695Skan
227169695Skan
228169695Skan/* This function implements the GUIDED scheduling method.  Arguments are
229169695Skan   as for gomp_iter_static_next.  This function must be called with the
230169695Skan   work share lock held.  */
231169695Skan
232169695Skanbool
233169695Skangomp_iter_guided_next_locked (long *pstart, long *pend)
234169695Skan{
235169695Skan  struct gomp_thread *thr = gomp_thread ();
236169695Skan  struct gomp_work_share *ws = thr->ts.work_share;
237169695Skan  struct gomp_team *team = thr->ts.team;
238169695Skan  unsigned long nthreads = team ? team->nthreads : 1;
239169695Skan  unsigned long n, q;
240169695Skan  long start, end;
241169695Skan
242169695Skan  if (ws->next == ws->end)
243169695Skan    return false;
244169695Skan
245282152Spfg  start = ws->next;
246282152Spfg  n = (ws->end - start) / ws->incr;
247169695Skan  q = (n + nthreads - 1) / nthreads;
248169695Skan
249169695Skan  if (q < ws->chunk_size)
250169695Skan    q = ws->chunk_size;
251282152Spfg  if (q <= n)
252282152Spfg    end = start + q * ws->incr;
253282152Spfg  else
254282152Spfg    end = ws->end;
255169695Skan
256169695Skan  ws->next = end;
257169695Skan  *pstart = start;
258169695Skan  *pend = end;
259169695Skan  return true;
260169695Skan}
261169695Skan
262169695Skan#ifdef HAVE_SYNC_BUILTINS
263169695Skan/* Similar, but doesn't require the lock held, and uses compare-and-swap
264169695Skan   instead.  Note that the only memory value that changes is ws->next.  */
265169695Skan
bool
gomp_iter_guided_next (long *pstart, long *pend)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_work_share *ws = thr->ts.work_share;
  struct gomp_team *team = thr->ts.team;
  unsigned long nthreads = team ? team->nthreads : 1;
  long start, end, nend, incr;
  unsigned long chunk_size;

  /* Only ws->next is modified concurrently; end, incr and chunk_size
     are stable and can be read once before the retry loop.  */
  start = ws->next;
  end = ws->end;
  incr = ws->incr;
  chunk_size = ws->chunk_size;

  while (1)
    {
      unsigned long n, q;
      long tmp;

      if (start == end)
	return false;		/* Nothing left to claim.  */

      /* Guided scheduling: claim an nthreads-th of the remaining
	 iterations (rounded up), but at least chunk_size of them.  */
      n = (end - start) / incr;
      q = (n + nthreads - 1) / nthreads;

      if (q < chunk_size)
	q = chunk_size;
      /* The enforced minimum may overshoot what remains; cap at end.  */
      if (__builtin_expect (q <= n, 1))
	nend = start + q * incr;
      else
	nend = end;

      /* Try to claim [start, nend) by advancing ws->next atomically;
	 the CAS returns the expected value (start) on success.  */
      tmp = __sync_val_compare_and_swap (&ws->next, start, nend);
      if (__builtin_expect (tmp == start, 1))
	break;

      /* Lost the race; retry from the value the CAS observed instead of
	 re-reading ws->next.  */
      start = tmp;
    }

  *pstart = start;
  *pend = nend;
  return true;
}
310169695Skan#endif /* HAVE_SYNC_BUILTINS */
311