/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"


ialias (GOMP_loop_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_init (struct gomp_work_share *ws, long start, long end, long incr,
		enum gomp_schedule_type sched, long chunk_size)
{
  ws->sched = sched;
  ws->chunk_size = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end = ((incr > 0 && start > end) || (incr < 0 && start < end))
	    ? start : end;
  ws->incr = incr;
  ws->next = start;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size *= incr;

#ifdef HAVE_SYNC_BUILTINS
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (incr > 0, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size)
				  >= 1UL << (sizeof (long)
					     * __CHAR_BIT__ / 2 - 1), 0))
	      ws->mode = 0;
	    else
	      ws->mode = ws->end < (LONG_MAX
				    - (nthreads + 1) * ws->chunk_size);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size)
				   >= 1UL << (sizeof (long)
					      * __CHAR_BIT__ / 2 - 1), 0))
	  ws->mode = 0;
	else
	  ws->mode = ws->end > (nthreads + 1) * -ws->chunk_size - LONG_MAX;
      }
#endif
    }
}

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
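
/* A minimal usage sketch (an assumption about the compiler-generated
   calling sequence, not something defined by this file): each thread in
   the team would drive a dynamically scheduled loop roughly as

       long istart, iend;
       if (GOMP_loop_dynamic_start (0, n, 1, chunk, &istart, &iend))
	 do
	   for (long i = istart; i < iend; i++)
	     body (i);
	 while (GOMP_loop_dynamic_next (&istart, &iend));
       GOMP_loop_end ();

   where n, chunk and body () are placeholders for the user's loop.  */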

static bool
gomp_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

/* The current dynamic implementation is always monotonic.  The entry
   points without nonmonotonic in their name always have to be monotonic,
   but the nonmonotonic ones could be changed to use work-stealing for
   improved scalability.  */

static bool
gomp_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

/* As for dynamic, though the open question is how the chunk sizes could
   be decreased without central locking or atomics.  */

static bool
gomp_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_start (long start, long end, long incr,
			 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_static_start (start, end, incr,
				     icv->run_sched_chunk_size,
				     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_start (start, end, incr,
				      icv->run_sched_chunk_size,
				      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_start (start, end, incr,
				     icv->run_sched_chunk_size,
				     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment with
	 a feedback-driven choice.  */
      return gomp_loop_static_start (start, end, incr, 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, long *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}

bool
GOMP_loop_start (long start, long end, long incr, long sched,
		 long chunk_size, long *istart, long *iend,
		 uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
	  if (size > (sizeof (struct gomp_work_share)
		      - INLINE_ORDERED_TEAM_IDS_OFF))
	    *mem
	      = (void *) (thr->ts.work_share->ordered_team_ids
			  = gomp_malloc_cleared (size));
	  else
	    *mem = memset (((char *) thr->ts.work_share)
			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	{
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
	  else
	    {
	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
	      p += __alignof__ (long long) - 1;
	      p &= ~(__alignof__ (long long) - 1);
	      *mem = (void *) p;
	    }
	}
    }

  if (!istart)
    return true;
  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
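
/* A hedged sketch of the calling sequence a compiler might generate for an
   ordered loop (before_ordered, ordered_part, n and chunk are placeholders,
   and GOMP_ordered_start/GOMP_ordered_end are defined elsewhere in the
   library):

       long istart, iend;
       if (GOMP_loop_ordered_dynamic_start (0, n, 1, chunk, &istart, &iend))
	 do
	   for (long i = istart; i < iend; i++)
	     {
	       before_ordered (i);
	       GOMP_ordered_start ();
	       ordered_part (i);
	       GOMP_ordered_end ();
	     }
	 while (GOMP_loop_ordered_dynamic_next (&istart, &iend));
       GOMP_loop_end ();  */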

static bool
gomp_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_start (long start, long end, long incr,
				 long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ordered_static_start (start, end, incr,
					     icv->run_sched_chunk_size,
					     istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_start (start, end, incr,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_start (start, end, incr,
					     icv->run_sched_chunk_size,
					     istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment with
	 a feedback-driven choice.  */
      return gomp_loop_ordered_static_start (start, end, incr,
					     0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ordered_start (long start, long end, long incr, long sched,
			 long chunk_size, long *istart, long *iend,
			 uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, start, end, incr,
		      sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, the worksharing loop always iterates from 0 to COUNTS[0] - 1, and
   the remaining COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */
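
/* A loosely hedged sketch of how a two-level doacross loop might drive
   these entry points (counts, n1, n2, chunk and body are placeholders,
   the doacross post/wait entry points are defined elsewhere in the
   library, and the exact placement of those calls depends on the user's
   depend clauses):

       long counts[2] = { n1, n2 }, istart, iend;
       if (GOMP_loop_doacross_dynamic_start (2, counts, chunk,
					     &istart, &iend))
	 do
	   for (long i = istart; i < iend; i++)
	     for (long j = 0; j < n2; j++)
	       {
		 if (i > 0)
		   GOMP_doacross_wait (i - 1, j);
		 body (i, j);
		 GOMP_doacross_post ((long []) { i, j });
	       }
	 while (GOMP_loop_dynamic_next (&istart, &iend));
       GOMP_loop_end ();  */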

static bool
gomp_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_STATIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_DYNAMIC, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      GFS_GUIDED, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_doacross_runtime_start (unsigned ncounts, long *counts,
				  long *istart, long *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_doacross_static_start (ncounts, counts,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_doacross_dynamic_start (ncounts, counts,
					       icv->run_sched_chunk_size,
					       istart, iend);
    case GFS_GUIDED:
      return gomp_loop_doacross_guided_start (ncounts, counts,
					      icv->run_sched_chunk_size,
					      istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static); later on we could experiment with
	 a feedback-driven choice.  */
      return gomp_loop_doacross_static_start (ncounts, counts,
					      0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_doacross_start (unsigned ncounts, long *counts, long sched,
			  long chunk_size, long *istart, long *iend,
			  uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_init (thr->ts.work_share, 0, counts[0], 1,
		      sched, chunk_size);
      gomp_doacross_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_static_next (long *istart, long *iend)
{
  return !gomp_iter_static_next (istart, iend);
}

static bool
gomp_loop_dynamic_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_guided_next (long *istart, long *iend)
{
  bool ret;

#ifdef HAVE_SYNC_BUILTINS
  ret = gomp_iter_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ordered_static_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ordered_dynamic_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ordered_guided_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ordered_runtime_next (long *istart, long *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The GOMP_parallel_loop_* routines pre-initialize a work-share construct
   to avoid one synchronization once we get into the loop.  */
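
/* A brief sketch of the intended calling sequence (an assumption about the
   generated code, with subfunction, n and body () as placeholders): since
   the work share is pre-initialized, the outlined function can go straight
   to the *_next calls:

       static void
       subfunction (void *data)
       {
	 long istart, iend;
	 while (GOMP_loop_runtime_next (&istart, &iend))
	   for (long i = istart; i < iend; i++)
	     body (i);
	 GOMP_loop_end_nowait ();
       }

       GOMP_parallel_loop_runtime (subfunction, NULL, 0, 0, n, 1, 0);  */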

static void
gomp_parallel_loop_start (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, enum gomp_schedule_type sched,
			  long chunk_size, unsigned int flags)
{
  struct gomp_team *team;

  num_threads = gomp_resolve_num_threads (num_threads, 0);
  team = gomp_new_team (num_threads);
  gomp_loop_init (&team->work_shares[0], start, end, incr, sched, chunk_size);
  gomp_team_start (fn, data, num_threads, flags, team, NULL);
}

void
GOMP_parallel_loop_static_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, 0);
}

void
GOMP_parallel_loop_dynamic_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, 0);
}

void
GOMP_parallel_loop_guided_start (void (*fn) (void *), void *data,
				 unsigned num_threads, long start, long end,
				 long incr, long chunk_size)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, 0);
}

void
GOMP_parallel_loop_runtime_start (void (*fn) (void *), void *data,
				  unsigned num_threads, long start, long end,
				  long incr)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var & ~GFS_MONOTONIC,
			    icv->run_sched_chunk_size, 0);
}

ialias_redirect (GOMP_parallel_end)

void
GOMP_parallel_loop_static (void (*fn) (void *), void *data,
			   unsigned num_threads, long start, long end,
			   long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_STATIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_dynamic (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_guided (void (*fn) (void *), void *data,
			  unsigned num_threads, long start, long end,
			  long incr, long chunk_size, unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_runtime (void (*fn) (void *), void *data,
			    unsigned num_threads, long start, long end,
			    long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var & ~GFS_MONOTONIC,
			    icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(GOMP_parallel_loop_dynamic) GOMP_parallel_loop_nonmonotonic_dynamic
	__attribute__((alias ("GOMP_parallel_loop_dynamic")));
extern __typeof(GOMP_parallel_loop_guided) GOMP_parallel_loop_nonmonotonic_guided
	__attribute__((alias ("GOMP_parallel_loop_guided")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_nonmonotonic_runtime
	__attribute__((alias ("GOMP_parallel_loop_runtime")));
extern __typeof(GOMP_parallel_loop_runtime) GOMP_parallel_loop_maybe_nonmonotonic_runtime
	__attribute__((alias ("GOMP_parallel_loop_runtime")));
#else
void
GOMP_parallel_loop_nonmonotonic_dynamic (void (*fn) (void *), void *data,
					 unsigned num_threads, long start,
					 long end, long incr, long chunk_size,
					 unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_DYNAMIC, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_guided (void (*fn) (void *), void *data,
					unsigned num_threads, long start,
					long end, long incr, long chunk_size,
					unsigned flags)
{
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    GFS_GUIDED, chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_nonmonotonic_runtime (void (*fn) (void *), void *data,
					 unsigned num_threads, long start,
					 long end, long incr, unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var & ~GFS_MONOTONIC,
			    icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}

void
GOMP_parallel_loop_maybe_nonmonotonic_runtime (void (*fn) (void *), void *data,
					       unsigned num_threads, long start,
					       long end, long incr,
					       unsigned flags)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  gomp_parallel_loop_start (fn, data, num_threads, start, end, incr,
			    icv->run_sched_var & ~GFS_MONOTONIC,
			    icv->run_sched_chunk_size, flags);
  fn (data);
  GOMP_parallel_end ();
}
#endif

/* The GOMP_loop_end* routines are called after the thread is told that
   all loop iterations are complete.  The first two versions synchronize
   all threads; the nowait version does not.  */

void
GOMP_loop_end (void)
{
  gomp_work_share_end ();
}

bool
GOMP_loop_end_cancel (void)
{
  return gomp_work_share_end_cancel ();
}

void
GOMP_loop_end_nowait (void)
{
  gomp_work_share_end_nowait ();
}


/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_static_start) GOMP_loop_static_start
	__attribute__((alias ("gomp_loop_static_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(gomp_loop_dynamic_start) GOMP_loop_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_dynamic_start")));
extern __typeof(gomp_loop_guided_start) GOMP_loop_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_guided_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_runtime_start")));
extern __typeof(GOMP_loop_runtime_start) GOMP_loop_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_runtime_start")));

extern __typeof(gomp_loop_ordered_static_start) GOMP_loop_ordered_static_start
	__attribute__((alias ("gomp_loop_ordered_static_start")));
extern __typeof(gomp_loop_ordered_dynamic_start) GOMP_loop_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ordered_dynamic_start")));
extern __typeof(gomp_loop_ordered_guided_start) GOMP_loop_ordered_guided_start
	__attribute__((alias ("gomp_loop_ordered_guided_start")));

extern __typeof(gomp_loop_doacross_static_start) GOMP_loop_doacross_static_start
	__attribute__((alias ("gomp_loop_doacross_static_start")));
extern __typeof(gomp_loop_doacross_dynamic_start) GOMP_loop_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_doacross_dynamic_start")));
extern __typeof(gomp_loop_doacross_guided_start) GOMP_loop_doacross_guided_start
	__attribute__((alias ("gomp_loop_doacross_guided_start")));

extern __typeof(gomp_loop_static_next) GOMP_loop_static_next
	__attribute__((alias ("gomp_loop_static_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(gomp_loop_dynamic_next) GOMP_loop_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_dynamic_next")));
extern __typeof(gomp_loop_guided_next) GOMP_loop_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_guided_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_runtime_next")));
extern __typeof(GOMP_loop_runtime_next) GOMP_loop_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_runtime_next")));

extern __typeof(gomp_loop_ordered_static_next) GOMP_loop_ordered_static_next
	__attribute__((alias ("gomp_loop_ordered_static_next")));
extern __typeof(gomp_loop_ordered_dynamic_next) GOMP_loop_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ordered_dynamic_next")));
extern __typeof(gomp_loop_ordered_guided_next) GOMP_loop_ordered_guided_next
	__attribute__((alias ("gomp_loop_ordered_guided_next")));
#else
bool
GOMP_loop_static_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_static_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_dynamic_start (long start, long end, long incr, long chunk_size,
			 long *istart, long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_guided_start (long start, long end, long incr, long chunk_size,
			long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_start (long start, long end, long incr,
				      long chunk_size, long *istart,
				      long *iend)
{
  return gomp_loop_dynamic_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_start (long start, long end, long incr,
				     long chunk_size, long *istart, long *iend)
{
  return gomp_loop_guided_start (start, end, incr, chunk_size, istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_start (long start, long end, long incr,
				      long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_start (long start, long end, long incr,
					    long *istart, long *iend)
{
  return GOMP_loop_runtime_start (start, end, incr, istart, iend);
}

bool
GOMP_loop_ordered_static_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_static_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_ordered_dynamic_start (long start, long end, long incr,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_start (start, end, incr, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_ordered_guided_start (long start, long end, long incr,
				long chunk_size, long *istart, long *iend)
{
  return gomp_loop_ordered_guided_start (start, end, incr, chunk_size,
					 istart, iend);
}

bool
GOMP_loop_doacross_static_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_static_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_doacross_dynamic_start (unsigned ncounts, long *counts,
				  long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_dynamic_start (ncounts, counts, chunk_size,
					   istart, iend);
}

bool
GOMP_loop_doacross_guided_start (unsigned ncounts, long *counts,
				 long chunk_size, long *istart, long *iend)
{
  return gomp_loop_doacross_guided_start (ncounts, counts, chunk_size,
					  istart, iend);
}

bool
GOMP_loop_static_next (long *istart, long *iend)
{
  return gomp_loop_static_next (istart, iend);
}

bool
GOMP_loop_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_dynamic_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_guided_next (long *istart, long *iend)
{
  return gomp_loop_guided_next (istart, iend);
}

bool
GOMP_loop_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_maybe_nonmonotonic_runtime_next (long *istart, long *iend)
{
  return GOMP_loop_runtime_next (istart, iend);
}

bool
GOMP_loop_ordered_static_next (long *istart, long *iend)
{
  return gomp_loop_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ordered_dynamic_next (long *istart, long *iend)
{
  return gomp_loop_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ordered_guided_next (long *istart, long *iend)
{
  return gomp_loop_ordered_guided_next (istart, iend);
}
#endif
