/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the LOOP (FOR/DO) construct.  */

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "libgomp.h"

ialias (GOMP_loop_ull_runtime_next)
ialias_redirect (GOMP_taskgroup_reduction_register)

typedef unsigned long long gomp_ull;

/* Initialize the given work share construct from the given arguments.  */

static inline void
gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start,
		    gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched,
		    gomp_ull chunk_size)
{
  ws->sched = sched;
  ws->chunk_size_ull = chunk_size;
  /* Canonicalize loops that have zero iterations to ->next == ->end.  */
  ws->end_ull = ((up && start > end) || (!up && start < end))
		? start : end;
  ws->incr_ull = incr;
  ws->next_ull = start;
  ws->mode = 0;
  if (sched == GFS_DYNAMIC)
    {
      ws->chunk_size_ull *= incr;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
      {
	/* For dynamic scheduling prepare things to make each iteration
	   faster.  */
	struct gomp_thread *thr = gomp_thread ();
	struct gomp_team *team = thr->ts.team;
	long nthreads = team ? team->nthreads : 1;

	if (__builtin_expect (up, 1))
	  {
	    /* Cheap overflow protection.  */
	    if (__builtin_expect ((nthreads | ws->chunk_size_ull)
				  < 1ULL << (sizeof (gomp_ull)
					     * __CHAR_BIT__ / 2 - 1), 1))
	      ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1
					- (nthreads + 1) * ws->chunk_size_ull);
	  }
	/* Cheap overflow protection.  */
	else if (__builtin_expect ((nthreads | -ws->chunk_size_ull)
				   < 1ULL << (sizeof (gomp_ull)
					      * __CHAR_BIT__ / 2 - 1), 1))
	  ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull
				    - (__LONG_LONG_MAX__ * 2ULL + 1));
      }
#endif
    }
  if (!up)
    ws->mode |= 2;
}
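
/* Illustrative note, not part of the library proper: with UP == true,
   START == 10, END == 5 and any positive INCR, the canonicalization above
   stores START into ws->end_ull, so ws->next_ull == ws->end_ull and the
   first *_next call reports that no iterations remain.  For GFS_DYNAMIC
   the stored chunk size is premultiplied by INCR so the iterators can
   advance ws->next_ull with a single atomic addition, bit 0 of ws->mode
   records whether that fast path is safe from overflow, and bit 1 flags
   a downward-counting loop.  */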

/* The *_start routines are called when first encountering a loop construct
   that is not bound directly to a parallel construct.  The first thread
   that arrives will create the work-share construct; subsequent threads
   will see the construct exists and allocate work from it.

   START, END, INCR are the bounds of the loop; due to the restrictions of
   OpenMP, these values must be the same in every thread.  This is not
   verified (nor is it entirely verifiable, since START is not necessarily
   retained intact in the work-share data structure).  CHUNK_SIZE is the
   scheduling parameter; again this must be identical in all threads.

   Returns true if there's any work for this thread to perform.  If so,
   *ISTART and *IEND are filled with the bounds of the iteration block
   allocated to this thread.  Returns false if all work was assigned to
   other threads prior to this thread's arrival.  */
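
/* Illustrative sketch only (not the literal expansion GCC emits): a
   compiler might lower

     #pragma omp for schedule(dynamic, 4)
     for (unsigned long long i = 0; i < n; i++)
       body (i);

   into calls of the entry points below, roughly as

     unsigned long long istart, iend, i;
     if (GOMP_loop_ull_dynamic_start (true, 0, n, 1, 4, &istart, &iend))
       do
	 for (i = istart; i < iend; i++)
	   body (i);
       while (GOMP_loop_ull_dynamic_next (&istart, &iend));
     GOMP_loop_end ();

   Real compiler output differs in details (nonmonotonic entry points,
   GOMP_loop_end_nowait for nowait loops), but the start/next/end contract
   is the one described above.  */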

static bool
gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_static_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_start (up, start, end, incr,
					  icv->run_sched_chunk_size,
					  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_start (up, start, end, incr,
					 icv->run_sched_chunk_size,
					 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_static_start (up, start, end, incr,
					 0, istart, iend);
    default:
      abort ();
    }
}

static long
gomp_adjust_sched (long sched, gomp_ull *chunk_size)
{
  sched &= ~GFS_MONOTONIC;
  switch (sched)
    {
    case GFS_STATIC:
    case GFS_DYNAMIC:
    case GFS_GUIDED:
      return sched;
    /* GFS_RUNTIME is used for runtime schedule without monotonic
       or nonmonotonic modifiers on the clause.
       GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic
       modifier.  */
    case GFS_RUNTIME:
    /* GFS_AUTO is used for runtime schedule with nonmonotonic
       modifier.  */
    case GFS_AUTO:
      {
	struct gomp_task_icv *icv = gomp_icv (false);
	sched = icv->run_sched_var & ~GFS_MONOTONIC;
	switch (sched)
	  {
	  case GFS_STATIC:
	  case GFS_DYNAMIC:
	  case GFS_GUIDED:
	    *chunk_size = icv->run_sched_chunk_size;
	    break;
	  case GFS_AUTO:
	    sched = GFS_STATIC;
	    *chunk_size = 0;
	    break;
	  default:
	    abort ();
	  }
	return sched;
      }
    default:
      abort ();
    }
}
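
/* Worked example (illustrative only): for schedule(runtime) without a
   monotonic/nonmonotonic modifier the compiler passes GFS_RUNTIME; with,
   say, OMP_SCHEDULE="guided,7" the ICV holds GFS_GUIDED and
   run_sched_chunk_size == 7, so gomp_adjust_sched returns GFS_GUIDED and
   sets *CHUNK_SIZE to 7.  An ICV of "auto" is mapped to GFS_STATIC with a
   chunk size of 0.  */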

bool
GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end,
		     gomp_ull incr, long sched, gomp_ull chunk_size,
		     gomp_ull *istart, gomp_ull *iend,
		     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (mem)
	{
	  uintptr_t size = (uintptr_t) *mem;
#define INLINE_ORDERED_TEAM_IDS_OFF \
  ((offsetof (struct gomp_work_share, inline_ordered_team_ids)		\
    + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1))
	  if (size > (sizeof (struct gomp_work_share)
		      - INLINE_ORDERED_TEAM_IDS_OFF))
	    *mem
	      = (void *) (thr->ts.work_share->ordered_team_ids
			  = gomp_malloc_cleared (size));
	  else
	    *mem = memset (((char *) thr->ts.work_share)
			   + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size);
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      if (mem)
	{
	  if ((offsetof (struct gomp_work_share, inline_ordered_team_ids)
	       & (__alignof__ (long long) - 1)) == 0)
	    *mem = (void *) thr->ts.work_share->ordered_team_ids;
	  else
	    {
	      uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids;
	      p += __alignof__ (long long) - 1;
	      p &= ~(__alignof__ (long long) - 1);
	      *mem = (void *) p;
	    }
	}
    }

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_ordered_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED section.  */
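
/* Illustrative sketch only (not part of the library): for

     #pragma omp for ordered schedule(dynamic)

   the generated code iterates through the *_ordered_*_start/next entry
   points below and brackets the ordered region inside the loop body with
   GOMP_ordered_start ()/GOMP_ordered_end (), so the work share has to be
   created with the ordered bookkeeping enabled.  */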

static bool
gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_STATIC, chunk_size);
      gomp_ordered_static_init ();
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_DYNAMIC, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (1))
    {
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  GFS_GUIDED, chunk_size);
      gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    gomp_mutex_lock (&thr->ts.work_share->lock);

  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_start (up, start, end, incr,
						 icv->run_sched_chunk_size,
						 istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_ordered_static_start (up, start, end, incr,
						 0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, long sched, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend,
			     uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();
  size_t ordered = 1;
  bool ret;

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (mem)
    ordered += (uintptr_t) *mem;
  if (gomp_work_share_start (ordered))
    {
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr,
			  sched, chunk_size);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      if (sched == GFS_STATIC)
	gomp_ordered_static_init ();
      else
	gomp_mutex_lock (&thr->ts.work_share->lock);
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
      if (sched != GFS_STATIC)
	gomp_mutex_lock (&thr->ts.work_share->lock);
    }

  if (mem)
    {
      uintptr_t p
	= (uintptr_t) (thr->ts.work_share->ordered_team_ids
		       + (thr->ts.team ? thr->ts.team->nthreads : 1));
      p += __alignof__ (long long) - 1;
      p &= ~(__alignof__ (long long) - 1);
      *mem = (void *) p;
    }

  switch (sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return !gomp_iter_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
      break;
    case GFS_GUIDED:
      ret = gomp_iter_ull_guided_next_locked (istart, iend);
      break;
    default:
      abort ();
    }

  if (ret)
    gomp_ordered_first ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);
  return ret;
}

/* The *_doacross_*_start routines are similar.  The only difference is that
   this work-share construct is initialized to expect an ORDERED(N) - DOACROSS
   section, and the worksharing loop always iterates from 0 to COUNTS[0] - 1,
   while the remaining COUNTS array elements tell the library the number of
   iterations in the ordered inner loops.  */
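
/* Illustrative sketch only (not part of the library): for

     #pragma omp for ordered(2) schedule(static)
     for (unsigned long long i = 0; i < n; i++)
       for (unsigned long long j = 0; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i - 1, j)
	   body (i, j);
	   #pragma omp ordered depend(source)
	 }

   the compiler passes COUNTS = { n, m }, distributes the outer loop
   through the *_doacross_*_start entry points below, and expands the
   depend clauses into GOMP_doacross_ull_wait/GOMP_doacross_ull_post
   calls.  How the dimensions are collected is a property of the compiler,
   not of this file.  */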

static bool
gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_STATIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_DYNAMIC, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  if (gomp_work_share_start (0))
    {
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  GFS_GUIDED, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, 0);
      gomp_work_share_init_done ();
    }

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_task_icv *icv = gomp_icv (false);
  switch (icv->run_sched_var & ~GFS_MONOTONIC)
    {
    case GFS_STATIC:
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_doacross_dynamic_start (ncounts, counts,
						   icv->run_sched_chunk_size,
						   istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_doacross_guided_start (ncounts, counts,
						  icv->run_sched_chunk_size,
						  istart, iend);
    case GFS_AUTO:
      /* For now map to schedule(static), later on we could play with feedback
	 driven choice.  */
      return gomp_loop_ull_doacross_static_start (ncounts, counts,
						  0, istart, iend);
    default:
      abort ();
    }
}

bool
GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts,
			      long sched, gomp_ull chunk_size,
			      gomp_ull *istart, gomp_ull *iend,
			      uintptr_t *reductions, void **mem)
{
  struct gomp_thread *thr = gomp_thread ();

  thr->ts.static_trip = 0;
  if (reductions)
    gomp_workshare_taskgroup_start ();
  if (gomp_work_share_start (0))
    {
      size_t extra = 0;
      if (mem)
	extra = (uintptr_t) *mem;
      sched = gomp_adjust_sched (sched, &chunk_size);
      gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1,
			  sched, chunk_size);
      gomp_doacross_ull_init (ncounts, counts, chunk_size, extra);
      if (reductions)
	{
	  GOMP_taskgroup_reduction_register (reductions);
	  thr->task->taskgroup->workshare = true;
	  thr->ts.work_share->task_reductions = reductions;
	}
      gomp_work_share_init_done ();
    }
  else
    {
      if (reductions)
	{
	  uintptr_t *first_reductions = thr->ts.work_share->task_reductions;
	  gomp_workshare_task_reduction_register (reductions,
						  first_reductions);
	}
      sched = thr->ts.work_share->sched;
    }

  if (mem)
    *mem = thr->ts.work_share->doacross->extra;

  return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend);
}

/* The *_next routines are called when the thread completes processing of
   the iteration block currently assigned to it.  If the work-share
   construct is bound directly to a parallel construct, then the iteration
   bounds may have been set up before the parallel, in which case this may
   be the first iteration block for the thread.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return !gomp_iter_ull_static_next (istart, iend);
}

static bool
gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_dynamic_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

static bool
gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  bool ret;

#if defined HAVE_SYNC_BUILTINS && defined __LP64__
  ret = gomp_iter_ull_guided_next (istart, iend);
#else
  struct gomp_thread *thr = gomp_thread ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  gomp_mutex_unlock (&thr->ts.work_share->lock);
#endif

  return ret;
}

bool
GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* The *_ordered_*_next routines are called when the thread completes
   processing of the iteration block currently assigned to it.

   Returns true if there is work remaining to be performed; *ISTART and
   *IEND are filled with a new iteration block.  Returns false if all work
   has been assigned.  */

static bool
gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  int test;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  test = gomp_iter_ull_static_next (istart, iend);
  if (test >= 0)
    gomp_ordered_static_next ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return test == 0;
}

static bool
gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_dynamic_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

static bool
gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();
  bool ret;

  gomp_ordered_sync ();
  gomp_mutex_lock (&thr->ts.work_share->lock);
  ret = gomp_iter_ull_guided_next_locked (istart, iend);
  if (ret)
    gomp_ordered_next ();
  else
    gomp_ordered_last ();
  gomp_mutex_unlock (&thr->ts.work_share->lock);

  return ret;
}

bool
GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  struct gomp_thread *thr = gomp_thread ();

  switch (thr->ts.work_share->sched)
    {
    case GFS_STATIC:
    case GFS_AUTO:
      return gomp_loop_ull_ordered_static_next (istart, iend);
    case GFS_DYNAMIC:
      return gomp_loop_ull_ordered_dynamic_next (istart, iend);
    case GFS_GUIDED:
      return gomp_loop_ull_ordered_guided_next (istart, iend);
    default:
      abort ();
    }
}

/* We use static functions above so that we're sure that the "runtime"
   function can defer to the proper routine without interposition.  We
   export the static function with a strong alias when possible, or with
   a wrapper function otherwise.  */

#ifdef HAVE_ATTRIBUTE_ALIAS
extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start
	__attribute__((alias ("gomp_loop_ull_static_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start
	__attribute__((alias ("gomp_loop_ull_dynamic_start")));
extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start
	__attribute__((alias ("gomp_loop_ull_guided_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));
extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start
	__attribute__((alias ("GOMP_loop_ull_runtime_start")));

extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start
	__attribute__((alias ("gomp_loop_ull_ordered_static_start")));
extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_start")));
extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start
	__attribute__((alias ("gomp_loop_ull_ordered_guided_start")));

extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start
	__attribute__((alias ("gomp_loop_ull_doacross_static_start")));
extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start
	__attribute__((alias ("gomp_loop_ull_doacross_dynamic_start")));
extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start
	__attribute__((alias ("gomp_loop_ull_doacross_guided_start")));

extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next
	__attribute__((alias ("gomp_loop_ull_static_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next
	__attribute__((alias ("gomp_loop_ull_dynamic_next")));
extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next
	__attribute__((alias ("gomp_loop_ull_guided_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));
extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next
	__attribute__((alias ("GOMP_loop_ull_runtime_next")));

extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next
	__attribute__((alias ("gomp_loop_ull_ordered_static_next")));
extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next
	__attribute__((alias ("gomp_loop_ull_ordered_dynamic_next")));
extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next
	__attribute__((alias ("gomp_loop_ull_ordered_guided_next")));
#else
bool
GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end,
			     gomp_ull incr, gomp_ull chunk_size,
			     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end,
			    gomp_ull incr, gomp_ull chunk_size,
			    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull chunk_size,
					  gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart,
				      iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end,
					 gomp_ull incr, gomp_ull chunk_size,
					 gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart,
				     iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start,
					  gomp_ull end, gomp_ull incr,
					  gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start,
						gomp_ull end, gomp_ull incr,
						gomp_ull *istart,
						gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend);
}

bool
GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end,
				     gomp_ull incr, gomp_ull chunk_size,
				     gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end,
				    gomp_ull incr, gomp_ull chunk_size,
				    gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size,
					     istart, iend);
}

bool
GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts,
				      gomp_ull chunk_size, gomp_ull *istart,
				      gomp_ull *iend)
{
  return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size,
					       istart, iend);
}

bool
GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts,
				     gomp_ull chunk_size, gomp_ull *istart,
				     gomp_ull *iend)
{
  return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size,
					      istart, iend);
}

bool
GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_static_next (istart, iend);
}

bool
GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_guided_next (istart, iend);
}

bool
GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart,
					       gomp_ull *iend)
{
  return GOMP_loop_ull_runtime_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_static_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_dynamic_next (istart, iend);
}

bool
GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend)
{
  return gomp_loop_ull_ordered_guided_next (istart, iend);
}
#endif