1/*
2 * kmp_sched.cpp -- static scheduling -- iteration initialization
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13/* Static scheduling initialization.
14
15  NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
16        it may change values between parallel regions.  __kmp_max_nth
17        is the largest value __kmp_nth may take, 1 is the smallest. */
18
19#include "kmp.h"
20#include "kmp_error.h"
21#include "kmp_i18n.h"
22#include "kmp_itt.h"
23#include "kmp_stats.h"
24#include "kmp_str.h"
25
26#if OMPT_SUPPORT
27#include "ompt-specific.h"
28#endif
29
#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
// Definitions of the static 'spec' members of the traits_t specializations
// (declared in kmp.h).  They supply the printf length/type suffix used when
// building KD_TRACE format strings for each induction-variable type.
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif
40
#if KMP_STATS_ENABLED
// Record the trip count of the loop just partitioned into 'stat' and pop the
// partitioned timer pushed when scheduling started.  The macro expects
// *pupper, *plower and incr to be in scope at the expansion site (they are,
// in every __kmp_*_static_init instantiation below).
// Wrapped in do { } while (0) so the expansion is a single statement and is
// safe inside an un-braced if/else.
#define KMP_STATS_LOOP_END(stat)                                               \
  do {                                                                         \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  } while (0)
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif
63
// Carve the iteration space [*plower, *pupper] (step 'incr') into the portion
// executed by thread 'global_tid' under static schedule 'schedtype'.
// On entry *plower/*pupper describe the whole space; on exit they describe
// this thread's sub-range, *pstride is the stride to the thread's next chunk
// (or the whole-range size on the single-thread paths), and *plastiter is set
// non-zero iff this thread executes the sequentially last iteration.
// T is the induction type; incr/chunk use its signed counterpart.
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot flag (CAS below) so the outdated-workshare warning fires once
  // per process, not once per thread.
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags set by the compiler.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // Zero-trip loop: report scope-begin with a count of 0 iterations.
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    // Distribute partitions among team masters, so use the parent team and
    // this team's master tid as the partition index.
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  // Active parallel region: partition among the team's nth threads.
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Single-thread team: this thread gets the whole iteration space.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: each of the first trip_count threads
      // gets exactly one iteration; the rest get an empty range.
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: trip_count/nth each; the first 'extras' threads take one
        // extra iteration.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: contiguous blocks of ceil(trip_count/nth) iterations, with
        // overflow detection and clamping of the final thread's upper bound.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    // Round-robin chunks of 'chunk' iterations; *pstride jumps over the
    // other nth-1 threads' chunks to this thread's next chunk.
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): the mask trick rounds span up to a multiple of chunk and
    // assumes chunk is a power of two (e.g. a simd width) — confirm callers.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}
418
// Partition the iteration space for a composite distribute-parallel-for:
// first across the teams of the teams construct (result in *pupperDist, the
// calling team's upper bound), then across the threads of this team under
// 'schedule' (result in *plower/*pupper for the calling thread).
// *plastiter is narrowed at each level so it ends up set only for the thread
// executing the sequentially last iteration of the whole space.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  // The team's position among the teams is its master's tid in the parent.
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: trip_count/nteams each; first 'extras' teams take one more.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: contiguous blocks of ceil(trip_count/nteams) per team, with
      // overflow detection and clamping against the original upper bound.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    // Second-level partitioning: split the team's range among nth threads.
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        // Narrow, never widen, the last-iteration flag set at the team level.
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}
659
660template <typename T>
661static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
662                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
663                                   typename traits_t<T>::signed_t *p_st,
664                                   typename traits_t<T>::signed_t incr,
665                                   typename traits_t<T>::signed_t chunk) {
666  // The routine returns the first chunk distributed to the team and
667  // stride for next chunks calculation.
668  // Last iteration flag set for the team that will execute
669  // the last iteration of the loop.
670  // The routine is called for dist_schedule(static,chunk) only.
671  typedef typename traits_t<T>::unsigned_t UT;
672  typedef typename traits_t<T>::signed_t ST;
673  kmp_uint32 team_id;
674  kmp_uint32 nteams;
675  UT trip_count;
676  T lower;
677  T upper;
678  ST span;
679  kmp_team_t *team;
680  kmp_info_t *th;
681
682  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
683  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
684#ifdef KMP_DEBUG
685  {
686    char *buff;
687    // create format specifiers before the debug output
688    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
689                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
690                            traits_t<T>::spec, traits_t<T>::spec,
691                            traits_t<ST>::spec, traits_t<ST>::spec,
692                            traits_t<T>::spec);
693    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
694    __kmp_str_free(&buff);
695  }
696#endif
697
698  lower = *p_lb;
699  upper = *p_ub;
700  if (__kmp_env_consistency_check) {
701    if (incr == 0) {
702      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
703                            loc);
704    }
705    if (incr > 0 ? (upper < lower) : (lower < upper)) {
706      // The loop is illegal.
707      // Some zero-trip loops maintained by compiler, e.g.:
708      //   for(i=10;i<0;++i) // lower >= upper - run-time check
709      //   for(i=0;i>10;--i) // lower <= upper - run-time check
710      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
711      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
712      // Compiler does not check the following illegal loops:
713      //   for(i=0;i<10;i+=incr) // where incr<0
714      //   for(i=10;i>0;i-=incr) // where incr<0
715      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
716    }
717  }
718  th = __kmp_threads[gtid];
719  team = th->th.th_team;
720  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
721  nteams = th->th.th_teams_size.nteams;
722  team_id = team->t.t_master_tid;
723  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);
724
725  // compute trip count
726  if (incr == 1) {
727    trip_count = upper - lower + 1;
728  } else if (incr == -1) {
729    trip_count = lower - upper + 1;
730  } else if (incr > 0) {
731    // upper-lower can exceed the limit of signed type
732    trip_count = (UT)(upper - lower) / incr + 1;
733  } else {
734    trip_count = (UT)(lower - upper) / (-incr) + 1;
735  }
736  if (chunk < 1)
737    chunk = 1;
738  span = chunk * incr;
739  *p_st = span * nteams;
740  *p_lb = lower + (span * team_id);
741  *p_ub = *p_lb + span - incr;
742  if (p_last != NULL)
743    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
744  // Correct upper bound if needed
745  if (incr > 0) {
746    if (*p_ub < *p_lb) // overflow?
747      *p_ub = traits_t<T>::max_value;
748    if (*p_ub > upper)
749      *p_ub = upper; // tracker C73258
750  } else { // incr < 0
751    if (*p_ub > *p_lb)
752      *p_ub = traits_t<T>::min_value;
753    if (*p_ub < upper)
754      *p_ub = upper; // tracker C73258
755  }
756#ifdef KMP_DEBUG
757  {
758    char *buff;
759    // create format specifiers before the debug output
760    buff =
761        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
762                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
763                         traits_t<T>::spec, traits_t<T>::spec,
764                         traits_t<ST>::spec, traits_t<ST>::spec);
765    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
766    __kmp_str_free(&buff);
767  }
768#endif
769}
770
771//------------------------------------------------------------------------------
772extern "C" {
773/*!
774@ingroup WORK_SHARING
775@param    loc       Source code location
776@param    gtid      Global thread id of this thread
777@param    schedtype  Scheduling type
778@param    plastiter Pointer to the "last iteration" flag
779@param    plower    Pointer to the lower bound
780@param    pupper    Pointer to the upper bound
781@param    pstride   Pointer to the stride
782@param    incr      Loop increment
783@param    chunk     The chunk size
784
785Each of the four functions here are identical apart from the argument types.
786
787The functions compute the upper and lower bounds and stride to be used for the
788set of iterations to be executed by the current thread from the statically
789scheduled loop that is described by the initial values of the bounds, stride,
790increment and chunk size.
791
792@{
793*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  // ABI entry point: forwards to the signed 32-bit instantiation of the
  // __kmp_for_static_init template (with the caller's return address for
  // OMPT, when that support is compiled in).
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                       );
}
806
807/*!
808 See @ref __kmpc_for_static_init_4
809 */
810void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
811                               kmp_int32 schedtype, kmp_int32 *plastiter,
812                               kmp_uint32 *plower, kmp_uint32 *pupper,
813                               kmp_int32 *pstride, kmp_int32 incr,
814                               kmp_int32 chunk) {
815  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
816                                    pupper, pstride, incr, chunk
817#if OMPT_SUPPORT && OMPT_OPTIONAL
818                                    ,
819                                    OMPT_GET_RETURN_ADDRESS(0)
820#endif
821                                        );
822}
823
824/*!
825 See @ref __kmpc_for_static_init_4
826 */
827void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
828                              kmp_int32 *plastiter, kmp_int64 *plower,
829                              kmp_int64 *pupper, kmp_int64 *pstride,
830                              kmp_int64 incr, kmp_int64 chunk) {
831  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
832                                   pupper, pstride, incr, chunk
833#if OMPT_SUPPORT && OMPT_OPTIONAL
834                                   ,
835                                   OMPT_GET_RETURN_ADDRESS(0)
836#endif
837                                       );
838}
839
840/*!
841 See @ref __kmpc_for_static_init_4
842 */
843void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
844                               kmp_int32 schedtype, kmp_int32 *plastiter,
845                               kmp_uint64 *plower, kmp_uint64 *pupper,
846                               kmp_int64 *pstride, kmp_int64 incr,
847                               kmp_int64 chunk) {
848  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
849                                    pupper, pstride, incr, chunk
850#if OMPT_SUPPORT && OMPT_OPTIONAL
851                                    ,
852                                    OMPT_GET_RETURN_ADDRESS(0)
853#endif
854                                        );
855}
856/*!
857@}
858*/
859
860/*!
861@ingroup WORK_SHARING
862@param    loc       Source code location
863@param    gtid      Global thread id of this thread
864@param    schedule  Scheduling type for the parallel loop
865@param    plastiter Pointer to the "last iteration" flag
866@param    plower    Pointer to the lower bound
867@param    pupper    Pointer to the upper bound of loop chunk
868@param    pupperD   Pointer to the upper bound of dist_chunk
869@param    pstride   Pointer to the stride for parallel loop
870@param    incr      Loop increment
871@param    chunk     The chunk size for the parallel loop
872
Each of the four functions here is identical apart from the argument types.
874
875The functions compute the upper and lower bounds and strides to be used for the
876set of iterations to be executed by the current thread from the statically
877scheduled loop that is described by the initial values of the bounds, strides,
878increment and chunks for parallel loop and distribute constructs.
879
880@{
881*/
882void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
883                                   kmp_int32 schedule, kmp_int32 *plastiter,
884                                   kmp_int32 *plower, kmp_int32 *pupper,
885                                   kmp_int32 *pupperD, kmp_int32 *pstride,
886                                   kmp_int32 incr, kmp_int32 chunk) {
887  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
888                                        pupper, pupperD, pstride, incr, chunk);
889}
890
891/*!
892 See @ref __kmpc_dist_for_static_init_4
893 */
894void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
895                                    kmp_int32 schedule, kmp_int32 *plastiter,
896                                    kmp_uint32 *plower, kmp_uint32 *pupper,
897                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
898                                    kmp_int32 incr, kmp_int32 chunk) {
899  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
900                                         pupper, pupperD, pstride, incr, chunk);
901}
902
903/*!
904 See @ref __kmpc_dist_for_static_init_4
905 */
906void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
907                                   kmp_int32 schedule, kmp_int32 *plastiter,
908                                   kmp_int64 *plower, kmp_int64 *pupper,
909                                   kmp_int64 *pupperD, kmp_int64 *pstride,
910                                   kmp_int64 incr, kmp_int64 chunk) {
911  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
912                                        pupper, pupperD, pstride, incr, chunk);
913}
914
915/*!
916 See @ref __kmpc_dist_for_static_init_4
917 */
918void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
919                                    kmp_int32 schedule, kmp_int32 *plastiter,
920                                    kmp_uint64 *plower, kmp_uint64 *pupper,
921                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
922                                    kmp_int64 incr, kmp_int64 chunk) {
923  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
924                                         pupper, pupperD, pstride, incr, chunk);
925}
926/*!
927@}
928*/
929
930//------------------------------------------------------------------------------
931// Auxiliary routines for Distribute Parallel Loop construct implementation
932//    Transfer call to template< type T >
933//    __kmp_team_static_init( ident_t *loc, int gtid,
934//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )
935
936/*!
937@ingroup WORK_SHARING
938@{
939@param loc Source location
940@param gtid Global thread id
941@param p_last pointer to last iteration flag
942@param p_lb  pointer to Lower bound
943@param p_ub  pointer to Upper bound
944@param p_st  Step (or increment if you prefer)
945@param incr  Loop increment
946@param chunk The chunk size to block with
947
948The functions compute the upper and lower bounds and stride to be used for the
949set of iterations to be executed by the current team from the statically
950scheduled loop that is described by the initial values of the bounds, stride,
951increment and chunk for the distribute construct as part of composite distribute
952parallel loop construct. These functions are all identical apart from the types
953of the arguments.
954*/
955
956void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
957                               kmp_int32 *p_lb, kmp_int32 *p_ub,
958                               kmp_int32 *p_st, kmp_int32 incr,
959                               kmp_int32 chunk) {
960  KMP_DEBUG_ASSERT(__kmp_init_serial);
961  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
962                                    chunk);
963}
964
965/*!
966 See @ref __kmpc_team_static_init_4
967 */
968void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
969                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
970                                kmp_int32 *p_st, kmp_int32 incr,
971                                kmp_int32 chunk) {
972  KMP_DEBUG_ASSERT(__kmp_init_serial);
973  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
974                                     chunk);
975}
976
977/*!
978 See @ref __kmpc_team_static_init_4
979 */
980void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
981                               kmp_int64 *p_lb, kmp_int64 *p_ub,
982                               kmp_int64 *p_st, kmp_int64 incr,
983                               kmp_int64 chunk) {
984  KMP_DEBUG_ASSERT(__kmp_init_serial);
985  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
986                                    chunk);
987}
988
989/*!
990 See @ref __kmpc_team_static_init_4
991 */
992void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
993                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
994                                kmp_int64 *p_st, kmp_int64 incr,
995                                kmp_int64 chunk) {
996  KMP_DEBUG_ASSERT(__kmp_init_serial);
997  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
998                                     chunk);
999}
1000/*!
1001@}
1002*/
1003
1004} // extern "C"
1005