kmp_sched.cpp revision 345153
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

  NOTE: team->t.t_nproc is constant inside any dispatch loop, but its value
        may change between parallel regions. __kmp_max_nth is the largest
        value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, ld, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /*  this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, and those cannot
  // be seen here, since this code path is only executed for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
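  // Illustrative note (not in the original source): for lower = 0, upper = 9
  // and incr = 3 the loop visits {0, 3, 6, 9}, and the code above computes
  // trip_count = (9 - 0) / 3 + 1 = 4; the subtraction is done in the unsigned
  // type UT because upper - lower may exceed the range of the signed type.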

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
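        // Illustrative example (not in the original source): with
        // trip_count = 10 and nth = 4, small_chunk = 2 and extras = 2, so
        // threads 0 and 1 each get 3 iterations while threads 2 and 3 each
        // get 2, keeping the per-thread counts within one iteration.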
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
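    // Illustrative example (not in the original source): with *plower = 0,
    // incr = 1, chunk = 2 and nth = 3, thread 1 gets span = 2, *pstride = 6,
    // *plower = 2 and *pupper = 3, i.e. iterations {2, 3}; the caller then
    // advances the bounds by *pstride to reach {8, 9}, and so on.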
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
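    // Note (not in the original source): the bit trick above rounds span up
    // to a multiple of chunk and assumes chunk is a power of two; e.g.
    // span = 10, chunk = 4 yields chunk = 12.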

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

#if KMP_STATS_ENABLED
  {
    kmp_int64 t;
    kmp_int64 u = (kmp_int64)(*pupper);
    kmp_int64 l = (kmp_int64)(*plower);
    kmp_int64 i = (kmp_int64)incr;
    /* compute trip count */
    if (i == 1) {
      t = u - l + 1;
    } else if (i == -1) {
      t = l - u + 1;
    } else if (i > 0) {
      t = (u - l) / i + 1;
    } else {
      t = (l - u) / (-i) + 1;
    }
    KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
    KMP_POP_PARTITIONED_TIMER();
  }
#endif
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get a single iteration; other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing the subsequent chunks.
  // The last iteration flag is set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0       - compile-time check
      //   for(i=10;i<0;--i) // incr < 0       - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
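  // Illustrative example (not in the original source): with lower = 0,
  // upper = 99, incr = 1, chunk = 10 and nteams = 4, team 1 gets the first
  // chunk [10, 19] and *p_st = 40, so its later chunks start at 50 and 90;
  // since (trip_count - 1) / chunk = 9 and 9 % 4 == 1, *p_last is also set
  // for team 1, which owns the final chunk [90, 99].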
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedtype Scheduling type
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound
@param    pstride   Pointer to the stride
@param    incr      Loop increment
@param    chunk     The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                       );
}
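
// Hypothetical usage sketch (not part of the original source): a compiler
// might lower
//   #pragma omp for schedule(static)
//   for (int i = 0; i < n; ++i) body(i);
// inside a parallel region roughly as follows, where loc, gtid, n and body
// are illustrative placeholders:
//
//   kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
//   __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower,
//                            &upper, &stride, 1, 1);
//   for (kmp_int32 i = lower; i <= upper; ++i)
//     body(i);
//   __kmpc_for_static_fini(loc, gtid);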

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                        );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                       );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                        );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param    loc       Source code location
@param    gtid      Global thread id of this thread
@param    schedule  Scheduling type for the parallel loop
@param    plastiter Pointer to the "last iteration" flag
@param    plower    Pointer to the lower bound
@param    pupper    Pointer to the upper bound of loop chunk
@param    pupperD   Pointer to the upper bound of dist_chunk
@param    pstride   Pointer to the stride for parallel loop
@param    incr      Loop increment
@param    chunk     The chunk size for the parallel loop
The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
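
// Hypothetical usage sketch (not part of the original source): for a
// composite construct such as
//   #pragma omp distribute parallel for schedule(static)
//   for (int i = 0; i < n; ++i) body(i);
// each thread of each team might initialize its bounds roughly as follows
// (loc, gtid, n and body are illustrative placeholders; the chunk argument
// of 0 is unused by kmp_sch_static):
//
//   kmp_int32 last = 0, lb = 0, ub = n - 1, ubD = n - 1, stride = 1;
//   __kmpc_dist_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lb,
//                                 &ub, &ubD, &stride, 1, 0);
//   for (kmp_int32 i = lb; i <= ub; ++i)
//     body(i);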

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb  pointer to Lower bound
@param p_ub  pointer to Upper bound
@param p_st  Step (or increment if you prefer)
@param incr  Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of a composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}
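
// Hypothetical usage sketch (not part of the original source): for
//   #pragma omp distribute dist_schedule(static, chunk)
// each team could fetch its first chunk and the chunk stride, then step its
// bounds by the returned stride between chunks (names are illustrative):
//
//   kmp_int32 last = 0, lb = lower, ub = upper, st = 0;
//   __kmpc_team_static_init_4(loc, gtid, &last, &lb, &ub, &st, incr, chunk);
//   // the team's first chunk is [lb, ub]; its next chunk begins at lb + st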

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
 See @ref __kmpc_team_static_init_4
 */
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"