// kmp_sched.cpp -- LLVM OpenMP runtime (libomp), upstream revision 345153
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

// Compute the iteration subrange ([*plower, *pupper]) and stride (*pstride)
// assigned to the calling thread for a statically scheduled workshare.
// *plastiter is set nonzero on the thread that owns the last iteration.
// All results are returned through the pointer parameters; incr must be
// nonzero (checked when __kmp_env_consistency_check is on).
template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
                                  ) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  // One-shot warning flag shared by all instantiations of this template.
  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type from the ident_t flags.
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        // Compiler did not tag the workshare kind; warn exactly once.
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    return;
  }

#if OMP_40_ENABLED
  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only ones which are useful are dynamic, so
  // cannot be seen here, since this codepath is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else
#endif
  {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    // Single-thread team: thread gets the whole iteration space.
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      // Fewer iterations than threads: first trip_count threads get one
      // iteration each, the rest get an empty range.
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        // Balanced: first (trip_count % nth) threads get one extra iteration.
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        // Greedy: every thread gets ceil(trip_count/nth) iterations; the
        // last thread's range is clamped to the original upper bound.
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value; // arithmetic wrapped; saturate
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value; // arithmetic wrapped; saturate
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
#if OMP_45_ENABLED
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    // NOTE(review): this rounds span up to a multiple of chunk assuming
    // chunk is a power of two -- confirm callers guarantee that.
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
#endif
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 &&
#if OMP_40_ENABLED
      th->th.th_teams_microtask == NULL &&
#endif
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

#if KMP_STATS_ENABLED
  {
    // Recompute the thread-local trip count in 64 bits for statistics.
    kmp_int64 t;
    kmp_int64 u = (kmp_int64)(*pupper);
    kmp_int64 l = (kmp_int64)(*plower);
    kmp_int64 i = (kmp_int64)incr;
    /* compute trip count */
    if (i == 1) {
      t = u - l + 1;
    } else if (i == -1) {
      t = l - u + 1;
    } else if (i > 0) {
      t = (u - l) / i + 1;
    } else {
      t = (l - u) / (-i) + 1;
    }
    KMP_COUNT_VALUE(OMP_loop_static_iterations, t);
    KMP_POP_PARTITIONED_TIMER();
  }
#endif
  return;
}

// Compute both the team's chunk upper bound (*pupperDist, the distribute
// chunk) and the calling thread's bounds/stride within that chunk for a
// combined "distribute parallel for" workshare.  Results are returned
// through plower/pupper/pupperDist/pstride.
template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      // Balanced: first (trip_count % nteams) teams get one extra iteration.
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      // Greedy: each team gets ceil(trip_count/nteams) iterations, clamped
      // to the original upper bound.
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value; // arithmetic wrapped; saturate
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value; // arithmetic wrapped; saturate
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) { // incr == 1 already handled above
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        // Fewer iterations than threads: one iteration per thread at most.
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value; // wrapped; saturate
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value; // wrapped; saturate
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
#if OMP_40_ENABLED
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
#endif
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  // Teams take chunks round-robin: team's first chunk plus stride for next.
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
// Thin wrapper: forwards to the __kmp_for_static_init<T> template above.
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
                                   );
}

/*!
 See @ref __kmpc_for_static_init_4
 */
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
                                    );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

Each of the four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
// Thin wrapper: forwards to the __kmp_dist_for_static_init<T> template above.
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
 See @ref __kmpc_dist_for_static_init_4
 */
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
//    Transfer call to template< type T >
//    __kmp_team_static_init( ident_t *loc, int gtid,
//        int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last pointer to last iteration flag
@param p_lb pointer to Lower bound
@param p_ub pointer to Upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite distribute
parallel loop construct. These functions are all identical apart from the types
of the arguments.
951*/ 952 953void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 954 kmp_int32 *p_lb, kmp_int32 *p_ub, 955 kmp_int32 *p_st, kmp_int32 incr, 956 kmp_int32 chunk) { 957 KMP_DEBUG_ASSERT(__kmp_init_serial); 958 __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 959 chunk); 960} 961 962/*! 963 See @ref __kmpc_team_static_init_4 964 */ 965void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 966 kmp_uint32 *p_lb, kmp_uint32 *p_ub, 967 kmp_int32 *p_st, kmp_int32 incr, 968 kmp_int32 chunk) { 969 KMP_DEBUG_ASSERT(__kmp_init_serial); 970 __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 971 chunk); 972} 973 974/*! 975 See @ref __kmpc_team_static_init_4 976 */ 977void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 978 kmp_int64 *p_lb, kmp_int64 *p_ub, 979 kmp_int64 *p_st, kmp_int64 incr, 980 kmp_int64 chunk) { 981 KMP_DEBUG_ASSERT(__kmp_init_serial); 982 __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 983 chunk); 984} 985 986/*! 987 See @ref __kmpc_team_static_init_4 988 */ 989void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, 990 kmp_uint64 *p_lb, kmp_uint64 *p_ub, 991 kmp_int64 *p_st, kmp_int64 incr, 992 kmp_int64 chunk) { 993 KMP_DEBUG_ASSERT(__kmp_init_serial); 994 __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr, 995 chunk); 996} 997/*! 998@} 999*/ 1000 1001} // extern "C" 1002