1/* Copyright (C) 2005-2020 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file handles the LOOP (FOR/DO) construct. */ 27 28#include <limits.h> 29#include <stdlib.h> 30#include <string.h> 31#include "libgomp.h" 32 33ialias (GOMP_loop_ull_runtime_next) 34ialias_redirect (GOMP_taskgroup_reduction_register) 35 36typedef unsigned long long gomp_ull; 37 38/* Initialize the given work share construct from the given arguments. */ 39 40static inline void 41gomp_loop_ull_init (struct gomp_work_share *ws, bool up, gomp_ull start, 42 gomp_ull end, gomp_ull incr, enum gomp_schedule_type sched, 43 gomp_ull chunk_size) 44{ 45 ws->sched = sched; 46 ws->chunk_size_ull = chunk_size; 47 /* Canonicalize loops that have zero iterations to ->next == ->end. */ 48 ws->end_ull = ((up && start > end) || (!up && start < end)) 49 ? start : end; 50 ws->incr_ull = incr; 51 ws->next_ull = start; 52 ws->mode = 0; 53 if (sched == GFS_DYNAMIC) 54 { 55 ws->chunk_size_ull *= incr; 56 57#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 58 { 59 /* For dynamic scheduling prepare things to make each iteration 60 faster. */ 61 struct gomp_thread *thr = gomp_thread (); 62 struct gomp_team *team = thr->ts.team; 63 long nthreads = team ? team->nthreads : 1; 64 65 if (__builtin_expect (up, 1)) 66 { 67 /* Cheap overflow protection. */ 68 if (__builtin_expect ((nthreads | ws->chunk_size_ull) 69 < 1ULL << (sizeof (gomp_ull) 70 * __CHAR_BIT__ / 2 - 1), 1)) 71 ws->mode = ws->end_ull < (__LONG_LONG_MAX__ * 2ULL + 1 72 - (nthreads + 1) * ws->chunk_size_ull); 73 } 74 /* Cheap overflow protection. */ 75 else if (__builtin_expect ((nthreads | -ws->chunk_size_ull) 76 < 1ULL << (sizeof (gomp_ull) 77 * __CHAR_BIT__ / 2 - 1), 1)) 78 ws->mode = ws->end_ull > ((nthreads + 1) * -ws->chunk_size_ull 79 - (__LONG_LONG_MAX__ * 2ULL + 1)); 80 } 81#endif 82 } 83 if (!up) 84 ws->mode |= 2; 85} 86 87/* The *_start routines are called when first encountering a loop construct 88 that is not bound directly to a parallel construct. The first thread 89 that arrives will create the work-share construct; subsequent threads 90 will see the construct exists and allocate work from it. 91 92 START, END, INCR are the bounds of the loop; due to the restrictions of 93 OpenMP, these values must be the same in every thread. This is not 94 verified (nor is it entirely verifiable, since START is not necessarily 95 retained intact in the work-share data structure). CHUNK_SIZE is the 96 scheduling parameter; again this must be identical in all threads. 97 98 Returns true if there's any work for this thread to perform. If so, 99 *ISTART and *IEND are filled with the bounds of the iteration block 100 allocated to this thread. Returns false if all work was assigned to 101 other threads prior to this thread's arrival. */ 102 103static bool 104gomp_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, 105 gomp_ull incr, gomp_ull chunk_size, 106 gomp_ull *istart, gomp_ull *iend) 107{ 108 struct gomp_thread *thr = gomp_thread (); 109 110 thr->ts.static_trip = 0; 111 if (gomp_work_share_start (0)) 112 { 113 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 114 GFS_STATIC, chunk_size); 115 gomp_work_share_init_done (); 116 } 117 118 return !gomp_iter_ull_static_next (istart, iend); 119} 120 121static bool 122gomp_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, 123 gomp_ull incr, gomp_ull chunk_size, 124 gomp_ull *istart, gomp_ull *iend) 125{ 126 struct gomp_thread *thr = gomp_thread (); 127 bool ret; 128 129 if (gomp_work_share_start (0)) 130 { 131 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 132 GFS_DYNAMIC, chunk_size); 133 gomp_work_share_init_done (); 134 } 135 136#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 137 ret = gomp_iter_ull_dynamic_next (istart, iend); 138#else 139 gomp_mutex_lock (&thr->ts.work_share->lock); 140 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 141 gomp_mutex_unlock (&thr->ts.work_share->lock); 142#endif 143 144 return ret; 145} 146 147static bool 148gomp_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, 149 gomp_ull incr, gomp_ull chunk_size, 150 gomp_ull *istart, gomp_ull *iend) 151{ 152 struct gomp_thread *thr = gomp_thread (); 153 bool ret; 154 155 if (gomp_work_share_start (0)) 156 { 157 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 158 GFS_GUIDED, chunk_size); 159 gomp_work_share_init_done (); 160 } 161 162#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 163 ret = gomp_iter_ull_guided_next (istart, iend); 164#else 165 gomp_mutex_lock (&thr->ts.work_share->lock); 166 ret = gomp_iter_ull_guided_next_locked (istart, iend); 167 gomp_mutex_unlock (&thr->ts.work_share->lock); 168#endif 169 170 return ret; 171} 172 173bool 174GOMP_loop_ull_runtime_start (bool up, gomp_ull start, gomp_ull end, 175 gomp_ull incr, gomp_ull *istart, gomp_ull *iend) 176{ 177 struct gomp_task_icv *icv = gomp_icv (false); 178 switch (icv->run_sched_var & ~GFS_MONOTONIC) 179 { 180 case GFS_STATIC: 181 return gomp_loop_ull_static_start (up, start, end, incr, 182 icv->run_sched_chunk_size, 183 istart, iend); 184 case GFS_DYNAMIC: 185 return gomp_loop_ull_dynamic_start (up, start, end, incr, 186 icv->run_sched_chunk_size, 187 istart, iend); 188 case GFS_GUIDED: 189 return gomp_loop_ull_guided_start (up, start, end, incr, 190 icv->run_sched_chunk_size, 191 istart, iend); 192 case GFS_AUTO: 193 /* For now map to schedule(static), later on we could play with feedback 194 driven choice. */ 195 return gomp_loop_ull_static_start (up, start, end, incr, 196 0, istart, iend); 197 default: 198 abort (); 199 } 200} 201 202static long 203gomp_adjust_sched (long sched, gomp_ull *chunk_size) 204{ 205 sched &= ~GFS_MONOTONIC; 206 switch (sched) 207 { 208 case GFS_STATIC: 209 case GFS_DYNAMIC: 210 case GFS_GUIDED: 211 return sched; 212 /* GFS_RUNTIME is used for runtime schedule without monotonic 213 or nonmonotonic modifiers on the clause. 214 GFS_RUNTIME|GFS_MONOTONIC for runtime schedule with monotonic 215 modifier. */ 216 case GFS_RUNTIME: 217 /* GFS_AUTO is used for runtime schedule with nonmonotonic 218 modifier. */ 219 case GFS_AUTO: 220 { 221 struct gomp_task_icv *icv = gomp_icv (false); 222 sched = icv->run_sched_var & ~GFS_MONOTONIC; 223 switch (sched) 224 { 225 case GFS_STATIC: 226 case GFS_DYNAMIC: 227 case GFS_GUIDED: 228 *chunk_size = icv->run_sched_chunk_size; 229 break; 230 case GFS_AUTO: 231 sched = GFS_STATIC; 232 *chunk_size = 0; 233 break; 234 default: 235 abort (); 236 } 237 return sched; 238 } 239 default: 240 abort (); 241 } 242} 243 244bool 245GOMP_loop_ull_start (bool up, gomp_ull start, gomp_ull end, 246 gomp_ull incr, long sched, gomp_ull chunk_size, 247 gomp_ull *istart, gomp_ull *iend, 248 uintptr_t *reductions, void **mem) 249{ 250 struct gomp_thread *thr = gomp_thread (); 251 252 thr->ts.static_trip = 0; 253 if (reductions) 254 gomp_workshare_taskgroup_start (); 255 if (gomp_work_share_start (0)) 256 { 257 sched = gomp_adjust_sched (sched, &chunk_size); 258 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 259 sched, chunk_size); 260 if (reductions) 261 { 262 GOMP_taskgroup_reduction_register (reductions); 263 thr->task->taskgroup->workshare = true; 264 thr->ts.work_share->task_reductions = reductions; 265 } 266 if (mem) 267 { 268 uintptr_t size = (uintptr_t) *mem; 269#define INLINE_ORDERED_TEAM_IDS_OFF \ 270 ((offsetof (struct gomp_work_share, inline_ordered_team_ids) \ 271 + __alignof__ (long long) - 1) & ~(__alignof__ (long long) - 1)) 272 if (size > (sizeof (struct gomp_work_share) 273 - INLINE_ORDERED_TEAM_IDS_OFF)) 274 *mem 275 = (void *) (thr->ts.work_share->ordered_team_ids 276 = gomp_malloc_cleared (size)); 277 else 278 *mem = memset (((char *) thr->ts.work_share) 279 + INLINE_ORDERED_TEAM_IDS_OFF, '\0', size); 280 } 281 gomp_work_share_init_done (); 282 } 283 else 284 { 285 if (reductions) 286 { 287 uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 288 gomp_workshare_task_reduction_register (reductions, 289 first_reductions); 290 } 291 if (mem) 292 { 293 if ((offsetof (struct gomp_work_share, inline_ordered_team_ids) 294 & (__alignof__ (long long) - 1)) == 0) 295 *mem = (void *) thr->ts.work_share->ordered_team_ids; 296 else 297 { 298 uintptr_t p = (uintptr_t) thr->ts.work_share->ordered_team_ids; 299 p += __alignof__ (long long) - 1; 300 p &= ~(__alignof__ (long long) - 1); 301 *mem = (void *) p; 302 } 303 } 304 } 305 306 return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); 307} 308 309/* The *_ordered_*_start routines are similar. The only difference is that 310 this work-share construct is initialized to expect an ORDERED section. */ 311 312static bool 313gomp_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, 314 gomp_ull incr, gomp_ull chunk_size, 315 gomp_ull *istart, gomp_ull *iend) 316{ 317 struct gomp_thread *thr = gomp_thread (); 318 319 thr->ts.static_trip = 0; 320 if (gomp_work_share_start (1)) 321 { 322 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 323 GFS_STATIC, chunk_size); 324 gomp_ordered_static_init (); 325 gomp_work_share_init_done (); 326 } 327 328 return !gomp_iter_ull_static_next (istart, iend); 329} 330 331static bool 332gomp_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, 333 gomp_ull incr, gomp_ull chunk_size, 334 gomp_ull *istart, gomp_ull *iend) 335{ 336 struct gomp_thread *thr = gomp_thread (); 337 bool ret; 338 339 if (gomp_work_share_start (1)) 340 { 341 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 342 GFS_DYNAMIC, chunk_size); 343 gomp_mutex_lock (&thr->ts.work_share->lock); 344 gomp_work_share_init_done (); 345 } 346 else 347 gomp_mutex_lock (&thr->ts.work_share->lock); 348 349 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 350 if (ret) 351 gomp_ordered_first (); 352 gomp_mutex_unlock (&thr->ts.work_share->lock); 353 354 return ret; 355} 356 357static bool 358gomp_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, 359 gomp_ull incr, gomp_ull chunk_size, 360 gomp_ull *istart, gomp_ull *iend) 361{ 362 struct gomp_thread *thr = gomp_thread (); 363 bool ret; 364 365 if (gomp_work_share_start (1)) 366 { 367 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 368 GFS_GUIDED, chunk_size); 369 gomp_mutex_lock (&thr->ts.work_share->lock); 370 gomp_work_share_init_done (); 371 } 372 else 373 gomp_mutex_lock (&thr->ts.work_share->lock); 374 375 ret = gomp_iter_ull_guided_next_locked (istart, iend); 376 if (ret) 377 gomp_ordered_first (); 378 gomp_mutex_unlock (&thr->ts.work_share->lock); 379 380 return ret; 381} 382 383bool 384GOMP_loop_ull_ordered_runtime_start (bool up, gomp_ull start, gomp_ull end, 385 gomp_ull incr, gomp_ull *istart, 386 gomp_ull *iend) 387{ 388 struct gomp_task_icv *icv = gomp_icv (false); 389 switch (icv->run_sched_var & ~GFS_MONOTONIC) 390 { 391 case GFS_STATIC: 392 return gomp_loop_ull_ordered_static_start (up, start, end, incr, 393 icv->run_sched_chunk_size, 394 istart, iend); 395 case GFS_DYNAMIC: 396 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, 397 icv->run_sched_chunk_size, 398 istart, iend); 399 case GFS_GUIDED: 400 return gomp_loop_ull_ordered_guided_start (up, start, end, incr, 401 icv->run_sched_chunk_size, 402 istart, iend); 403 case GFS_AUTO: 404 /* For now map to schedule(static), later on we could play with feedback 405 driven choice. */ 406 return gomp_loop_ull_ordered_static_start (up, start, end, incr, 407 0, istart, iend); 408 default: 409 abort (); 410 } 411} 412 413bool 414GOMP_loop_ull_ordered_start (bool up, gomp_ull start, gomp_ull end, 415 gomp_ull incr, long sched, gomp_ull chunk_size, 416 gomp_ull *istart, gomp_ull *iend, 417 uintptr_t *reductions, void **mem) 418{ 419 struct gomp_thread *thr = gomp_thread (); 420 size_t ordered = 1; 421 bool ret; 422 423 thr->ts.static_trip = 0; 424 if (reductions) 425 gomp_workshare_taskgroup_start (); 426 if (mem) 427 ordered += (uintptr_t) *mem; 428 if (gomp_work_share_start (ordered)) 429 { 430 sched = gomp_adjust_sched (sched, &chunk_size); 431 gomp_loop_ull_init (thr->ts.work_share, up, start, end, incr, 432 sched, chunk_size); 433 if (reductions) 434 { 435 GOMP_taskgroup_reduction_register (reductions); 436 thr->task->taskgroup->workshare = true; 437 thr->ts.work_share->task_reductions = reductions; 438 } 439 if (sched == GFS_STATIC) 440 gomp_ordered_static_init (); 441 else 442 gomp_mutex_lock (&thr->ts.work_share->lock); 443 gomp_work_share_init_done (); 444 } 445 else 446 { 447 if (reductions) 448 { 449 uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 450 gomp_workshare_task_reduction_register (reductions, 451 first_reductions); 452 } 453 sched = thr->ts.work_share->sched; 454 if (sched != GFS_STATIC) 455 gomp_mutex_lock (&thr->ts.work_share->lock); 456 } 457 458 if (mem) 459 { 460 uintptr_t p 461 = (uintptr_t) (thr->ts.work_share->ordered_team_ids 462 + (thr->ts.team ? thr->ts.team->nthreads : 1)); 463 p += __alignof__ (long long) - 1; 464 p &= ~(__alignof__ (long long) - 1); 465 *mem = (void *) p; 466 } 467 468 switch (sched) 469 { 470 case GFS_STATIC: 471 case GFS_AUTO: 472 return !gomp_iter_ull_static_next (istart, iend); 473 case GFS_DYNAMIC: 474 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 475 break; 476 case GFS_GUIDED: 477 ret = gomp_iter_ull_guided_next_locked (istart, iend); 478 break; 479 default: 480 abort (); 481 } 482 483 if (ret) 484 gomp_ordered_first (); 485 gomp_mutex_unlock (&thr->ts.work_share->lock); 486 return ret; 487} 488 489/* The *_doacross_*_start routines are similar. The only difference is that 490 this work-share construct is initialized to expect an ORDERED(N) - DOACROSS 491 section, and the worksharing loop iterates always from 0 to COUNTS[0] - 1 492 and other COUNTS array elements tell the library number of iterations 493 in the ordered inner loops. */ 494 495static bool 496gomp_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, 497 gomp_ull chunk_size, gomp_ull *istart, 498 gomp_ull *iend) 499{ 500 struct gomp_thread *thr = gomp_thread (); 501 502 thr->ts.static_trip = 0; 503 if (gomp_work_share_start (0)) 504 { 505 gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, 506 GFS_STATIC, chunk_size); 507 gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); 508 gomp_work_share_init_done (); 509 } 510 511 return !gomp_iter_ull_static_next (istart, iend); 512} 513 514static bool 515gomp_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, 516 gomp_ull chunk_size, gomp_ull *istart, 517 gomp_ull *iend) 518{ 519 struct gomp_thread *thr = gomp_thread (); 520 bool ret; 521 522 if (gomp_work_share_start (0)) 523 { 524 gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, 525 GFS_DYNAMIC, chunk_size); 526 gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); 527 gomp_work_share_init_done (); 528 } 529 530#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 531 ret = gomp_iter_ull_dynamic_next (istart, iend); 532#else 533 gomp_mutex_lock (&thr->ts.work_share->lock); 534 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 535 gomp_mutex_unlock (&thr->ts.work_share->lock); 536#endif 537 538 return ret; 539} 540 541static bool 542gomp_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, 543 gomp_ull chunk_size, gomp_ull *istart, 544 gomp_ull *iend) 545{ 546 struct gomp_thread *thr = gomp_thread (); 547 bool ret; 548 549 if (gomp_work_share_start (0)) 550 { 551 gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, 552 GFS_GUIDED, chunk_size); 553 gomp_doacross_ull_init (ncounts, counts, chunk_size, 0); 554 gomp_work_share_init_done (); 555 } 556 557#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 558 ret = gomp_iter_ull_guided_next (istart, iend); 559#else 560 gomp_mutex_lock (&thr->ts.work_share->lock); 561 ret = gomp_iter_ull_guided_next_locked (istart, iend); 562 gomp_mutex_unlock (&thr->ts.work_share->lock); 563#endif 564 565 return ret; 566} 567 568bool 569GOMP_loop_ull_doacross_runtime_start (unsigned ncounts, gomp_ull *counts, 570 gomp_ull *istart, gomp_ull *iend) 571{ 572 struct gomp_task_icv *icv = gomp_icv (false); 573 switch (icv->run_sched_var & ~GFS_MONOTONIC) 574 { 575 case GFS_STATIC: 576 return gomp_loop_ull_doacross_static_start (ncounts, counts, 577 icv->run_sched_chunk_size, 578 istart, iend); 579 case GFS_DYNAMIC: 580 return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, 581 icv->run_sched_chunk_size, 582 istart, iend); 583 case GFS_GUIDED: 584 return gomp_loop_ull_doacross_guided_start (ncounts, counts, 585 icv->run_sched_chunk_size, 586 istart, iend); 587 case GFS_AUTO: 588 /* For now map to schedule(static), later on we could play with feedback 589 driven choice. */ 590 return gomp_loop_ull_doacross_static_start (ncounts, counts, 591 0, istart, iend); 592 default: 593 abort (); 594 } 595} 596 597bool 598GOMP_loop_ull_doacross_start (unsigned ncounts, gomp_ull *counts, 599 long sched, gomp_ull chunk_size, 600 gomp_ull *istart, gomp_ull *iend, 601 uintptr_t *reductions, void **mem) 602{ 603 struct gomp_thread *thr = gomp_thread (); 604 605 thr->ts.static_trip = 0; 606 if (reductions) 607 gomp_workshare_taskgroup_start (); 608 if (gomp_work_share_start (0)) 609 { 610 size_t extra = 0; 611 if (mem) 612 extra = (uintptr_t) *mem; 613 sched = gomp_adjust_sched (sched, &chunk_size); 614 gomp_loop_ull_init (thr->ts.work_share, true, 0, counts[0], 1, 615 sched, chunk_size); 616 gomp_doacross_ull_init (ncounts, counts, chunk_size, extra); 617 if (reductions) 618 { 619 GOMP_taskgroup_reduction_register (reductions); 620 thr->task->taskgroup->workshare = true; 621 thr->ts.work_share->task_reductions = reductions; 622 } 623 gomp_work_share_init_done (); 624 } 625 else 626 { 627 if (reductions) 628 { 629 uintptr_t *first_reductions = thr->ts.work_share->task_reductions; 630 gomp_workshare_task_reduction_register (reductions, 631 first_reductions); 632 } 633 sched = thr->ts.work_share->sched; 634 } 635 636 if (mem) 637 *mem = thr->ts.work_share->doacross->extra; 638 639 return ialias_call (GOMP_loop_ull_runtime_next) (istart, iend); 640} 641 642/* The *_next routines are called when the thread completes processing of 643 the iteration block currently assigned to it. If the work-share 644 construct is bound directly to a parallel construct, then the iteration 645 bounds may have been set up before the parallel. In which case, this 646 may be the first iteration for the thread. 647 648 Returns true if there is work remaining to be performed; *ISTART and 649 *IEND are filled with a new iteration block. Returns false if all work 650 has been assigned. */ 651 652static bool 653gomp_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) 654{ 655 return !gomp_iter_ull_static_next (istart, iend); 656} 657 658static bool 659gomp_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) 660{ 661 bool ret; 662 663#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 664 ret = gomp_iter_ull_dynamic_next (istart, iend); 665#else 666 struct gomp_thread *thr = gomp_thread (); 667 gomp_mutex_lock (&thr->ts.work_share->lock); 668 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 669 gomp_mutex_unlock (&thr->ts.work_share->lock); 670#endif 671 672 return ret; 673} 674 675static bool 676gomp_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) 677{ 678 bool ret; 679 680#if defined HAVE_SYNC_BUILTINS && defined __LP64__ 681 ret = gomp_iter_ull_guided_next (istart, iend); 682#else 683 struct gomp_thread *thr = gomp_thread (); 684 gomp_mutex_lock (&thr->ts.work_share->lock); 685 ret = gomp_iter_ull_guided_next_locked (istart, iend); 686 gomp_mutex_unlock (&thr->ts.work_share->lock); 687#endif 688 689 return ret; 690} 691 692bool 693GOMP_loop_ull_runtime_next (gomp_ull *istart, gomp_ull *iend) 694{ 695 struct gomp_thread *thr = gomp_thread (); 696 697 switch (thr->ts.work_share->sched) 698 { 699 case GFS_STATIC: 700 case GFS_AUTO: 701 return gomp_loop_ull_static_next (istart, iend); 702 case GFS_DYNAMIC: 703 return gomp_loop_ull_dynamic_next (istart, iend); 704 case GFS_GUIDED: 705 return gomp_loop_ull_guided_next (istart, iend); 706 default: 707 abort (); 708 } 709} 710 711/* The *_ordered_*_next routines are called when the thread completes 712 processing of the iteration block currently assigned to it. 713 714 Returns true if there is work remaining to be performed; *ISTART and 715 *IEND are filled with a new iteration block. Returns false if all work 716 has been assigned. */ 717 718static bool 719gomp_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) 720{ 721 struct gomp_thread *thr = gomp_thread (); 722 int test; 723 724 gomp_ordered_sync (); 725 gomp_mutex_lock (&thr->ts.work_share->lock); 726 test = gomp_iter_ull_static_next (istart, iend); 727 if (test >= 0) 728 gomp_ordered_static_next (); 729 gomp_mutex_unlock (&thr->ts.work_share->lock); 730 731 return test == 0; 732} 733 734static bool 735gomp_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) 736{ 737 struct gomp_thread *thr = gomp_thread (); 738 bool ret; 739 740 gomp_ordered_sync (); 741 gomp_mutex_lock (&thr->ts.work_share->lock); 742 ret = gomp_iter_ull_dynamic_next_locked (istart, iend); 743 if (ret) 744 gomp_ordered_next (); 745 else 746 gomp_ordered_last (); 747 gomp_mutex_unlock (&thr->ts.work_share->lock); 748 749 return ret; 750} 751 752static bool 753gomp_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) 754{ 755 struct gomp_thread *thr = gomp_thread (); 756 bool ret; 757 758 gomp_ordered_sync (); 759 gomp_mutex_lock (&thr->ts.work_share->lock); 760 ret = gomp_iter_ull_guided_next_locked (istart, iend); 761 if (ret) 762 gomp_ordered_next (); 763 else 764 gomp_ordered_last (); 765 gomp_mutex_unlock (&thr->ts.work_share->lock); 766 767 return ret; 768} 769 770bool 771GOMP_loop_ull_ordered_runtime_next (gomp_ull *istart, gomp_ull *iend) 772{ 773 struct gomp_thread *thr = gomp_thread (); 774 775 switch (thr->ts.work_share->sched) 776 { 777 case GFS_STATIC: 778 case GFS_AUTO: 779 return gomp_loop_ull_ordered_static_next (istart, iend); 780 case GFS_DYNAMIC: 781 return gomp_loop_ull_ordered_dynamic_next (istart, iend); 782 case GFS_GUIDED: 783 return gomp_loop_ull_ordered_guided_next (istart, iend); 784 default: 785 abort (); 786 } 787} 788 789/* We use static functions above so that we're sure that the "runtime" 790 function can defer to the proper routine without interposition. We 791 export the static function with a strong alias when possible, or with 792 a wrapper function otherwise. */ 793 794#ifdef HAVE_ATTRIBUTE_ALIAS 795extern __typeof(gomp_loop_ull_static_start) GOMP_loop_ull_static_start 796 __attribute__((alias ("gomp_loop_ull_static_start"))); 797extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_dynamic_start 798 __attribute__((alias ("gomp_loop_ull_dynamic_start"))); 799extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_guided_start 800 __attribute__((alias ("gomp_loop_ull_guided_start"))); 801extern __typeof(gomp_loop_ull_dynamic_start) GOMP_loop_ull_nonmonotonic_dynamic_start 802 __attribute__((alias ("gomp_loop_ull_dynamic_start"))); 803extern __typeof(gomp_loop_ull_guided_start) GOMP_loop_ull_nonmonotonic_guided_start 804 __attribute__((alias ("gomp_loop_ull_guided_start"))); 805extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_nonmonotonic_runtime_start 806 __attribute__((alias ("GOMP_loop_ull_runtime_start"))); 807extern __typeof(GOMP_loop_ull_runtime_start) GOMP_loop_ull_maybe_nonmonotonic_runtime_start 808 __attribute__((alias ("GOMP_loop_ull_runtime_start"))); 809 810extern __typeof(gomp_loop_ull_ordered_static_start) GOMP_loop_ull_ordered_static_start 811 __attribute__((alias ("gomp_loop_ull_ordered_static_start"))); 812extern __typeof(gomp_loop_ull_ordered_dynamic_start) GOMP_loop_ull_ordered_dynamic_start 813 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_start"))); 814extern __typeof(gomp_loop_ull_ordered_guided_start) GOMP_loop_ull_ordered_guided_start 815 __attribute__((alias ("gomp_loop_ull_ordered_guided_start"))); 816 817extern __typeof(gomp_loop_ull_doacross_static_start) GOMP_loop_ull_doacross_static_start 818 __attribute__((alias ("gomp_loop_ull_doacross_static_start"))); 819extern __typeof(gomp_loop_ull_doacross_dynamic_start) GOMP_loop_ull_doacross_dynamic_start 820 __attribute__((alias ("gomp_loop_ull_doacross_dynamic_start"))); 821extern __typeof(gomp_loop_ull_doacross_guided_start) GOMP_loop_ull_doacross_guided_start 822 __attribute__((alias ("gomp_loop_ull_doacross_guided_start"))); 823 824extern __typeof(gomp_loop_ull_static_next) GOMP_loop_ull_static_next 825 __attribute__((alias ("gomp_loop_ull_static_next"))); 826extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_dynamic_next 827 __attribute__((alias ("gomp_loop_ull_dynamic_next"))); 828extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_guided_next 829 __attribute__((alias ("gomp_loop_ull_guided_next"))); 830extern __typeof(gomp_loop_ull_dynamic_next) GOMP_loop_ull_nonmonotonic_dynamic_next 831 __attribute__((alias ("gomp_loop_ull_dynamic_next"))); 832extern __typeof(gomp_loop_ull_guided_next) GOMP_loop_ull_nonmonotonic_guided_next 833 __attribute__((alias ("gomp_loop_ull_guided_next"))); 834extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_nonmonotonic_runtime_next 835 __attribute__((alias ("GOMP_loop_ull_runtime_next"))); 836extern __typeof(GOMP_loop_ull_runtime_next) GOMP_loop_ull_maybe_nonmonotonic_runtime_next 837 __attribute__((alias ("GOMP_loop_ull_runtime_next"))); 838 839extern __typeof(gomp_loop_ull_ordered_static_next) GOMP_loop_ull_ordered_static_next 840 __attribute__((alias ("gomp_loop_ull_ordered_static_next"))); 841extern __typeof(gomp_loop_ull_ordered_dynamic_next) GOMP_loop_ull_ordered_dynamic_next 842 __attribute__((alias ("gomp_loop_ull_ordered_dynamic_next"))); 843extern __typeof(gomp_loop_ull_ordered_guided_next) GOMP_loop_ull_ordered_guided_next 844 __attribute__((alias ("gomp_loop_ull_ordered_guided_next"))); 845#else 846bool 847GOMP_loop_ull_static_start (bool up, gomp_ull start, gomp_ull end, 848 gomp_ull incr, gomp_ull chunk_size, 849 gomp_ull *istart, gomp_ull *iend) 850{ 851 return gomp_loop_ull_static_start (up, start, end, incr, chunk_size, istart, 852 iend); 853} 854 855bool 856GOMP_loop_ull_dynamic_start (bool up, gomp_ull start, gomp_ull end, 857 gomp_ull incr, gomp_ull chunk_size, 858 gomp_ull *istart, gomp_ull *iend) 859{ 860 return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, 861 iend); 862} 863 864bool 865GOMP_loop_ull_guided_start (bool up, gomp_ull start, gomp_ull end, 866 gomp_ull incr, gomp_ull chunk_size, 867 gomp_ull *istart, gomp_ull *iend) 868{ 869 return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, 870 iend); 871} 872 873bool 874GOMP_loop_ull_nonmonotonic_dynamic_start (bool up, gomp_ull start, 875 gomp_ull end, gomp_ull incr, 876 gomp_ull chunk_size, 877 gomp_ull *istart, gomp_ull *iend) 878{ 879 return gomp_loop_ull_dynamic_start (up, start, end, incr, chunk_size, istart, 880 iend); 881} 882 883bool 884GOMP_loop_ull_nonmonotonic_guided_start (bool up, gomp_ull start, gomp_ull end, 885 gomp_ull incr, gomp_ull chunk_size, 886 gomp_ull *istart, gomp_ull *iend) 887{ 888 return gomp_loop_ull_guided_start (up, start, end, incr, chunk_size, istart, 889 iend); 890} 891 892bool 893GOMP_loop_ull_nonmonotonic_runtime_start (bool up, gomp_ull start, 894 gomp_ull end, gomp_ull incr, 895 gomp_ull *istart, gomp_ull *iend) 896{ 897 return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend); 898} 899 900bool 901GOMP_loop_ull_maybe_nonmonotonic_runtime_start (bool up, gomp_ull start, 902 gomp_ull end, gomp_ull incr, 903 gomp_ull *istart, 904 gomp_ull *iend) 905{ 906 return GOMP_loop_ull_runtime_start (up, start, end, incr, istart, iend); 907} 908 909bool 910GOMP_loop_ull_ordered_static_start (bool up, gomp_ull start, gomp_ull end, 911 gomp_ull incr, gomp_ull chunk_size, 912 gomp_ull *istart, gomp_ull *iend) 913{ 914 return gomp_loop_ull_ordered_static_start (up, start, end, incr, chunk_size, 915 istart, iend); 916} 917 918bool 919GOMP_loop_ull_ordered_dynamic_start (bool up, gomp_ull start, gomp_ull end, 920 gomp_ull incr, gomp_ull chunk_size, 921 gomp_ull *istart, gomp_ull *iend) 922{ 923 return gomp_loop_ull_ordered_dynamic_start (up, start, end, incr, chunk_size, 924 istart, iend); 925} 926 927bool 928GOMP_loop_ull_ordered_guided_start (bool up, gomp_ull start, gomp_ull end, 929 gomp_ull incr, gomp_ull chunk_size, 930 gomp_ull *istart, gomp_ull *iend) 931{ 932 return gomp_loop_ull_ordered_guided_start (up, start, end, incr, chunk_size, 933 istart, iend); 934} 935 936bool 937GOMP_loop_ull_doacross_static_start (unsigned ncounts, gomp_ull *counts, 938 gomp_ull chunk_size, gomp_ull *istart, 939 gomp_ull *iend) 940{ 941 return gomp_loop_ull_doacross_static_start (ncounts, counts, chunk_size, 942 istart, iend); 943} 944 945bool 946GOMP_loop_ull_doacross_dynamic_start (unsigned ncounts, gomp_ull *counts, 947 gomp_ull chunk_size, gomp_ull *istart, 948 gomp_ull *iend) 949{ 950 return gomp_loop_ull_doacross_dynamic_start (ncounts, counts, chunk_size, 951 istart, iend); 952} 953 954bool 955GOMP_loop_ull_doacross_guided_start (unsigned ncounts, gomp_ull *counts, 956 gomp_ull chunk_size, gomp_ull *istart, 957 gomp_ull *iend) 958{ 959 return gomp_loop_ull_doacross_guided_start (ncounts, counts, chunk_size, 960 istart, iend); 961} 962 963bool 964GOMP_loop_ull_static_next (gomp_ull *istart, gomp_ull *iend) 965{ 966 return gomp_loop_ull_static_next (istart, iend); 967} 968 969bool 970GOMP_loop_ull_dynamic_next (gomp_ull *istart, gomp_ull *iend) 971{ 972 return gomp_loop_ull_dynamic_next (istart, iend); 973} 974 975bool 976GOMP_loop_ull_guided_next (gomp_ull *istart, gomp_ull *iend) 977{ 978 return gomp_loop_ull_guided_next (istart, iend); 979} 980 981bool 982GOMP_loop_ull_nonmonotonic_dynamic_next (gomp_ull *istart, gomp_ull *iend) 983{ 984 return gomp_loop_ull_dynamic_next (istart, iend); 985} 986 987bool 988GOMP_loop_ull_nonmonotonic_guided_next (gomp_ull *istart, gomp_ull *iend) 989{ 990 return gomp_loop_ull_guided_next (istart, iend); 991} 992 993bool 994GOMP_loop_ull_nonmonotonic_runtime_next (gomp_ull *istart, gomp_ull *iend) 995{ 996 return GOMP_loop_ull_runtime_next (istart, iend); 997} 998 999bool 1000GOMP_loop_ull_maybe_nonmonotonic_runtime_next (gomp_ull *istart, 1001 gomp_ull *iend) 1002{ 1003 return GOMP_loop_ull_runtime_next (istart, iend); 1004} 1005 1006bool 1007GOMP_loop_ull_ordered_static_next (gomp_ull *istart, gomp_ull *iend) 1008{ 1009 return gomp_loop_ull_ordered_static_next (istart, iend); 1010} 1011 1012bool 1013GOMP_loop_ull_ordered_dynamic_next (gomp_ull *istart, gomp_ull *iend) 1014{ 1015 return gomp_loop_ull_ordered_dynamic_next (istart, iend); 1016} 1017 1018bool 1019GOMP_loop_ull_ordered_guided_next (gomp_ull *istart, gomp_ull *iend) 1020{ 1021 return gomp_loop_ull_ordered_guided_next (istart, iend); 1022} 1023#endif 1024