/* Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the maintenance of threads in response to team
   creation and termination.  */

#include "libgomp.h"
#include "pool.h"
#include <stdlib.h>
#include <string.h>

#ifdef LIBGOMP_USE_PTHREADS
pthread_attr_t gomp_thread_attr;

/* This key is for the thread destructor.  */
pthread_key_t gomp_thread_destructor;


/* This is the libgomp per-thread data structure.  */
#if defined HAVE_TLS || defined USE_EMUTLS
__thread struct gomp_thread gomp_tls_data;
#else
pthread_key_t gomp_tls_key;
#endif


/* This structure is used to communicate across pthread_create.  */

struct gomp_thread_start_data
{
  void (*fn) (void *);
  void *fn_data;
  struct gomp_team_state ts;
  struct gomp_task *task;
  struct gomp_thread_pool *thread_pool;
  unsigned int place;
  bool nested;
  pthread_t handle;
};


/* This function is a pthread_create entry point.  This contains the idle
   loop in which a thread waits to be called up to become part of a team.  */

static void *
gomp_thread_start (void *xdata)
{
  struct gomp_thread_start_data *data = xdata;
  struct gomp_thread *thr;
  struct gomp_thread_pool *pool;
  void (*local_fn) (void *);
  void *local_data;

#if defined HAVE_TLS || defined USE_EMUTLS
  thr = &gomp_tls_data;
#else
  struct gomp_thread local_thr;
  thr = &local_thr;
  pthread_setspecific (gomp_tls_key, thr);
#endif
  gomp_sem_init (&thr->release, 0);

  /* Extract what we need from data.  */
  local_fn = data->fn;
  local_data = data->fn_data;
  thr->thread_pool = data->thread_pool;
  thr->ts = data->ts;
  thr->task = data->task;
  thr->place = data->place;
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = data->handle;
#endif

  thr->ts.team->ordered_release[thr->ts.team_id] = &thr->release;

  /* Make thread pool local.  */
  pool = thr->thread_pool;

  if (data->nested)
    {
      struct gomp_team *team = thr->ts.team;
      struct gomp_task *task = thr->task;

      gomp_barrier_wait (&team->barrier);

      local_fn (local_data);
      gomp_team_barrier_wait_final (&team->barrier);
      gomp_finish_task (task);
      gomp_barrier_wait_last (&team->barrier);
    }
  else
    {
      pool->threads[thr->ts.team_id] = thr;

      gomp_simple_barrier_wait (&pool->threads_dock);
      do
        {
          struct gomp_team *team = thr->ts.team;
          struct gomp_task *task = thr->task;

          local_fn (local_data);
          gomp_team_barrier_wait_final (&team->barrier);
          gomp_finish_task (task);

          gomp_simple_barrier_wait (&pool->threads_dock);

          local_fn = thr->fn;
          local_data = thr->data;
          thr->fn = NULL;
        }
      while (local_fn);
    }

  gomp_sem_destroy (&thr->release);
  pthread_detach (pthread_self ());
  thr->thread_pool = NULL;
  thr->task = NULL;
  return NULL;
}
#endif
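
/* Illustrative sketch (not part of libgomp): the life cycle of a pooled,
   non-nested thread driven by the idle loop above.  Each loop iteration
   serves one parallel region; pool->threads_dock is the "dock" the
   thread parks on between regions:

     dock wait -> run fn (data) -> team barrier -> dock wait
        ^                                             |
        |         read thr->fn and thr->data          |
        +------- (a NULL fn ends the loop) <----------+

   The master stores the next region's fn/data into each pooled
   gomp_thread before releasing threads_dock, so reading thr->fn and
   thr->data after the dock wait is race-free.  */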

static inline struct gomp_team *
get_last_team (unsigned nthreads)
{
  struct gomp_thread *thr = gomp_thread ();
  if (thr->ts.team == NULL)
    {
      struct gomp_thread_pool *pool = gomp_get_thread_pool (thr, nthreads);
      struct gomp_team *last_team = pool->last_team;
      if (last_team != NULL && last_team->nthreads == nthreads)
        {
          pool->last_team = NULL;
          return last_team;
        }
    }
  return NULL;
}

/* Create a new team data structure.  */

struct gomp_team *
gomp_new_team (unsigned nthreads)
{
  struct gomp_team *team;
  int i;

  team = get_last_team (nthreads);
  if (team == NULL)
    {
      size_t extra = sizeof (team->ordered_release[0])
                     + sizeof (team->implicit_task[0]);
      team = team_malloc (sizeof (*team) + nthreads * extra);

#ifndef HAVE_SYNC_BUILTINS
      gomp_mutex_init (&team->work_share_list_free_lock);
#endif
      gomp_barrier_init (&team->barrier, nthreads);
      gomp_mutex_init (&team->task_lock);

      team->nthreads = nthreads;
    }

  team->work_share_chunk = 8;
#ifdef HAVE_SYNC_BUILTINS
  team->single_count = 0;
#endif
  team->work_shares_to_free = &team->work_shares[0];
  gomp_init_work_share (&team->work_shares[0], 0, nthreads);
  team->work_shares[0].next_alloc = NULL;
  team->work_share_list_free = NULL;
  team->work_share_list_alloc = &team->work_shares[1];
  for (i = 1; i < 7; i++)
    team->work_shares[i].next_free = &team->work_shares[i + 1];
  team->work_shares[i].next_free = NULL;

  gomp_sem_init (&team->master_release, 0);
  team->ordered_release = (void *) &team->implicit_task[nthreads];
  team->ordered_release[0] = &team->master_release;

  priority_queue_init (&team->task_queue);
  team->task_count = 0;
  team->task_queued_count = 0;
  team->task_running_count = 0;
  team->work_share_cancelled = 0;
  team->team_cancelled = 0;

  return team;
}
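
/* Illustration of the single allocation gomp_new_team produces for
   NTHREADS team members (a sketch; the authoritative layout is
   struct gomp_team in libgomp.h):

     team_malloc (sizeof (struct gomp_team) + nthreads * extra)

       struct gomp_team           fixed fields + 8 inline work_shares
       implicit_task[nthreads]    one implicit task per member
       ordered_release[nthreads]  one semaphore pointer per member,
                                  located via &implicit_task[nthreads]

   The inline work shares are chained into a free list, so the first
   eight work-sharing constructs of the team need no further malloc:
   work_shares[1] -> work_shares[2] -> ... -> work_shares[7] -> NULL.  */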

/* Free a team data structure.  */

static void
free_team (struct gomp_team *team)
{
#ifndef HAVE_SYNC_BUILTINS
  gomp_mutex_destroy (&team->work_share_list_free_lock);
#endif
  gomp_barrier_destroy (&team->barrier);
  gomp_mutex_destroy (&team->task_lock);
  priority_queue_free (&team->task_queue);
  team_free (team);
}

static void
gomp_free_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_detach (pthread_self ());
  pthread_exit (NULL);
#elif defined(__nvptx__)
  asm ("exit;");
#elif defined(__AMDGCN__)
  asm ("s_dcache_wb\n\t"
       "s_endpgm");
#else
#error gomp_free_pool_helper must terminate the thread
#endif
}

/* Free a thread pool and release its threads.  */

void
gomp_free_thread (void *arg __attribute__((unused)))
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_free_pool_helper;
              nthr->data = pool;
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_free_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  if (thr->ts.level == 0 && __builtin_expect (thr->ts.team != NULL, 0))
    gomp_team_end ();
  if (thr->task != NULL)
    {
      struct gomp_task *task = thr->task;
      gomp_end_task ();
      free (task);
    }
}
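
/* For context, a hedged sketch of the usual caller of gomp_new_team and
   gomp_team_start.  The compiler expands "#pragma omp parallel" into a
   call to GOMP_parallel (defined in parallel.c); simplified here, with
   details that vary between GCC versions:

     void
     GOMP_parallel (void (*fn) (void *), void *data,
                    unsigned num_threads, unsigned int flags)
     {
       num_threads = gomp_resolve_num_threads (num_threads, 0);
       gomp_team_start (fn, data, num_threads, flags,
                        gomp_new_team (num_threads), NULL);
       fn (data);             // the master runs its own share inline
       GOMP_parallel_end ();  // which in turn calls gomp_team_end ()
     }
*/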

/* Launch a team.  */

#ifdef LIBGOMP_USE_PTHREADS
void
gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
                 unsigned flags, struct gomp_team *team,
                 struct gomp_taskgroup *taskgroup)
{
  struct gomp_thread_start_data *start_data;
  struct gomp_thread *thr, *nthr;
  struct gomp_task *task;
  struct gomp_task_icv *icv;
  bool nested;
  struct gomp_thread_pool *pool;
  unsigned i, n, old_threads_used = 0;
  pthread_attr_t thread_attr, *attr;
  unsigned long nthreads_var;
  char bind, bind_var;
  unsigned int s = 0, rest = 0, p = 0, k = 0;
  unsigned int affinity_count = 0;
  struct gomp_thread **affinity_thr = NULL;
  bool force_display = false;

  thr = gomp_thread ();
  nested = thr->ts.level;
  pool = thr->thread_pool;
  task = thr->task;
  icv = task ? &task->icv : &gomp_global_icv;
  if (__builtin_expect (gomp_places_list != NULL, 0) && thr->place == 0)
    {
      gomp_init_affinity ();
      if (__builtin_expect (gomp_display_affinity_var, 0) && nthreads == 1)
        gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                      thr->place);
    }

  /* Always save the previous state, even if this isn't a nested team.
     In particular, we should save any work share state from an outer
     orphaned work share construct.  */
  team->prev_ts = thr->ts;

  thr->ts.team = team;
  thr->ts.team_id = 0;
  ++thr->ts.level;
  if (nthreads > 1)
    ++thr->ts.active_level;
  thr->ts.work_share = &team->work_shares[0];
  thr->ts.last_work_share = NULL;
#ifdef HAVE_SYNC_BUILTINS
  thr->ts.single_count = 0;
#endif
  thr->ts.static_trip = 0;
  thr->task = &team->implicit_task[0];
#ifdef GOMP_NEEDS_THREAD_HANDLE
  thr->handle = pthread_self ();
#endif
  nthreads_var = icv->nthreads_var;
  if (__builtin_expect (gomp_nthreads_var_list != NULL, 0)
      && thr->ts.level < gomp_nthreads_var_list_len)
    nthreads_var = gomp_nthreads_var_list[thr->ts.level];
  bind_var = icv->bind_var;
  if (bind_var != omp_proc_bind_false && (flags & 7) != omp_proc_bind_false)
    bind_var = flags & 7;
  bind = bind_var;
  if (__builtin_expect (gomp_bind_var_list != NULL, 0)
      && thr->ts.level < gomp_bind_var_list_len)
    bind_var = gomp_bind_var_list[thr->ts.level];
  gomp_init_task (thr->task, task, icv);
  thr->task->taskgroup = taskgroup;
  team->implicit_task[0].icv.nthreads_var = nthreads_var;
  team->implicit_task[0].icv.bind_var = bind_var;

  if (nthreads == 1)
    return;

  i = 1;

  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      /* Depending on chosen proc_bind model, set subpartition
         for the master thread and initialize helper variables
         P and optionally S, K and/or REST used by later place
         computation for each additional thread.  */
      p = thr->place - 1;
      switch (bind)
        {
        case omp_proc_bind_true:
        case omp_proc_bind_close:
          if (nthreads > thr->ts.place_partition_len)
            {
              /* T > P.  S threads will be placed in each place,
                 and the final REST threads placed one by one
                 into the already occupied places.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
            }
          else
            s = 1;
          k = 1;
          break;
        case omp_proc_bind_master:
          /* Each thread will be bound to master's place.  */
          break;
        case omp_proc_bind_spread:
          if (nthreads <= thr->ts.place_partition_len)
            {
              /* T <= P.  Each subpartition will have in between s
                 and s+1 places (subpartitions starting at or
                 after rest will have s places, earlier s+1 places),
                 each thread will be bound to the first place in
                 its subpartition (except for the master thread
                 that can be bound to another place in its
                 subpartition).  */
              s = thr->ts.place_partition_len / nthreads;
              rest = thr->ts.place_partition_len % nthreads;
              rest = (s + 1) * rest + thr->ts.place_partition_off;
              if (p < rest)
                {
                  p -= (p - thr->ts.place_partition_off) % (s + 1);
                  thr->ts.place_partition_len = s + 1;
                }
              else
                {
                  p -= (p - rest) % s;
                  thr->ts.place_partition_len = s;
                }
              thr->ts.place_partition_off = p;
            }
          else
            {
              /* T > P.  Each subpartition will have just a single
                 place and we'll place between s and s+1
                 threads into each subpartition.  */
              s = nthreads / thr->ts.place_partition_len;
              rest = nthreads % thr->ts.place_partition_len;
              thr->ts.place_partition_off = p;
              thr->ts.place_partition_len = 1;
              k = 1;
            }
          break;
        }
    }
  else
    bind = omp_proc_bind_false;
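
  /* Worked examples (illustrative only) of the variables set above.
     omp_proc_bind_close with T = 8 threads over P = 3 places gives
     s = 8 / 3 = 2 and rest = 8 % 3 = 2: each place first receives two
     threads, then the two leftover threads are appended one per place,
     for 3 + 3 + 2 threads on the three places.  omp_proc_bind_spread
     with T = 3 over P = 8 gives s = 8 / 3 = 2 and a remainder of 2:
     the first two subpartitions span s + 1 = 3 places, the last spans
     two, and each thread is bound to the first place of its
     subpartition.  */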

  /* We only allow the reuse of idle threads for non-nested PARALLEL
     regions.  This appears to be implied by the semantics of
     threadprivate variables, but perhaps that's reading too much into
     things.  Certainly it does prevent any locking problems, since
     only the initial program thread will modify gomp_threads.  */
  if (!nested)
    {
      old_threads_used = pool->threads_used;

      if (nthreads <= old_threads_used)
        n = nthreads;
      else if (old_threads_used == 0)
        {
          n = 0;
          gomp_simple_barrier_init (&pool->threads_dock, nthreads);
        }
      else
        {
          n = old_threads_used;

          /* Increase the barrier threshold to make sure all new
             threads arrive before the team is released.  */
          gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);
        }

      /* Not true yet, but soon will be.  We're going to release all
         threads from the dock, and those that aren't part of the
         team will exit.  */
      pool->threads_used = nthreads;

      /* If necessary, expand the size of the gomp_threads array.  It is
         expected that changes in the number of threads are rare, thus we
         make no effort to expand gomp_threads_size geometrically.  */
      if (nthreads >= pool->threads_size)
        {
          pool->threads_size = nthreads + 1;
          pool->threads
            = gomp_realloc (pool->threads,
                            pool->threads_size
                            * sizeof (struct gomp_thread *));
          /* Add current (master) thread to threads[].  */
          pool->threads[0] = thr;
        }
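
      /* Illustrative note: threads_dock is always sized to the number
         of threads expected at the dock.  Growing a pool from, say,
         old_threads_used = 4 to nthreads = 8 reinitializes the dock
         from 4 to 8 above, so the release below also waits for the
         four newly created threads to arrive before the team runs.  */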

      /* Release existing idle threads.  */
      for (; i < n; ++i)
        {
          unsigned int place_partition_off = thr->ts.place_partition_off;
          unsigned int place_partition_len = thr->ts.place_partition_len;
          unsigned int place = 0;
          if (__builtin_expect (gomp_places_list != NULL, 0))
            {
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  break;
                case omp_proc_bind_master:
                  break;
                case omp_proc_bind_spread:
                  if (k == 0)
                    {
                      /* T <= P.  */
                      if (p < rest)
                        p += s + 1;
                      else
                        p += s;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      place_partition_off = p;
                      if (p < rest)
                        place_partition_len = s + 1;
                      else
                        place_partition_len = s;
                    }
                  else
                    {
                      /* T > P.  */
                      if (k == s)
                        {
                          ++p;
                          if (p == (team->prev_ts.place_partition_off
                                    + team->prev_ts.place_partition_len))
                            p = team->prev_ts.place_partition_off;
                          k = 1;
                          if (i == nthreads - rest)
                            s = 1;
                        }
                      else
                        ++k;
                      place_partition_off = p;
                      place_partition_len = 1;
                    }
                  break;
                }
              if (affinity_thr != NULL
                  || (bind != omp_proc_bind_true
                      && pool->threads[i]->place != p + 1)
                  || pool->threads[i]->place <= place_partition_off
                  || pool->threads[i]->place > (place_partition_off
                                                + place_partition_len))
                {
                  unsigned int l;
                  force_display = true;
                  if (affinity_thr == NULL)
                    {
                      unsigned int j;

                      if (team->prev_ts.place_partition_len > 64)
                        affinity_thr
                          = gomp_malloc (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      else
                        affinity_thr
                          = gomp_alloca (team->prev_ts.place_partition_len
                                         * sizeof (struct gomp_thread *));
                      memset (affinity_thr, '\0',
                              team->prev_ts.place_partition_len
                              * sizeof (struct gomp_thread *));
                      for (j = i; j < old_threads_used; j++)
                        {
                          if (pool->threads[j]->place
                              > team->prev_ts.place_partition_off
                              && (pool->threads[j]->place
                                  <= (team->prev_ts.place_partition_off
                                      + team->prev_ts.place_partition_len)))
                            {
                              l = pool->threads[j]->place - 1
                                  - team->prev_ts.place_partition_off;
                              pool->threads[j]->data = affinity_thr[l];
                              affinity_thr[l] = pool->threads[j];
                            }
                          pool->threads[j] = NULL;
                        }
                      if (nthreads > old_threads_used)
                        memset (&pool->threads[old_threads_used],
                                '\0', ((nthreads - old_threads_used)
                                       * sizeof (struct gomp_thread *)));
                      n = nthreads;
                      affinity_count = old_threads_used - i;
                    }
                  if (affinity_count == 0)
                    break;
                  l = p;
                  if (affinity_thr[l - team->prev_ts.place_partition_off]
                      == NULL)
                    {
                      if (bind != omp_proc_bind_true)
                        continue;
                      for (l = place_partition_off;
                           l < place_partition_off + place_partition_len;
                           l++)
                        if (affinity_thr[l - team->prev_ts.place_partition_off]
                            != NULL)
                          break;
                      if (l == place_partition_off + place_partition_len)
                        continue;
                    }
                  nthr = affinity_thr[l - team->prev_ts.place_partition_off];
                  affinity_thr[l - team->prev_ts.place_partition_off]
                    = (struct gomp_thread *) nthr->data;
                  affinity_count--;
                  pool->threads[i] = nthr;
                }
              else
                nthr = pool->threads[i];
              place = p + 1;
            }
          else
            nthr = pool->threads[i];
          nthr->ts.team = team;
          nthr->ts.work_share = &team->work_shares[0];
          nthr->ts.last_work_share = NULL;
          nthr->ts.team_id = i;
          nthr->ts.level = team->prev_ts.level + 1;
          nthr->ts.active_level = thr->ts.active_level;
          nthr->ts.place_partition_off = place_partition_off;
          nthr->ts.place_partition_len = place_partition_len;
#ifdef HAVE_SYNC_BUILTINS
          nthr->ts.single_count = 0;
#endif
          nthr->ts.static_trip = 0;
          nthr->task = &team->implicit_task[i];
          nthr->place = place;
          gomp_init_task (nthr->task, task, icv);
          team->implicit_task[i].icv.nthreads_var = nthreads_var;
          team->implicit_task[i].icv.bind_var = bind_var;
          nthr->task->taskgroup = taskgroup;
          nthr->fn = fn;
          nthr->data = data;
          team->ordered_release[i] = &nthr->release;
        }
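
      /* Illustrative note on the AFFINITY_THR bookkeeping above: the
         array has one bucket per place of the previous partition,
         indexed by place - 1 - prev_ts.place_partition_off.  Displaced
         pool threads are pushed onto their place's bucket as a singly
         linked list chained through thr->data; e.g. two old threads on
         place 3 (with partition offset 0) give
         affinity_thr[2] -> thrA -> thrB -> NULL, and threads are popped
         again whenever the loop needs one bound to that place.  */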

      if (__builtin_expect (affinity_thr != NULL, 0))
        {
          /* If AFFINITY_THR is non-NULL just because we had to
             permute some threads in the pool, but we've managed
             to find exactly as many old threads as we'd find
             without affinity, we don't need to handle this
             specially anymore.  */
          if (nthreads <= old_threads_used
              ? (affinity_count == old_threads_used - nthreads)
              : (i == old_threads_used))
            {
              if (team->prev_ts.place_partition_len > 64)
                free (affinity_thr);
              affinity_thr = NULL;
              affinity_count = 0;
            }
          else
            {
              i = 1;
              /* We are going to compute the places/subpartitions
                 again from the beginning.  So, we need to reinitialize
                 the vars modified by the switch (bind) above, inside
                 the loop, to the state they had after the initial
                 switch (bind).  */
              switch (bind)
                {
                case omp_proc_bind_true:
                case omp_proc_bind_close:
                  if (nthreads > thr->ts.place_partition_len)
                    /* T > P.  S has been changed, so needs
                       to be recomputed.  */
                    s = nthreads / thr->ts.place_partition_len;
                  k = 1;
                  p = thr->place - 1;
                  break;
                case omp_proc_bind_master:
                  /* No vars have been changed.  */
                  break;
                case omp_proc_bind_spread:
                  p = thr->ts.place_partition_off;
                  if (k != 0)
                    {
                      /* T > P.  */
                      s = nthreads / team->prev_ts.place_partition_len;
                      k = 1;
                    }
                  break;
                }

              /* Increase the barrier threshold to make sure all new
                 threads and all the threads we're going to let die
                 arrive before the team is released.  */
              if (affinity_count)
                gomp_simple_barrier_reinit (&pool->threads_dock,
                                            nthreads + affinity_count);
            }
        }

      if (i == nthreads)
        goto do_release;

    }

  if (__builtin_expect (nthreads + affinity_count > old_threads_used, 0))
    {
      long diff = (long) (nthreads + affinity_count) - (long) old_threads_used;

      if (old_threads_used == 0)
        --diff;

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }

  attr = &gomp_thread_attr;
  if (__builtin_expect (gomp_places_list != NULL, 0))
    {
      size_t stacksize;
      pthread_attr_init (&thread_attr);
      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
        pthread_attr_setstacksize (&thread_attr, stacksize);
      attr = &thread_attr;
    }

  start_data = gomp_alloca (sizeof (struct gomp_thread_start_data)
                            * (nthreads - i));
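
  /* Illustrative note: START_DATA is a stack array (alloca) with one
     slot per thread still to be created.  Every new thread copies its
     slot at the top of gomp_thread_start before its first barrier
     wait, and the master does not return from this function until that
     barrier has been passed, so the alloca'd block stays live for as
     long as any child may read it.  */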

  /* Launch new threads.  */
  for (; i < nthreads; ++i)
    {
      int err;

      start_data->ts.place_partition_off = thr->ts.place_partition_off;
      start_data->ts.place_partition_len = thr->ts.place_partition_len;
      start_data->place = 0;
      if (__builtin_expect (gomp_places_list != NULL, 0))
        {
          switch (bind)
            {
            case omp_proc_bind_true:
            case omp_proc_bind_close:
              if (k == s)
                {
                  ++p;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  k = 1;
                  if (i == nthreads - rest)
                    s = 1;
                }
              else
                ++k;
              break;
            case omp_proc_bind_master:
              break;
            case omp_proc_bind_spread:
              if (k == 0)
                {
                  /* T <= P.  */
                  if (p < rest)
                    p += s + 1;
                  else
                    p += s;
                  if (p == (team->prev_ts.place_partition_off
                            + team->prev_ts.place_partition_len))
                    p = team->prev_ts.place_partition_off;
                  start_data->ts.place_partition_off = p;
                  if (p < rest)
                    start_data->ts.place_partition_len = s + 1;
                  else
                    start_data->ts.place_partition_len = s;
                }
              else
                {
                  /* T > P.  */
                  if (k == s)
                    {
                      ++p;
                      if (p == (team->prev_ts.place_partition_off
                                + team->prev_ts.place_partition_len))
                        p = team->prev_ts.place_partition_off;
                      k = 1;
                      if (i == nthreads - rest)
                        s = 1;
                    }
                  else
                    ++k;
                  start_data->ts.place_partition_off = p;
                  start_data->ts.place_partition_len = 1;
                }
              break;
            }
          start_data->place = p + 1;
          if (affinity_thr != NULL && pool->threads[i] != NULL)
            continue;
          gomp_init_thread_affinity (attr, p);
        }

      start_data->fn = fn;
      start_data->fn_data = data;
      start_data->ts.team = team;
      start_data->ts.work_share = &team->work_shares[0];
      start_data->ts.last_work_share = NULL;
      start_data->ts.team_id = i;
      start_data->ts.level = team->prev_ts.level + 1;
      start_data->ts.active_level = thr->ts.active_level;
#ifdef HAVE_SYNC_BUILTINS
      start_data->ts.single_count = 0;
#endif
      start_data->ts.static_trip = 0;
      start_data->task = &team->implicit_task[i];
      gomp_init_task (start_data->task, task, icv);
      team->implicit_task[i].icv.nthreads_var = nthreads_var;
      team->implicit_task[i].icv.bind_var = bind_var;
      start_data->task->taskgroup = taskgroup;
      start_data->thread_pool = pool;
      start_data->nested = nested;

      attr = gomp_adjust_thread_attr (attr, &thread_attr);
      err = pthread_create (&start_data->handle, attr, gomp_thread_start,
                            start_data);
      start_data++;
      if (err != 0)
        gomp_fatal ("Thread creation failed: %s", strerror (err));
    }

  if (__builtin_expect (attr == &thread_attr, 0))
    pthread_attr_destroy (&thread_attr);
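
  /* Illustrative note for the release below: when a pool shrinks,
     e.g. from old_threads_used = 8 to nthreads = 4, the threads in
     pool->threads[4..7] keep fn == NULL.  The dock wait releases all
     of them; the four surplus threads read the NULL fn, leave their
     idle loop and detach, and the dock threshold is trimmed back to 4
     for the next region.  */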

 do_release:
  if (nested)
    gomp_barrier_wait (&team->barrier);
  else
    gomp_simple_barrier_wait (&pool->threads_dock);

  /* Decrease the barrier threshold to match the number of threads
     that should arrive back at the end of this team.  The extra
     threads should be exiting.  Note that we arrange for this test
     to never be true for nested teams.  If AFFINITY_COUNT is non-zero,
     the barrier as well as gomp_managed_threads was temporarily
     set to NTHREADS + AFFINITY_COUNT.  For NTHREADS < OLD_THREADS_USED,
     AFFINITY_COUNT, if non-zero, will always be at least
     OLD_THREADS_USED - NTHREADS.  */
  if (__builtin_expect (nthreads < old_threads_used, 0)
      || __builtin_expect (affinity_count, 0))
    {
      long diff = (long) nthreads - (long) old_threads_used;

      if (affinity_count)
        diff = -affinity_count;

      gomp_simple_barrier_reinit (&pool->threads_dock, nthreads);

#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, diff);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads += diff;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
    }
  if (__builtin_expect (gomp_display_affinity_var, 0))
    {
      if (nested
          || nthreads != old_threads_used
          || force_display)
        {
          gomp_display_affinity_thread (gomp_thread_self (), &thr->ts,
                                        thr->place);
          if (nested)
            {
              start_data -= nthreads - 1;
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_display_affinity_thread (
#ifdef LIBGOMP_USE_PTHREADS
                                                start_data->handle,
#else
                                                gomp_thread_self (),
#endif
                                                &start_data->ts,
                                                start_data->place);
                  start_data++;
                }
            }
          else
            {
              for (i = 1; i < nthreads; ++i)
                {
                  gomp_thread_handle handle
                    = gomp_thread_to_pthread_t (pool->threads[i]);
                  gomp_display_affinity_thread (handle, &pool->threads[i]->ts,
                                                pool->threads[i]->place);
                }
            }
        }
    }
  if (__builtin_expect (affinity_thr != NULL, 0)
      && team->prev_ts.place_partition_len > 64)
    free (affinity_thr);
}
#endif


/* Terminate the current team.  This is only to be called by the master
   thread.  We assume that we must wait for the other threads.  */

void
gomp_team_end (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

  /* This barrier handles all pending explicit threads.
     Since #pragma omp cancel parallel might leave the awaited count in
     team->barrier in an inconsistent state, we need to use a different
     counter here.  */
  gomp_team_barrier_wait_final (&team->barrier);
  if (__builtin_expect (team->team_cancelled, 0))
    {
      struct gomp_work_share *ws = team->work_shares_to_free;
      do
        {
          struct gomp_work_share *next_ws = gomp_ptrlock_get (&ws->next_ws);
          if (next_ws == NULL)
            gomp_ptrlock_set (&ws->next_ws, ws);
          gomp_fini_work_share (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  else
    gomp_fini_work_share (thr->ts.work_share);

  gomp_end_task ();
  thr->ts = team->prev_ts;

  if (__builtin_expect (thr->ts.level != 0, 0))
    {
#ifdef HAVE_SYNC_BUILTINS
      __sync_fetch_and_add (&gomp_managed_threads, 1L - team->nthreads);
#else
      gomp_mutex_lock (&gomp_managed_threads_lock);
      gomp_managed_threads -= team->nthreads - 1L;
      gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
      /* This barrier has gomp_barrier_wait_last counterparts
         and ensures the team can be safely destroyed.  */
      gomp_barrier_wait (&team->barrier);
    }

  if (__builtin_expect (team->work_shares[0].next_alloc != NULL, 0))
    {
      struct gomp_work_share *ws = team->work_shares[0].next_alloc;
      do
        {
          struct gomp_work_share *next_ws = ws->next_alloc;
          free (ws);
          ws = next_ws;
        }
      while (ws != NULL);
    }
  gomp_sem_destroy (&team->master_release);

  if (__builtin_expect (thr->ts.team != NULL, 0)
      || __builtin_expect (team->nthreads == 1, 0))
    free_team (team);
  else
    {
      struct gomp_thread_pool *pool = thr->thread_pool;
      if (pool->last_team)
        free_team (pool->last_team);
      pool->last_team = team;
      gomp_release_thread_pool (pool);
    }
}
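
/* Illustrative note: a finished team is not freed eagerly.  It is
   parked in pool->last_team (a one-deep cache) and, when the next
   parallel region asks for the same number of threads, get_last_team
   hands it back to gomp_new_team, so the allocation, barrier and locks
   are reused rather than destroyed and recreated.  */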

#ifdef LIBGOMP_USE_PTHREADS

/* Constructors for this file.  */

static void __attribute__((constructor))
initialize_team (void)
{
#if !defined HAVE_TLS && !defined USE_EMUTLS
  static struct gomp_thread initial_thread_tls_data;

  pthread_key_create (&gomp_tls_key, NULL);
  pthread_setspecific (gomp_tls_key, &initial_thread_tls_data);
#endif

  if (pthread_key_create (&gomp_thread_destructor, gomp_free_thread) != 0)
    gomp_fatal ("could not create thread pool destructor.");
}

static void __attribute__((destructor))
team_destructor (void)
{
  /* Without this dlclose on libgomp could lead to subsequent
     crashes.  */
  pthread_key_delete (gomp_thread_destructor);
}

/* Similar to gomp_free_pool_helper, but doesn't detach itself;
   gomp_pause_host will pthread_join these threads.  */

static void
gomp_pause_pool_helper (void *thread_pool)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool
    = (struct gomp_thread_pool *) thread_pool;
  gomp_simple_barrier_wait_last (&pool->threads_dock);
  gomp_sem_destroy (&thr->release);
  thr->thread_pool = NULL;
  thr->task = NULL;
  pthread_exit (NULL);
}

/* Free a thread pool and release its threads.  Return non-zero on
   failure.  */

int
gomp_pause_host (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_thread_pool *pool = thr->thread_pool;
  if (thr->ts.level)
    return -1;
  if (pool)
    {
      if (pool->threads_used > 0)
        {
          int i;
          pthread_t *thrs
            = gomp_alloca (sizeof (pthread_t) * pool->threads_used);
          for (i = 1; i < pool->threads_used; i++)
            {
              struct gomp_thread *nthr = pool->threads[i];
              nthr->fn = gomp_pause_pool_helper;
              nthr->data = pool;
              thrs[i] = gomp_thread_to_pthread_t (nthr);
            }
          /* This barrier undocks threads docked on pool->threads_dock.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* And this waits till all threads have called gomp_barrier_wait_last
             in gomp_pause_pool_helper.  */
          gomp_simple_barrier_wait (&pool->threads_dock);
          /* Now it is safe to destroy the barrier and free the pool.  */
          gomp_simple_barrier_destroy (&pool->threads_dock);

#ifdef HAVE_SYNC_BUILTINS
          __sync_fetch_and_add (&gomp_managed_threads,
                                1L - pool->threads_used);
#else
          gomp_mutex_lock (&gomp_managed_threads_lock);
          gomp_managed_threads -= pool->threads_used - 1L;
          gomp_mutex_unlock (&gomp_managed_threads_lock);
#endif
          for (i = 1; i < pool->threads_used; i++)
            pthread_join (thrs[i], NULL);
        }
      if (pool->last_team)
        free_team (pool->last_team);
#ifndef __nvptx__
      team_free (pool->threads);
      team_free (pool);
#endif
      thr->thread_pool = NULL;
    }
  return 0;
}
#endif
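
/* For illustration: gomp_pause_host is the host-side worker behind the
   OpenMP 5.0 pause entry points (omp_pause_resource and
   omp_pause_resource_all).  A rough user-level sketch of what ends up
   joining the pooled threads (do_work is a stand-in):

     #include <omp.h>

     int
     main (void)
     {
     #pragma omp parallel
       do_work ();                               // pool created lazily
       omp_pause_resource_all (omp_pause_hard);  // pool threads joined
       return 0;
     }
*/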

struct gomp_task_icv *
gomp_new_icv (void)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task));
  gomp_init_task (task, NULL, &gomp_global_icv);
  thr->task = task;
#ifdef LIBGOMP_USE_PTHREADS
  pthread_setspecific (gomp_thread_destructor, thr);
#endif
  return &task->icv;
}