1/* Copyright (C) 2007-2022 Free Software Foundation, Inc. 2 Contributed by Richard Henderson <rth@redhat.com>. 3 4 This file is part of the GNU Offloading and Multi Processing Library 5 (libgomp). 6 7 Libgomp is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 13 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 14 FOR A PARTICULAR PURPOSE. See the GNU General Public License for 15 more details. 16 17 Under Section 7 of GPL version 3, you are granted additional 18 permissions described in the GCC Runtime Library Exception, version 19 3.1, as published by the Free Software Foundation. 20 21 You should have received a copy of the GNU General Public License and 22 a copy of the GCC Runtime Library Exception along with this program; 23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24 <http://www.gnu.org/licenses/>. */ 25 26/* This file handles the maintenance of tasks in response to task 27 creation and termination. */ 28 29#include "libgomp.h" 30#include <stdlib.h> 31#include <string.h> 32#include <assert.h> 33#include "gomp-constants.h" 34 35typedef struct gomp_task_depend_entry *hash_entry_type; 36 37static inline void * 38htab_alloc (size_t size) 39{ 40 return gomp_malloc (size); 41} 42 43static inline void 44htab_free (void *ptr) 45{ 46 free (ptr); 47} 48 49#include "hashtab.h" 50 51static inline hashval_t 52htab_hash (hash_entry_type element) 53{ 54 return hash_pointer (element->addr); 55} 56 57static inline bool 58htab_eq (hash_entry_type x, hash_entry_type y) 59{ 60 return x->addr == y->addr; 61} 62 63/* Create a new task data structure. */ 64 65void 66gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task, 67 struct gomp_task_icv *prev_icv) 68{ 69 /* It would seem that using memset here would be a win, but it turns 70 out that partially filling gomp_task allows us to keep the 71 overhead of task creation low. In the nqueens-1.c test, for a 72 sufficiently large N, we drop the overhead from 5-6% to 1%. 73 74 Note, the nqueens-1.c test in serial mode is a good test to 75 benchmark the overhead of creating tasks as there are millions of 76 tiny tasks created that all run undeferred. */ 77 task->parent = parent_task; 78 priority_queue_init (&task->children_queue); 79 task->taskgroup = NULL; 80 task->dependers = NULL; 81 task->depend_hash = NULL; 82 task->taskwait = NULL; 83 task->depend_count = 0; 84 task->completion_sem = NULL; 85 task->deferred_p = false; 86 task->icv = *prev_icv; 87 task->kind = GOMP_TASK_IMPLICIT; 88 task->in_tied_task = false; 89 task->final_task = false; 90 task->copy_ctors_done = false; 91 task->parent_depends_on = false; 92} 93 94/* Clean up a task, after completing it. */ 95 96void 97gomp_end_task (void) 98{ 99 struct gomp_thread *thr = gomp_thread (); 100 struct gomp_task *task = thr->task; 101 102 gomp_finish_task (task); 103 thr->task = task->parent; 104} 105 106/* Clear the parent field of every task in LIST. */ 107 108static inline void 109gomp_clear_parent_in_list (struct priority_list *list) 110{ 111 struct priority_node *p = list->tasks; 112 if (p) 113 do 114 { 115 priority_node_to_task (PQ_CHILDREN, p)->parent = NULL; 116 p = p->next; 117 } 118 while (p != list->tasks); 119} 120 121/* Splay tree version of gomp_clear_parent_in_list. 122 123 Clear the parent field of every task in NODE within SP, and free 124 the node when done. */ 125 126static void 127gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node) 128{ 129 if (!node) 130 return; 131 prio_splay_tree_node left = node->left, right = node->right; 132 gomp_clear_parent_in_list (&node->key.l); 133#if _LIBGOMP_CHECKING_ 134 memset (node, 0xaf, sizeof (*node)); 135#endif 136 /* No need to remove the node from the tree. We're nuking 137 everything, so just free the nodes and our caller can clear the 138 entire splay tree. */ 139 free (node); 140 gomp_clear_parent_in_tree (sp, left); 141 gomp_clear_parent_in_tree (sp, right); 142} 143 144/* Clear the parent field of every task in Q and remove every task 145 from Q. */ 146 147static inline void 148gomp_clear_parent (struct priority_queue *q) 149{ 150 if (priority_queue_multi_p (q)) 151 { 152 gomp_clear_parent_in_tree (&q->t, q->t.root); 153 /* All the nodes have been cleared in gomp_clear_parent_in_tree. 154 No need to remove anything. We can just nuke everything. */ 155 q->t.root = NULL; 156 } 157 else 158 gomp_clear_parent_in_list (&q->l); 159} 160 161/* Helper function for GOMP_task and gomp_create_target_task. 162 163 For a TASK with in/out dependencies, fill in the various dependency 164 queues. PARENT is the parent of said task. DEPEND is as in 165 GOMP_task. */ 166 167static void 168gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent, 169 void **depend) 170{ 171 size_t ndepend = (uintptr_t) depend[0]; 172 size_t i; 173 hash_entry_type ent; 174 175 if (ndepend) 176 { 177 /* depend[0] is total # */ 178 size_t nout = (uintptr_t) depend[1]; /* # of out: and inout: */ 179 /* ndepend - nout is # of in: */ 180 for (i = 0; i < ndepend; i++) 181 { 182 task->depend[i].addr = depend[2 + i]; 183 task->depend[i].is_in = i >= nout; 184 } 185 } 186 else 187 { 188 ndepend = (uintptr_t) depend[1]; /* total # */ 189 size_t nout = (uintptr_t) depend[2]; /* # of out: and inout: */ 190 size_t nmutexinoutset = (uintptr_t) depend[3]; /* # of mutexinoutset: */ 191 /* For now we treat mutexinoutset like out, which is compliant, but 192 inefficient. */ 193 size_t nin = (uintptr_t) depend[4]; /* # of in: */ 194 /* ndepend - nout - nmutexinoutset - nin is # of depobjs */ 195 size_t normal = nout + nmutexinoutset + nin; 196 size_t n = 0; 197 for (i = normal; i < ndepend; i++) 198 { 199 void **d = (void **) (uintptr_t) depend[5 + i]; 200 switch ((uintptr_t) d[1]) 201 { 202 case GOMP_DEPEND_OUT: 203 case GOMP_DEPEND_INOUT: 204 case GOMP_DEPEND_MUTEXINOUTSET: 205 break; 206 case GOMP_DEPEND_IN: 207 continue; 208 default: 209 gomp_fatal ("unknown omp_depend_t dependence type %d", 210 (int) (uintptr_t) d[1]); 211 } 212 task->depend[n].addr = d[0]; 213 task->depend[n++].is_in = 0; 214 } 215 for (i = 0; i < normal; i++) 216 { 217 task->depend[n].addr = depend[5 + i]; 218 task->depend[n++].is_in = i >= nout + nmutexinoutset; 219 } 220 for (i = normal; i < ndepend; i++) 221 { 222 void **d = (void **) (uintptr_t) depend[5 + i]; 223 if ((uintptr_t) d[1] != GOMP_DEPEND_IN) 224 continue; 225 task->depend[n].addr = d[0]; 226 task->depend[n++].is_in = 1; 227 } 228 } 229 task->depend_count = ndepend; 230 task->num_dependees = 0; 231 if (parent->depend_hash == NULL) 232 parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12); 233 for (i = 0; i < ndepend; i++) 234 { 235 task->depend[i].next = NULL; 236 task->depend[i].prev = NULL; 237 task->depend[i].task = task; 238 task->depend[i].redundant = false; 239 task->depend[i].redundant_out = false; 240 241 hash_entry_type *slot = htab_find_slot (&parent->depend_hash, 242 &task->depend[i], INSERT); 243 hash_entry_type out = NULL, last = NULL; 244 if (*slot) 245 { 246 /* If multiple depends on the same task are the same, all but the 247 first one are redundant. As inout/out come first, if any of them 248 is inout/out, it will win, which is the right semantics. */ 249 if ((*slot)->task == task) 250 { 251 task->depend[i].redundant = true; 252 continue; 253 } 254 for (ent = *slot; ent; ent = ent->next) 255 { 256 if (ent->redundant_out) 257 break; 258 259 last = ent; 260 261 /* depend(in:...) doesn't depend on earlier depend(in:...). */ 262 if (task->depend[i].is_in && ent->is_in) 263 continue; 264 265 if (!ent->is_in) 266 out = ent; 267 268 struct gomp_task *tsk = ent->task; 269 if (tsk->dependers == NULL) 270 { 271 tsk->dependers 272 = gomp_malloc (sizeof (struct gomp_dependers_vec) 273 + 6 * sizeof (struct gomp_task *)); 274 tsk->dependers->n_elem = 1; 275 tsk->dependers->allocated = 6; 276 tsk->dependers->elem[0] = task; 277 task->num_dependees++; 278 continue; 279 } 280 /* We already have some other dependency on tsk from earlier 281 depend clause. */ 282 else if (tsk->dependers->n_elem 283 && (tsk->dependers->elem[tsk->dependers->n_elem - 1] 284 == task)) 285 continue; 286 else if (tsk->dependers->n_elem == tsk->dependers->allocated) 287 { 288 tsk->dependers->allocated 289 = tsk->dependers->allocated * 2 + 2; 290 tsk->dependers 291 = gomp_realloc (tsk->dependers, 292 sizeof (struct gomp_dependers_vec) 293 + (tsk->dependers->allocated 294 * sizeof (struct gomp_task *))); 295 } 296 tsk->dependers->elem[tsk->dependers->n_elem++] = task; 297 task->num_dependees++; 298 } 299 task->depend[i].next = *slot; 300 (*slot)->prev = &task->depend[i]; 301 } 302 *slot = &task->depend[i]; 303 304 /* There is no need to store more than one depend({,in}out:) task per 305 address in the hash table chain for the purpose of creation of 306 deferred tasks, because each out depends on all earlier outs, thus it 307 is enough to record just the last depend({,in}out:). For depend(in:), 308 we need to keep all of the previous ones not terminated yet, because 309 a later depend({,in}out:) might need to depend on all of them. So, if 310 the new task's clause is depend({,in}out:), we know there is at most 311 one other depend({,in}out:) clause in the list (out). For 312 non-deferred tasks we want to see all outs, so they are moved to the 313 end of the chain, after first redundant_out entry all following 314 entries should be redundant_out. */ 315 if (!task->depend[i].is_in && out) 316 { 317 if (out != last) 318 { 319 out->next->prev = out->prev; 320 out->prev->next = out->next; 321 out->next = last->next; 322 out->prev = last; 323 last->next = out; 324 if (out->next) 325 out->next->prev = out; 326 } 327 out->redundant_out = true; 328 } 329 } 330} 331 332/* Called when encountering an explicit task directive. If IF_CLAUSE is 333 false, then we must not delay in executing the task. If UNTIED is true, 334 then the task may be executed by any member of the team. 335 336 DEPEND is an array containing: 337 if depend[0] is non-zero, then: 338 depend[0]: number of depend elements. 339 depend[1]: number of depend elements of type "out/inout". 340 depend[2..N+1]: address of [1..N]th depend element. 341 otherwise, when depend[0] is zero, then: 342 depend[1]: number of depend elements. 343 depend[2]: number of depend elements of type "out/inout". 344 depend[3]: number of depend elements of type "mutexinoutset". 345 depend[4]: number of depend elements of type "in". 346 depend[5..4+depend[2]+depend[3]+depend[4]]: address of depend elements 347 depend[5+depend[2]+depend[3]+depend[4]..4+depend[1]]: address of 348 omp_depend_t objects. */ 349 350void 351GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *), 352 long arg_size, long arg_align, bool if_clause, unsigned flags, 353 void **depend, int priority_arg, void *detach) 354{ 355 struct gomp_thread *thr = gomp_thread (); 356 struct gomp_team *team = thr->ts.team; 357 int priority = 0; 358 359#ifdef HAVE_BROKEN_POSIX_SEMAPHORES 360 /* If pthread_mutex_* is used for omp_*lock*, then each task must be 361 tied to one thread all the time. This means UNTIED tasks must be 362 tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN 363 might be running on different thread than FN. */ 364 if (cpyfn) 365 if_clause = false; 366 flags &= ~GOMP_TASK_FLAG_UNTIED; 367#endif 368 369 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 370 if (__builtin_expect (gomp_cancel_var, 0) && team) 371 { 372 if (gomp_team_barrier_cancelled (&team->barrier)) 373 return; 374 if (thr->task->taskgroup) 375 { 376 if (thr->task->taskgroup->cancelled) 377 return; 378 if (thr->task->taskgroup->workshare 379 && thr->task->taskgroup->prev 380 && thr->task->taskgroup->prev->cancelled) 381 return; 382 } 383 } 384 385 if (__builtin_expect ((flags & GOMP_TASK_FLAG_PRIORITY) != 0, 0)) 386 { 387 priority = priority_arg; 388 if (priority > gomp_max_task_priority_var) 389 priority = gomp_max_task_priority_var; 390 } 391 392 if (!if_clause || team == NULL 393 || (thr->task && thr->task->final_task) 394 || team->task_count > 64 * team->nthreads) 395 { 396 struct gomp_task task; 397 gomp_sem_t completion_sem; 398 399 /* If there are depend clauses and earlier deferred sibling tasks 400 with depend clauses, check if there isn't a dependency. If there 401 is, we need to wait for them. There is no need to handle 402 depend clauses for non-deferred tasks other than this, because 403 the parent task is suspended until the child task finishes and thus 404 it can't start further child tasks. */ 405 if ((flags & GOMP_TASK_FLAG_DEPEND) 406 && thr->task && thr->task->depend_hash) 407 gomp_task_maybe_wait_for_dependencies (depend); 408 409 gomp_init_task (&task, thr->task, gomp_icv (false)); 410 task.kind = GOMP_TASK_UNDEFERRED; 411 task.final_task = (thr->task && thr->task->final_task) 412 || (flags & GOMP_TASK_FLAG_FINAL); 413 task.priority = priority; 414 415 if ((flags & GOMP_TASK_FLAG_DETACH) != 0) 416 { 417 gomp_sem_init (&completion_sem, 0); 418 task.completion_sem = &completion_sem; 419 *(void **) detach = &task; 420 if (data) 421 *(void **) data = &task; 422 423 gomp_debug (0, "Thread %d: new event: %p\n", 424 thr->ts.team_id, &task); 425 } 426 427 if (thr->task) 428 { 429 task.in_tied_task = thr->task->in_tied_task; 430 task.taskgroup = thr->task->taskgroup; 431 } 432 thr->task = &task; 433 if (__builtin_expect (cpyfn != NULL, 0)) 434 { 435 char buf[arg_size + arg_align - 1]; 436 char *arg = (char *) (((uintptr_t) buf + arg_align - 1) 437 & ~(uintptr_t) (arg_align - 1)); 438 cpyfn (arg, data); 439 fn (arg); 440 } 441 else 442 fn (data); 443 444 if ((flags & GOMP_TASK_FLAG_DETACH) != 0) 445 { 446 gomp_sem_wait (&completion_sem); 447 gomp_sem_destroy (&completion_sem); 448 } 449 450 /* Access to "children" is normally done inside a task_lock 451 mutex region, but the only way this particular task.children 452 can be set is if this thread's task work function (fn) 453 creates children. So since the setter is *this* thread, we 454 need no barriers here when testing for non-NULL. We can have 455 task.children set by the current thread then changed by a 456 child thread, but seeing a stale non-NULL value is not a 457 problem. Once past the task_lock acquisition, this thread 458 will see the real value of task.children. */ 459 if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED)) 460 { 461 gomp_mutex_lock (&team->task_lock); 462 gomp_clear_parent (&task.children_queue); 463 gomp_mutex_unlock (&team->task_lock); 464 } 465 gomp_end_task (); 466 } 467 else 468 { 469 struct gomp_task *task; 470 struct gomp_task *parent = thr->task; 471 struct gomp_taskgroup *taskgroup = parent->taskgroup; 472 char *arg; 473 bool do_wake; 474 size_t depend_size = 0; 475 476 if (flags & GOMP_TASK_FLAG_DEPEND) 477 depend_size = ((uintptr_t) (depend[0] ? depend[0] : depend[1]) 478 * sizeof (struct gomp_task_depend_entry)); 479 task = gomp_malloc (sizeof (*task) + depend_size 480 + arg_size + arg_align - 1); 481 arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1) 482 & ~(uintptr_t) (arg_align - 1)); 483 gomp_init_task (task, parent, gomp_icv (false)); 484 task->priority = priority; 485 task->kind = GOMP_TASK_UNDEFERRED; 486 task->in_tied_task = parent->in_tied_task; 487 task->taskgroup = taskgroup; 488 task->deferred_p = true; 489 if ((flags & GOMP_TASK_FLAG_DETACH) != 0) 490 { 491 task->detach_team = team; 492 493 *(void **) detach = task; 494 if (data) 495 *(void **) data = task; 496 497 gomp_debug (0, "Thread %d: new event: %p\n", thr->ts.team_id, task); 498 } 499 thr->task = task; 500 if (cpyfn) 501 { 502 cpyfn (arg, data); 503 task->copy_ctors_done = true; 504 } 505 else 506 memcpy (arg, data, arg_size); 507 thr->task = parent; 508 task->kind = GOMP_TASK_WAITING; 509 task->fn = fn; 510 task->fn_data = arg; 511 task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1; 512 gomp_mutex_lock (&team->task_lock); 513 /* If parallel or taskgroup has been cancelled, don't start new 514 tasks. */ 515 if (__builtin_expect (gomp_cancel_var, 0) 516 && !task->copy_ctors_done) 517 { 518 if (gomp_team_barrier_cancelled (&team->barrier)) 519 { 520 do_cancel: 521 gomp_mutex_unlock (&team->task_lock); 522 gomp_finish_task (task); 523 free (task); 524 return; 525 } 526 if (taskgroup) 527 { 528 if (taskgroup->cancelled) 529 goto do_cancel; 530 if (taskgroup->workshare 531 && taskgroup->prev 532 && taskgroup->prev->cancelled) 533 goto do_cancel; 534 } 535 } 536 if (taskgroup) 537 taskgroup->num_children++; 538 if (depend_size) 539 { 540 gomp_task_handle_depend (task, parent, depend); 541 if (task->num_dependees) 542 { 543 /* Tasks that depend on other tasks are not put into the 544 various waiting queues, so we are done for now. Said 545 tasks are instead put into the queues via 546 gomp_task_run_post_handle_dependers() after their 547 dependencies have been satisfied. After which, they 548 can be picked up by the various scheduling 549 points. */ 550 gomp_mutex_unlock (&team->task_lock); 551 return; 552 } 553 } 554 555 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, 556 task, priority, 557 PRIORITY_INSERT_BEGIN, 558 /*adjust_parent_depends_on=*/false, 559 task->parent_depends_on); 560 if (taskgroup) 561 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 562 task, priority, 563 PRIORITY_INSERT_BEGIN, 564 /*adjust_parent_depends_on=*/false, 565 task->parent_depends_on); 566 567 priority_queue_insert (PQ_TEAM, &team->task_queue, 568 task, priority, 569 PRIORITY_INSERT_END, 570 /*adjust_parent_depends_on=*/false, 571 task->parent_depends_on); 572 573 ++team->task_count; 574 ++team->task_queued_count; 575 gomp_team_barrier_set_task_pending (&team->barrier); 576 do_wake = team->task_running_count + !parent->in_tied_task 577 < team->nthreads; 578 gomp_mutex_unlock (&team->task_lock); 579 if (do_wake) 580 gomp_team_barrier_wake (&team->barrier, 1); 581 } 582} 583 584ialias (GOMP_taskgroup_start) 585ialias (GOMP_taskgroup_end) 586ialias (GOMP_taskgroup_reduction_register) 587 588#define TYPE long 589#define UTYPE unsigned long 590#define TYPE_is_long 1 591#include "taskloop.c" 592#undef TYPE 593#undef UTYPE 594#undef TYPE_is_long 595 596#define TYPE unsigned long long 597#define UTYPE TYPE 598#define GOMP_taskloop GOMP_taskloop_ull 599#include "taskloop.c" 600#undef TYPE 601#undef UTYPE 602#undef GOMP_taskloop 603 604static void inline 605priority_queue_move_task_first (enum priority_queue_type type, 606 struct priority_queue *head, 607 struct gomp_task *task) 608{ 609#if _LIBGOMP_CHECKING_ 610 if (!priority_queue_task_in_queue_p (type, head, task)) 611 gomp_fatal ("Attempt to move first missing task %p", task); 612#endif 613 struct priority_list *list; 614 if (priority_queue_multi_p (head)) 615 { 616 list = priority_queue_lookup_priority (head, task->priority); 617#if _LIBGOMP_CHECKING_ 618 if (!list) 619 gomp_fatal ("Unable to find priority %d", task->priority); 620#endif 621 } 622 else 623 list = &head->l; 624 priority_list_remove (list, task_to_priority_node (type, task), 0); 625 priority_list_insert (type, list, task, task->priority, 626 PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN, 627 task->parent_depends_on); 628} 629 630/* Actual body of GOMP_PLUGIN_target_task_completion that is executed 631 with team->task_lock held, or is executed in the thread that called 632 gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been 633 run before it acquires team->task_lock. */ 634 635static void 636gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task) 637{ 638 struct gomp_task *parent = task->parent; 639 if (parent) 640 priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue, 641 task); 642 643 struct gomp_taskgroup *taskgroup = task->taskgroup; 644 if (taskgroup) 645 priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 646 task); 647 648 priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority, 649 PRIORITY_INSERT_BEGIN, false, 650 task->parent_depends_on); 651 task->kind = GOMP_TASK_WAITING; 652 if (parent && parent->taskwait) 653 { 654 if (parent->taskwait->in_taskwait) 655 { 656 /* One more task has had its dependencies met. 657 Inform any waiters. */ 658 parent->taskwait->in_taskwait = false; 659 gomp_sem_post (&parent->taskwait->taskwait_sem); 660 } 661 else if (parent->taskwait->in_depend_wait) 662 { 663 /* One more task has had its dependencies met. 664 Inform any waiters. */ 665 parent->taskwait->in_depend_wait = false; 666 gomp_sem_post (&parent->taskwait->taskwait_sem); 667 } 668 } 669 if (taskgroup && taskgroup->in_taskgroup_wait) 670 { 671 /* One more task has had its dependencies met. 672 Inform any waiters. */ 673 taskgroup->in_taskgroup_wait = false; 674 gomp_sem_post (&taskgroup->taskgroup_sem); 675 } 676 677 ++team->task_queued_count; 678 gomp_team_barrier_set_task_pending (&team->barrier); 679 /* I'm afraid this can't be done after releasing team->task_lock, 680 as gomp_target_task_completion is run from unrelated thread and 681 therefore in between gomp_mutex_unlock and gomp_team_barrier_wake 682 the team could be gone already. */ 683 if (team->nthreads > team->task_running_count) 684 gomp_team_barrier_wake (&team->barrier, 1); 685} 686 687/* Signal that a target task TTASK has completed the asynchronously 688 running phase and should be requeued as a task to handle the 689 variable unmapping. */ 690 691void 692GOMP_PLUGIN_target_task_completion (void *data) 693{ 694 struct gomp_target_task *ttask = (struct gomp_target_task *) data; 695 struct gomp_task *task = ttask->task; 696 struct gomp_team *team = ttask->team; 697 698 gomp_mutex_lock (&team->task_lock); 699 if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN) 700 { 701 ttask->state = GOMP_TARGET_TASK_FINISHED; 702 gomp_mutex_unlock (&team->task_lock); 703 return; 704 } 705 ttask->state = GOMP_TARGET_TASK_FINISHED; 706 gomp_target_task_completion (team, task); 707 gomp_mutex_unlock (&team->task_lock); 708} 709 710static void gomp_task_run_post_handle_depend_hash (struct gomp_task *); 711 712/* Called for nowait target tasks. */ 713 714bool 715gomp_create_target_task (struct gomp_device_descr *devicep, 716 void (*fn) (void *), size_t mapnum, void **hostaddrs, 717 size_t *sizes, unsigned short *kinds, 718 unsigned int flags, void **depend, void **args, 719 enum gomp_target_task_state state) 720{ 721 struct gomp_thread *thr = gomp_thread (); 722 struct gomp_team *team = thr->ts.team; 723 724 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 725 if (__builtin_expect (gomp_cancel_var, 0) && team) 726 { 727 if (gomp_team_barrier_cancelled (&team->barrier)) 728 return true; 729 if (thr->task->taskgroup) 730 { 731 if (thr->task->taskgroup->cancelled) 732 return true; 733 if (thr->task->taskgroup->workshare 734 && thr->task->taskgroup->prev 735 && thr->task->taskgroup->prev->cancelled) 736 return true; 737 } 738 } 739 740 struct gomp_target_task *ttask; 741 struct gomp_task *task; 742 struct gomp_task *parent = thr->task; 743 struct gomp_taskgroup *taskgroup = parent->taskgroup; 744 bool do_wake; 745 size_t depend_size = 0; 746 uintptr_t depend_cnt = 0; 747 size_t tgt_align = 0, tgt_size = 0; 748 uintptr_t args_cnt = 0; 749 750 if (depend != NULL) 751 { 752 depend_cnt = (uintptr_t) (depend[0] ? depend[0] : depend[1]); 753 depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry); 754 } 755 if (fn) 756 { 757 /* GOMP_MAP_FIRSTPRIVATE need to be copied first, as they are 758 firstprivate on the target task. */ 759 size_t i; 760 for (i = 0; i < mapnum; i++) 761 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 762 { 763 size_t align = (size_t) 1 << (kinds[i] >> 8); 764 if (tgt_align < align) 765 tgt_align = align; 766 tgt_size = (tgt_size + align - 1) & ~(align - 1); 767 tgt_size += sizes[i]; 768 } 769 if (tgt_align) 770 tgt_size += tgt_align - 1; 771 else 772 tgt_size = 0; 773 if (args) 774 { 775 void **cargs = args; 776 while (*cargs) 777 { 778 intptr_t id = (intptr_t) *cargs++; 779 if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) 780 cargs++; 781 } 782 args_cnt = cargs + 1 - args; 783 } 784 } 785 786 task = gomp_malloc (sizeof (*task) + depend_size 787 + sizeof (*ttask) 788 + args_cnt * sizeof (void *) 789 + mapnum * (sizeof (void *) + sizeof (size_t) 790 + sizeof (unsigned short)) 791 + tgt_size); 792 gomp_init_task (task, parent, gomp_icv (false)); 793 task->priority = 0; 794 task->kind = GOMP_TASK_WAITING; 795 task->in_tied_task = parent->in_tied_task; 796 task->taskgroup = taskgroup; 797 ttask = (struct gomp_target_task *) &task->depend[depend_cnt]; 798 ttask->devicep = devicep; 799 ttask->fn = fn; 800 ttask->mapnum = mapnum; 801 memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); 802 if (args_cnt) 803 { 804 ttask->args = (void **) &ttask->hostaddrs[mapnum]; 805 memcpy (ttask->args, args, args_cnt * sizeof (void *)); 806 ttask->sizes = (size_t *) &ttask->args[args_cnt]; 807 } 808 else 809 { 810 ttask->args = args; 811 ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; 812 } 813 memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); 814 ttask->kinds = (unsigned short *) &ttask->sizes[mapnum]; 815 memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short)); 816 if (tgt_align) 817 { 818 char *tgt = (char *) &ttask->kinds[mapnum]; 819 size_t i; 820 uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); 821 if (al) 822 tgt += tgt_align - al; 823 tgt_size = 0; 824 for (i = 0; i < mapnum; i++) 825 if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) 826 { 827 size_t align = (size_t) 1 << (kinds[i] >> 8); 828 tgt_size = (tgt_size + align - 1) & ~(align - 1); 829 memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); 830 ttask->hostaddrs[i] = tgt + tgt_size; 831 tgt_size = tgt_size + sizes[i]; 832 } 833 } 834 ttask->flags = flags; 835 ttask->state = state; 836 ttask->task = task; 837 ttask->team = team; 838 task->fn = NULL; 839 task->fn_data = ttask; 840 task->final_task = 0; 841 gomp_mutex_lock (&team->task_lock); 842 /* If parallel or taskgroup has been cancelled, don't start new tasks. */ 843 if (__builtin_expect (gomp_cancel_var, 0)) 844 { 845 if (gomp_team_barrier_cancelled (&team->barrier)) 846 { 847 do_cancel: 848 gomp_mutex_unlock (&team->task_lock); 849 gomp_finish_task (task); 850 free (task); 851 return true; 852 } 853 if (taskgroup) 854 { 855 if (taskgroup->cancelled) 856 goto do_cancel; 857 if (taskgroup->workshare 858 && taskgroup->prev 859 && taskgroup->prev->cancelled) 860 goto do_cancel; 861 } 862 } 863 if (depend_size) 864 { 865 gomp_task_handle_depend (task, parent, depend); 866 if (task->num_dependees) 867 { 868 if (taskgroup) 869 taskgroup->num_children++; 870 gomp_mutex_unlock (&team->task_lock); 871 return true; 872 } 873 } 874 if (state == GOMP_TARGET_TASK_DATA) 875 { 876 gomp_task_run_post_handle_depend_hash (task); 877 gomp_mutex_unlock (&team->task_lock); 878 gomp_finish_task (task); 879 free (task); 880 return false; 881 } 882 if (taskgroup) 883 taskgroup->num_children++; 884 /* For async offloading, if we don't need to wait for dependencies, 885 run the gomp_target_task_fn right away, essentially schedule the 886 mapping part of the task in the current thread. */ 887 if (devicep != NULL 888 && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) 889 { 890 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, 891 PRIORITY_INSERT_END, 892 /*adjust_parent_depends_on=*/false, 893 task->parent_depends_on); 894 if (taskgroup) 895 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 896 task, 0, PRIORITY_INSERT_END, 897 /*adjust_parent_depends_on=*/false, 898 task->parent_depends_on); 899 task->pnode[PQ_TEAM].next = NULL; 900 task->pnode[PQ_TEAM].prev = NULL; 901 task->kind = GOMP_TASK_TIED; 902 ++team->task_count; 903 gomp_mutex_unlock (&team->task_lock); 904 905 thr->task = task; 906 gomp_target_task_fn (task->fn_data); 907 thr->task = parent; 908 909 gomp_mutex_lock (&team->task_lock); 910 task->kind = GOMP_TASK_ASYNC_RUNNING; 911 /* If GOMP_PLUGIN_target_task_completion has run already 912 in between gomp_target_task_fn and the mutex lock, 913 perform the requeuing here. */ 914 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 915 gomp_target_task_completion (team, task); 916 else 917 ttask->state = GOMP_TARGET_TASK_RUNNING; 918 gomp_mutex_unlock (&team->task_lock); 919 return true; 920 } 921 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0, 922 PRIORITY_INSERT_BEGIN, 923 /*adjust_parent_depends_on=*/false, 924 task->parent_depends_on); 925 if (taskgroup) 926 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0, 927 PRIORITY_INSERT_BEGIN, 928 /*adjust_parent_depends_on=*/false, 929 task->parent_depends_on); 930 priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0, 931 PRIORITY_INSERT_END, 932 /*adjust_parent_depends_on=*/false, 933 task->parent_depends_on); 934 ++team->task_count; 935 ++team->task_queued_count; 936 gomp_team_barrier_set_task_pending (&team->barrier); 937 do_wake = team->task_running_count + !parent->in_tied_task 938 < team->nthreads; 939 gomp_mutex_unlock (&team->task_lock); 940 if (do_wake) 941 gomp_team_barrier_wake (&team->barrier, 1); 942 return true; 943} 944 945/* Given a parent_depends_on task in LIST, move it to the front of its 946 priority so it is run as soon as possible. 947 948 Care is taken to update the list's LAST_PARENT_DEPENDS_ON field. 949 950 We rearrange the queue such that all parent_depends_on tasks are 951 first, and last_parent_depends_on points to the last such task we 952 rearranged. For example, given the following tasks in a queue 953 where PD[123] are the parent_depends_on tasks: 954 955 task->children 956 | 957 V 958 C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4 959 960 We rearrange such that: 961 962 task->children 963 | +--- last_parent_depends_on 964 | | 965 V V 966 PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4. */ 967 968static void inline 969priority_list_upgrade_task (struct priority_list *list, 970 struct priority_node *node) 971{ 972 struct priority_node *last_parent_depends_on 973 = list->last_parent_depends_on; 974 if (last_parent_depends_on) 975 { 976 node->prev->next = node->next; 977 node->next->prev = node->prev; 978 node->prev = last_parent_depends_on; 979 node->next = last_parent_depends_on->next; 980 node->prev->next = node; 981 node->next->prev = node; 982 } 983 else if (node != list->tasks) 984 { 985 node->prev->next = node->next; 986 node->next->prev = node->prev; 987 node->prev = list->tasks->prev; 988 node->next = list->tasks; 989 list->tasks = node; 990 node->prev->next = node; 991 node->next->prev = node; 992 } 993 list->last_parent_depends_on = node; 994} 995 996/* Given a parent_depends_on TASK in its parent's children_queue, move 997 it to the front of its priority so it is run as soon as possible. 998 999 PARENT is passed as an optimization. 1000 1001 (This function could be defined in priority_queue.c, but we want it 1002 inlined, and putting it in priority_queue.h is not an option, given 1003 that gomp_task has not been properly defined at that point). */ 1004 1005static void inline 1006priority_queue_upgrade_task (struct gomp_task *task, 1007 struct gomp_task *parent) 1008{ 1009 struct priority_queue *head = &parent->children_queue; 1010 struct priority_node *node = &task->pnode[PQ_CHILDREN]; 1011#if _LIBGOMP_CHECKING_ 1012 if (!task->parent_depends_on) 1013 gomp_fatal ("priority_queue_upgrade_task: task must be a " 1014 "parent_depends_on task"); 1015 if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task)) 1016 gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task); 1017#endif 1018 if (priority_queue_multi_p (head)) 1019 { 1020 struct priority_list *list 1021 = priority_queue_lookup_priority (head, task->priority); 1022 priority_list_upgrade_task (list, node); 1023 } 1024 else 1025 priority_list_upgrade_task (&head->l, node); 1026} 1027 1028/* Given a CHILD_TASK in LIST that is about to be executed, move it out of 1029 the way in LIST so that other tasks can be considered for 1030 execution. LIST contains tasks of type TYPE. 1031 1032 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field 1033 if applicable. */ 1034 1035static void inline 1036priority_list_downgrade_task (enum priority_queue_type type, 1037 struct priority_list *list, 1038 struct gomp_task *child_task) 1039{ 1040 struct priority_node *node = task_to_priority_node (type, child_task); 1041 if (list->tasks == node) 1042 list->tasks = node->next; 1043 else if (node->next != list->tasks) 1044 { 1045 /* The task in NODE is about to become TIED and TIED tasks 1046 cannot come before WAITING tasks. If we're about to 1047 leave the queue in such an indeterminate state, rewire 1048 things appropriately. However, a TIED task at the end is 1049 perfectly fine. */ 1050 struct gomp_task *next_task = priority_node_to_task (type, node->next); 1051 if (next_task->kind == GOMP_TASK_WAITING) 1052 { 1053 /* Remove from list. */ 1054 node->prev->next = node->next; 1055 node->next->prev = node->prev; 1056 /* Rewire at the end. */ 1057 node->next = list->tasks; 1058 node->prev = list->tasks->prev; 1059 list->tasks->prev->next = node; 1060 list->tasks->prev = node; 1061 } 1062 } 1063 1064 /* If the current task is the last_parent_depends_on for its 1065 priority, adjust last_parent_depends_on appropriately. */ 1066 if (__builtin_expect (child_task->parent_depends_on, 0) 1067 && list->last_parent_depends_on == node) 1068 { 1069 struct gomp_task *prev_child = priority_node_to_task (type, node->prev); 1070 if (node->prev != node 1071 && prev_child->kind == GOMP_TASK_WAITING 1072 && prev_child->parent_depends_on) 1073 list->last_parent_depends_on = node->prev; 1074 else 1075 { 1076 /* There are no more parent_depends_on entries waiting 1077 to run, clear the list. */ 1078 list->last_parent_depends_on = NULL; 1079 } 1080 } 1081} 1082 1083/* Given a TASK in HEAD that is about to be executed, move it out of 1084 the way so that other tasks can be considered for execution. HEAD 1085 contains tasks of type TYPE. 1086 1087 Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field 1088 if applicable. 1089 1090 (This function could be defined in priority_queue.c, but we want it 1091 inlined, and putting it in priority_queue.h is not an option, given 1092 that gomp_task has not been properly defined at that point). */ 1093 1094static void inline 1095priority_queue_downgrade_task (enum priority_queue_type type, 1096 struct priority_queue *head, 1097 struct gomp_task *task) 1098{ 1099#if _LIBGOMP_CHECKING_ 1100 if (!priority_queue_task_in_queue_p (type, head, task)) 1101 gomp_fatal ("Attempt to downgrade missing task %p", task); 1102#endif 1103 if (priority_queue_multi_p (head)) 1104 { 1105 struct priority_list *list 1106 = priority_queue_lookup_priority (head, task->priority); 1107 priority_list_downgrade_task (type, list, task); 1108 } 1109 else 1110 priority_list_downgrade_task (type, &head->l, task); 1111} 1112 1113/* Setup CHILD_TASK to execute. This is done by setting the task to 1114 TIED, and updating all relevant queues so that CHILD_TASK is no 1115 longer chosen for scheduling. Also, remove CHILD_TASK from the 1116 overall team task queue entirely. 1117 1118 Return TRUE if task or its containing taskgroup has been 1119 cancelled. */ 1120 1121static inline bool 1122gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent, 1123 struct gomp_team *team) 1124{ 1125#if _LIBGOMP_CHECKING_ 1126 if (child_task->parent) 1127 priority_queue_verify (PQ_CHILDREN, 1128 &child_task->parent->children_queue, true); 1129 if (child_task->taskgroup) 1130 priority_queue_verify (PQ_TASKGROUP, 1131 &child_task->taskgroup->taskgroup_queue, false); 1132 priority_queue_verify (PQ_TEAM, &team->task_queue, false); 1133#endif 1134 1135 /* Task is about to go tied, move it out of the way. */ 1136 if (parent) 1137 priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue, 1138 child_task); 1139 1140 /* Task is about to go tied, move it out of the way. */ 1141 struct gomp_taskgroup *taskgroup = child_task->taskgroup; 1142 if (taskgroup) 1143 priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 1144 child_task); 1145 1146 priority_queue_remove (PQ_TEAM, &team->task_queue, child_task, 1147 MEMMODEL_RELAXED); 1148 child_task->pnode[PQ_TEAM].next = NULL; 1149 child_task->pnode[PQ_TEAM].prev = NULL; 1150 child_task->kind = GOMP_TASK_TIED; 1151 1152 if (--team->task_queued_count == 0) 1153 gomp_team_barrier_clear_task_pending (&team->barrier); 1154 if (__builtin_expect (gomp_cancel_var, 0) 1155 && !child_task->copy_ctors_done) 1156 { 1157 if (gomp_team_barrier_cancelled (&team->barrier)) 1158 return true; 1159 if (taskgroup) 1160 { 1161 if (taskgroup->cancelled) 1162 return true; 1163 if (taskgroup->workshare 1164 && taskgroup->prev 1165 && taskgroup->prev->cancelled) 1166 return true; 1167 } 1168 } 1169 return false; 1170} 1171 1172static void 1173gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task) 1174{ 1175 struct gomp_task *parent = child_task->parent; 1176 size_t i; 1177 1178 for (i = 0; i < child_task->depend_count; i++) 1179 if (!child_task->depend[i].redundant) 1180 { 1181 if (child_task->depend[i].next) 1182 child_task->depend[i].next->prev = child_task->depend[i].prev; 1183 if (child_task->depend[i].prev) 1184 child_task->depend[i].prev->next = child_task->depend[i].next; 1185 else 1186 { 1187 hash_entry_type *slot 1188 = htab_find_slot (&parent->depend_hash, &child_task->depend[i], 1189 NO_INSERT); 1190 if (*slot != &child_task->depend[i]) 1191 abort (); 1192 if (child_task->depend[i].next) 1193 *slot = child_task->depend[i].next; 1194 else 1195 htab_clear_slot (parent->depend_hash, slot); 1196 } 1197 } 1198} 1199 1200/* After a CHILD_TASK has been run, adjust the dependency queue for 1201 each task that depends on CHILD_TASK, to record the fact that there 1202 is one less dependency to worry about. If a task that depended on 1203 CHILD_TASK now has no dependencies, place it in the various queues 1204 so it gets scheduled to run. 1205 1206 TEAM is the team to which CHILD_TASK belongs to. */ 1207 1208static size_t 1209gomp_task_run_post_handle_dependers (struct gomp_task *child_task, 1210 struct gomp_team *team) 1211{ 1212 struct gomp_task *parent = child_task->parent; 1213 size_t i, count = child_task->dependers->n_elem, ret = 0; 1214 for (i = 0; i < count; i++) 1215 { 1216 struct gomp_task *task = child_task->dependers->elem[i]; 1217 1218 /* CHILD_TASK satisfies a dependency for TASK. Keep track of 1219 TASK's remaining dependencies. Once TASK has no other 1220 dependencies, put it into the various queues so it will get 1221 scheduled for execution. */ 1222 if (--task->num_dependees != 0) 1223 continue; 1224 1225 struct gomp_taskgroup *taskgroup = task->taskgroup; 1226 if (parent) 1227 { 1228 priority_queue_insert (PQ_CHILDREN, &parent->children_queue, 1229 task, task->priority, 1230 PRIORITY_INSERT_BEGIN, 1231 /*adjust_parent_depends_on=*/true, 1232 task->parent_depends_on); 1233 if (parent->taskwait) 1234 { 1235 if (parent->taskwait->in_taskwait) 1236 { 1237 /* One more task has had its dependencies met. 1238 Inform any waiters. */ 1239 parent->taskwait->in_taskwait = false; 1240 gomp_sem_post (&parent->taskwait->taskwait_sem); 1241 } 1242 else if (parent->taskwait->in_depend_wait) 1243 { 1244 /* One more task has had its dependencies met. 1245 Inform any waiters. */ 1246 parent->taskwait->in_depend_wait = false; 1247 gomp_sem_post (&parent->taskwait->taskwait_sem); 1248 } 1249 } 1250 } 1251 else 1252 task->parent = NULL; 1253 if (taskgroup) 1254 { 1255 priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 1256 task, task->priority, 1257 PRIORITY_INSERT_BEGIN, 1258 /*adjust_parent_depends_on=*/false, 1259 task->parent_depends_on); 1260 if (taskgroup->in_taskgroup_wait) 1261 { 1262 /* One more task has had its dependencies met. 1263 Inform any waiters. */ 1264 taskgroup->in_taskgroup_wait = false; 1265 gomp_sem_post (&taskgroup->taskgroup_sem); 1266 } 1267 } 1268 priority_queue_insert (PQ_TEAM, &team->task_queue, 1269 task, task->priority, 1270 PRIORITY_INSERT_END, 1271 /*adjust_parent_depends_on=*/false, 1272 task->parent_depends_on); 1273 ++team->task_count; 1274 ++team->task_queued_count; 1275 ++ret; 1276 } 1277 free (child_task->dependers); 1278 child_task->dependers = NULL; 1279 if (ret > 1) 1280 gomp_team_barrier_set_task_pending (&team->barrier); 1281 return ret; 1282} 1283 1284static inline size_t 1285gomp_task_run_post_handle_depend (struct gomp_task *child_task, 1286 struct gomp_team *team) 1287{ 1288 if (child_task->depend_count == 0) 1289 return 0; 1290 1291 /* If parent is gone already, the hash table is freed and nothing 1292 will use the hash table anymore, no need to remove anything from it. */ 1293 if (child_task->parent != NULL) 1294 gomp_task_run_post_handle_depend_hash (child_task); 1295 1296 if (child_task->dependers == NULL) 1297 return 0; 1298 1299 return gomp_task_run_post_handle_dependers (child_task, team); 1300} 1301 1302/* Remove CHILD_TASK from its parent. */ 1303 1304static inline void 1305gomp_task_run_post_remove_parent (struct gomp_task *child_task) 1306{ 1307 struct gomp_task *parent = child_task->parent; 1308 if (parent == NULL) 1309 return; 1310 1311 /* If this was the last task the parent was depending on, 1312 synchronize with gomp_task_maybe_wait_for_dependencies so it can 1313 clean up and return. */ 1314 if (__builtin_expect (child_task->parent_depends_on, 0) 1315 && --parent->taskwait->n_depend == 0 1316 && parent->taskwait->in_depend_wait) 1317 { 1318 parent->taskwait->in_depend_wait = false; 1319 gomp_sem_post (&parent->taskwait->taskwait_sem); 1320 } 1321 1322 if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue, 1323 child_task, MEMMODEL_RELEASE) 1324 && parent->taskwait && parent->taskwait->in_taskwait) 1325 { 1326 parent->taskwait->in_taskwait = false; 1327 gomp_sem_post (&parent->taskwait->taskwait_sem); 1328 } 1329 child_task->pnode[PQ_CHILDREN].next = NULL; 1330 child_task->pnode[PQ_CHILDREN].prev = NULL; 1331} 1332 1333/* Remove CHILD_TASK from its taskgroup. */ 1334 1335static inline void 1336gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task) 1337{ 1338 struct gomp_taskgroup *taskgroup = child_task->taskgroup; 1339 if (taskgroup == NULL) 1340 return; 1341 bool empty = priority_queue_remove (PQ_TASKGROUP, 1342 &taskgroup->taskgroup_queue, 1343 child_task, MEMMODEL_RELAXED); 1344 child_task->pnode[PQ_TASKGROUP].next = NULL; 1345 child_task->pnode[PQ_TASKGROUP].prev = NULL; 1346 if (taskgroup->num_children > 1) 1347 --taskgroup->num_children; 1348 else 1349 { 1350 /* We access taskgroup->num_children in GOMP_taskgroup_end 1351 outside of the task lock mutex region, so 1352 need a release barrier here to ensure memory 1353 written by child_task->fn above is flushed 1354 before the NULL is written. */ 1355 __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE); 1356 } 1357 if (empty && taskgroup->in_taskgroup_wait) 1358 { 1359 taskgroup->in_taskgroup_wait = false; 1360 gomp_sem_post (&taskgroup->taskgroup_sem); 1361 } 1362} 1363 1364void 1365gomp_barrier_handle_tasks (gomp_barrier_state_t state) 1366{ 1367 struct gomp_thread *thr = gomp_thread (); 1368 struct gomp_team *team = thr->ts.team; 1369 struct gomp_task *task = thr->task; 1370 struct gomp_task *child_task = NULL; 1371 struct gomp_task *to_free = NULL; 1372 int do_wake = 0; 1373 1374 gomp_mutex_lock (&team->task_lock); 1375 if (gomp_barrier_last_thread (state)) 1376 { 1377 if (team->task_count == 0) 1378 { 1379 gomp_team_barrier_done (&team->barrier, state); 1380 gomp_mutex_unlock (&team->task_lock); 1381 gomp_team_barrier_wake (&team->barrier, 0); 1382 return; 1383 } 1384 gomp_team_barrier_set_waiting_for_tasks (&team->barrier); 1385 } 1386 1387 while (1) 1388 { 1389 bool cancelled = false; 1390 1391 if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED)) 1392 { 1393 bool ignored; 1394 child_task 1395 = priority_queue_next_task (PQ_TEAM, &team->task_queue, 1396 PQ_IGNORED, NULL, 1397 &ignored); 1398 cancelled = gomp_task_run_pre (child_task, child_task->parent, 1399 team); 1400 if (__builtin_expect (cancelled, 0)) 1401 { 1402 if (to_free) 1403 { 1404 gomp_finish_task (to_free); 1405 free (to_free); 1406 to_free = NULL; 1407 } 1408 goto finish_cancelled; 1409 } 1410 team->task_running_count++; 1411 child_task->in_tied_task = true; 1412 } 1413 else if (team->task_count == 0 1414 && gomp_team_barrier_waiting_for_tasks (&team->barrier)) 1415 { 1416 gomp_team_barrier_done (&team->barrier, state); 1417 gomp_mutex_unlock (&team->task_lock); 1418 gomp_team_barrier_wake (&team->barrier, 0); 1419 if (to_free) 1420 { 1421 gomp_finish_task (to_free); 1422 free (to_free); 1423 } 1424 return; 1425 } 1426 gomp_mutex_unlock (&team->task_lock); 1427 if (do_wake) 1428 { 1429 gomp_team_barrier_wake (&team->barrier, do_wake); 1430 do_wake = 0; 1431 } 1432 if (to_free) 1433 { 1434 gomp_finish_task (to_free); 1435 free (to_free); 1436 to_free = NULL; 1437 } 1438 if (child_task) 1439 { 1440 thr->task = child_task; 1441 if (__builtin_expect (child_task->fn == NULL, 0)) 1442 { 1443 if (gomp_target_task_fn (child_task->fn_data)) 1444 { 1445 thr->task = task; 1446 gomp_mutex_lock (&team->task_lock); 1447 child_task->kind = GOMP_TASK_ASYNC_RUNNING; 1448 team->task_running_count--; 1449 struct gomp_target_task *ttask 1450 = (struct gomp_target_task *) child_task->fn_data; 1451 /* If GOMP_PLUGIN_target_task_completion has run already 1452 in between gomp_target_task_fn and the mutex lock, 1453 perform the requeuing here. */ 1454 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 1455 gomp_target_task_completion (team, child_task); 1456 else 1457 ttask->state = GOMP_TARGET_TASK_RUNNING; 1458 child_task = NULL; 1459 continue; 1460 } 1461 } 1462 else 1463 child_task->fn (child_task->fn_data); 1464 thr->task = task; 1465 } 1466 else 1467 return; 1468 gomp_mutex_lock (&team->task_lock); 1469 if (child_task) 1470 { 1471 if (child_task->detach_team) 1472 { 1473 assert (child_task->detach_team == team); 1474 child_task->kind = GOMP_TASK_DETACHED; 1475 ++team->task_detach_count; 1476 --team->task_running_count; 1477 gomp_debug (0, 1478 "thread %d: task with event %p finished without " 1479 "completion event fulfilled in team barrier\n", 1480 thr->ts.team_id, child_task); 1481 child_task = NULL; 1482 continue; 1483 } 1484 1485 finish_cancelled:; 1486 size_t new_tasks 1487 = gomp_task_run_post_handle_depend (child_task, team); 1488 gomp_task_run_post_remove_parent (child_task); 1489 gomp_clear_parent (&child_task->children_queue); 1490 gomp_task_run_post_remove_taskgroup (child_task); 1491 to_free = child_task; 1492 if (!cancelled) 1493 team->task_running_count--; 1494 child_task = NULL; 1495 if (new_tasks > 1) 1496 { 1497 do_wake = team->nthreads - team->task_running_count; 1498 if (do_wake > new_tasks) 1499 do_wake = new_tasks; 1500 } 1501 --team->task_count; 1502 } 1503 } 1504} 1505 1506/* Called when encountering a taskwait directive. 1507 1508 Wait for all children of the current task. */ 1509 1510void 1511GOMP_taskwait (void) 1512{ 1513 struct gomp_thread *thr = gomp_thread (); 1514 struct gomp_team *team = thr->ts.team; 1515 struct gomp_task *task = thr->task; 1516 struct gomp_task *child_task = NULL; 1517 struct gomp_task *to_free = NULL; 1518 struct gomp_taskwait taskwait; 1519 int do_wake = 0; 1520 1521 /* The acquire barrier on load of task->children here synchronizes 1522 with the write of a NULL in gomp_task_run_post_remove_parent. It is 1523 not necessary that we synchronize with other non-NULL writes at 1524 this point, but we must ensure that all writes to memory by a 1525 child thread task work function are seen before we exit from 1526 GOMP_taskwait. */ 1527 if (task == NULL 1528 || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE)) 1529 return; 1530 1531 memset (&taskwait, 0, sizeof (taskwait)); 1532 bool child_q = false; 1533 gomp_mutex_lock (&team->task_lock); 1534 while (1) 1535 { 1536 bool cancelled = false; 1537 if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED)) 1538 { 1539 bool destroy_taskwait = task->taskwait != NULL; 1540 task->taskwait = NULL; 1541 gomp_mutex_unlock (&team->task_lock); 1542 if (to_free) 1543 { 1544 gomp_finish_task (to_free); 1545 free (to_free); 1546 } 1547 if (destroy_taskwait) 1548 gomp_sem_destroy (&taskwait.taskwait_sem); 1549 return; 1550 } 1551 struct gomp_task *next_task 1552 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, 1553 PQ_TEAM, &team->task_queue, &child_q); 1554 if (next_task->kind == GOMP_TASK_WAITING) 1555 { 1556 child_task = next_task; 1557 cancelled 1558 = gomp_task_run_pre (child_task, task, team); 1559 if (__builtin_expect (cancelled, 0)) 1560 { 1561 if (to_free) 1562 { 1563 gomp_finish_task (to_free); 1564 free (to_free); 1565 to_free = NULL; 1566 } 1567 goto finish_cancelled; 1568 } 1569 } 1570 else 1571 { 1572 /* All tasks we are waiting for are either running in other 1573 threads, are detached and waiting for the completion event to be 1574 fulfilled, or they are tasks that have not had their 1575 dependencies met (so they're not even in the queue). Wait 1576 for them. */ 1577 if (task->taskwait == NULL) 1578 { 1579 taskwait.in_depend_wait = false; 1580 gomp_sem_init (&taskwait.taskwait_sem, 0); 1581 task->taskwait = &taskwait; 1582 } 1583 taskwait.in_taskwait = true; 1584 } 1585 gomp_mutex_unlock (&team->task_lock); 1586 if (do_wake) 1587 { 1588 gomp_team_barrier_wake (&team->barrier, do_wake); 1589 do_wake = 0; 1590 } 1591 if (to_free) 1592 { 1593 gomp_finish_task (to_free); 1594 free (to_free); 1595 to_free = NULL; 1596 } 1597 if (child_task) 1598 { 1599 thr->task = child_task; 1600 if (__builtin_expect (child_task->fn == NULL, 0)) 1601 { 1602 if (gomp_target_task_fn (child_task->fn_data)) 1603 { 1604 thr->task = task; 1605 gomp_mutex_lock (&team->task_lock); 1606 child_task->kind = GOMP_TASK_ASYNC_RUNNING; 1607 struct gomp_target_task *ttask 1608 = (struct gomp_target_task *) child_task->fn_data; 1609 /* If GOMP_PLUGIN_target_task_completion has run already 1610 in between gomp_target_task_fn and the mutex lock, 1611 perform the requeuing here. */ 1612 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 1613 gomp_target_task_completion (team, child_task); 1614 else 1615 ttask->state = GOMP_TARGET_TASK_RUNNING; 1616 child_task = NULL; 1617 continue; 1618 } 1619 } 1620 else 1621 child_task->fn (child_task->fn_data); 1622 thr->task = task; 1623 } 1624 else 1625 gomp_sem_wait (&taskwait.taskwait_sem); 1626 gomp_mutex_lock (&team->task_lock); 1627 if (child_task) 1628 { 1629 if (child_task->detach_team) 1630 { 1631 assert (child_task->detach_team == team); 1632 child_task->kind = GOMP_TASK_DETACHED; 1633 ++team->task_detach_count; 1634 gomp_debug (0, 1635 "thread %d: task with event %p finished without " 1636 "completion event fulfilled in taskwait\n", 1637 thr->ts.team_id, child_task); 1638 child_task = NULL; 1639 continue; 1640 } 1641 1642 finish_cancelled:; 1643 size_t new_tasks 1644 = gomp_task_run_post_handle_depend (child_task, team); 1645 1646 if (child_q) 1647 { 1648 priority_queue_remove (PQ_CHILDREN, &task->children_queue, 1649 child_task, MEMMODEL_RELAXED); 1650 child_task->pnode[PQ_CHILDREN].next = NULL; 1651 child_task->pnode[PQ_CHILDREN].prev = NULL; 1652 } 1653 1654 gomp_clear_parent (&child_task->children_queue); 1655 1656 gomp_task_run_post_remove_taskgroup (child_task); 1657 1658 to_free = child_task; 1659 child_task = NULL; 1660 team->task_count--; 1661 if (new_tasks > 1) 1662 { 1663 do_wake = team->nthreads - team->task_running_count 1664 - !task->in_tied_task; 1665 if (do_wake > new_tasks) 1666 do_wake = new_tasks; 1667 } 1668 } 1669 } 1670} 1671 1672/* Called when encountering a taskwait directive with depend clause(s). 1673 Wait as if it was an mergeable included task construct with empty body. */ 1674 1675void 1676GOMP_taskwait_depend (void **depend) 1677{ 1678 struct gomp_thread *thr = gomp_thread (); 1679 struct gomp_team *team = thr->ts.team; 1680 1681 /* If parallel or taskgroup has been cancelled, return early. */ 1682 if (__builtin_expect (gomp_cancel_var, 0) && team) 1683 { 1684 if (gomp_team_barrier_cancelled (&team->barrier)) 1685 return; 1686 if (thr->task->taskgroup) 1687 { 1688 if (thr->task->taskgroup->cancelled) 1689 return; 1690 if (thr->task->taskgroup->workshare 1691 && thr->task->taskgroup->prev 1692 && thr->task->taskgroup->prev->cancelled) 1693 return; 1694 } 1695 } 1696 1697 if (thr->task && thr->task->depend_hash) 1698 gomp_task_maybe_wait_for_dependencies (depend); 1699} 1700 1701/* An undeferred task is about to run. Wait for all tasks that this 1702 undeferred task depends on. 1703 1704 This is done by first putting all known ready dependencies 1705 (dependencies that have their own dependencies met) at the top of 1706 the scheduling queues. Then we iterate through these imminently 1707 ready tasks (and possibly other high priority tasks), and run them. 1708 If we run out of ready dependencies to execute, we either wait for 1709 the remaining dependencies to finish, or wait for them to get 1710 scheduled so we can run them. 1711 1712 DEPEND is as in GOMP_task. */ 1713 1714void 1715gomp_task_maybe_wait_for_dependencies (void **depend) 1716{ 1717 struct gomp_thread *thr = gomp_thread (); 1718 struct gomp_task *task = thr->task; 1719 struct gomp_team *team = thr->ts.team; 1720 struct gomp_task_depend_entry elem, *ent = NULL; 1721 struct gomp_taskwait taskwait; 1722 size_t orig_ndepend = (uintptr_t) depend[0]; 1723 size_t nout = (uintptr_t) depend[1]; 1724 size_t ndepend = orig_ndepend; 1725 size_t normal = ndepend; 1726 size_t n = 2; 1727 size_t i; 1728 size_t num_awaited = 0; 1729 struct gomp_task *child_task = NULL; 1730 struct gomp_task *to_free = NULL; 1731 int do_wake = 0; 1732 1733 if (ndepend == 0) 1734 { 1735 ndepend = nout; 1736 nout = (uintptr_t) depend[2] + (uintptr_t) depend[3]; 1737 normal = nout + (uintptr_t) depend[4]; 1738 n = 5; 1739 } 1740 gomp_mutex_lock (&team->task_lock); 1741 for (i = 0; i < ndepend; i++) 1742 { 1743 elem.addr = depend[i + n]; 1744 elem.is_in = i >= nout; 1745 if (__builtin_expect (i >= normal, 0)) 1746 { 1747 void **d = (void **) elem.addr; 1748 switch ((uintptr_t) d[1]) 1749 { 1750 case GOMP_DEPEND_IN: 1751 break; 1752 case GOMP_DEPEND_OUT: 1753 case GOMP_DEPEND_INOUT: 1754 case GOMP_DEPEND_MUTEXINOUTSET: 1755 elem.is_in = 0; 1756 break; 1757 default: 1758 gomp_fatal ("unknown omp_depend_t dependence type %d", 1759 (int) (uintptr_t) d[1]); 1760 } 1761 elem.addr = d[0]; 1762 } 1763 ent = htab_find (task->depend_hash, &elem); 1764 for (; ent; ent = ent->next) 1765 if (elem.is_in && ent->is_in) 1766 continue; 1767 else 1768 { 1769 struct gomp_task *tsk = ent->task; 1770 if (!tsk->parent_depends_on) 1771 { 1772 tsk->parent_depends_on = true; 1773 ++num_awaited; 1774 /* If dependency TSK itself has no dependencies and is 1775 ready to run, move it up front so that we run it as 1776 soon as possible. */ 1777 if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING) 1778 priority_queue_upgrade_task (tsk, task); 1779 } 1780 } 1781 } 1782 if (num_awaited == 0) 1783 { 1784 gomp_mutex_unlock (&team->task_lock); 1785 return; 1786 } 1787 1788 memset (&taskwait, 0, sizeof (taskwait)); 1789 taskwait.n_depend = num_awaited; 1790 gomp_sem_init (&taskwait.taskwait_sem, 0); 1791 task->taskwait = &taskwait; 1792 1793 while (1) 1794 { 1795 bool cancelled = false; 1796 if (taskwait.n_depend == 0) 1797 { 1798 task->taskwait = NULL; 1799 gomp_mutex_unlock (&team->task_lock); 1800 if (to_free) 1801 { 1802 gomp_finish_task (to_free); 1803 free (to_free); 1804 } 1805 gomp_sem_destroy (&taskwait.taskwait_sem); 1806 return; 1807 } 1808 1809 /* Theoretically when we have multiple priorities, we should 1810 chose between the highest priority item in 1811 task->children_queue and team->task_queue here, so we should 1812 use priority_queue_next_task(). However, since we are 1813 running an undeferred task, perhaps that makes all tasks it 1814 depends on undeferred, thus a priority of INF? This would 1815 make it unnecessary to take anything into account here, 1816 but the dependencies. 1817 1818 On the other hand, if we want to use priority_queue_next_task(), 1819 care should be taken to only use priority_queue_remove() 1820 below if the task was actually removed from the children 1821 queue. */ 1822 bool ignored; 1823 struct gomp_task *next_task 1824 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, 1825 PQ_IGNORED, NULL, &ignored); 1826 1827 if (next_task->kind == GOMP_TASK_WAITING) 1828 { 1829 child_task = next_task; 1830 cancelled 1831 = gomp_task_run_pre (child_task, task, team); 1832 if (__builtin_expect (cancelled, 0)) 1833 { 1834 if (to_free) 1835 { 1836 gomp_finish_task (to_free); 1837 free (to_free); 1838 to_free = NULL; 1839 } 1840 goto finish_cancelled; 1841 } 1842 } 1843 else 1844 /* All tasks we are waiting for are either running in other 1845 threads, or they are tasks that have not had their 1846 dependencies met (so they're not even in the queue). Wait 1847 for them. */ 1848 taskwait.in_depend_wait = true; 1849 gomp_mutex_unlock (&team->task_lock); 1850 if (do_wake) 1851 { 1852 gomp_team_barrier_wake (&team->barrier, do_wake); 1853 do_wake = 0; 1854 } 1855 if (to_free) 1856 { 1857 gomp_finish_task (to_free); 1858 free (to_free); 1859 to_free = NULL; 1860 } 1861 if (child_task) 1862 { 1863 thr->task = child_task; 1864 if (__builtin_expect (child_task->fn == NULL, 0)) 1865 { 1866 if (gomp_target_task_fn (child_task->fn_data)) 1867 { 1868 thr->task = task; 1869 gomp_mutex_lock (&team->task_lock); 1870 child_task->kind = GOMP_TASK_ASYNC_RUNNING; 1871 struct gomp_target_task *ttask 1872 = (struct gomp_target_task *) child_task->fn_data; 1873 /* If GOMP_PLUGIN_target_task_completion has run already 1874 in between gomp_target_task_fn and the mutex lock, 1875 perform the requeuing here. */ 1876 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 1877 gomp_target_task_completion (team, child_task); 1878 else 1879 ttask->state = GOMP_TARGET_TASK_RUNNING; 1880 child_task = NULL; 1881 continue; 1882 } 1883 } 1884 else 1885 child_task->fn (child_task->fn_data); 1886 thr->task = task; 1887 } 1888 else 1889 gomp_sem_wait (&taskwait.taskwait_sem); 1890 gomp_mutex_lock (&team->task_lock); 1891 if (child_task) 1892 { 1893 finish_cancelled:; 1894 size_t new_tasks 1895 = gomp_task_run_post_handle_depend (child_task, team); 1896 if (child_task->parent_depends_on) 1897 --taskwait.n_depend; 1898 1899 priority_queue_remove (PQ_CHILDREN, &task->children_queue, 1900 child_task, MEMMODEL_RELAXED); 1901 child_task->pnode[PQ_CHILDREN].next = NULL; 1902 child_task->pnode[PQ_CHILDREN].prev = NULL; 1903 1904 gomp_clear_parent (&child_task->children_queue); 1905 gomp_task_run_post_remove_taskgroup (child_task); 1906 to_free = child_task; 1907 child_task = NULL; 1908 team->task_count--; 1909 if (new_tasks > 1) 1910 { 1911 do_wake = team->nthreads - team->task_running_count 1912 - !task->in_tied_task; 1913 if (do_wake > new_tasks) 1914 do_wake = new_tasks; 1915 } 1916 } 1917 } 1918} 1919 1920/* Called when encountering a taskyield directive. */ 1921 1922void 1923GOMP_taskyield (void) 1924{ 1925 /* Nothing at the moment. */ 1926} 1927 1928static inline struct gomp_taskgroup * 1929gomp_taskgroup_init (struct gomp_taskgroup *prev) 1930{ 1931 struct gomp_taskgroup *taskgroup 1932 = gomp_malloc (sizeof (struct gomp_taskgroup)); 1933 taskgroup->prev = prev; 1934 priority_queue_init (&taskgroup->taskgroup_queue); 1935 taskgroup->reductions = prev ? prev->reductions : NULL; 1936 taskgroup->in_taskgroup_wait = false; 1937 taskgroup->cancelled = false; 1938 taskgroup->workshare = false; 1939 taskgroup->num_children = 0; 1940 gomp_sem_init (&taskgroup->taskgroup_sem, 0); 1941 return taskgroup; 1942} 1943 1944void 1945GOMP_taskgroup_start (void) 1946{ 1947 struct gomp_thread *thr = gomp_thread (); 1948 struct gomp_team *team = thr->ts.team; 1949 struct gomp_task *task = thr->task; 1950 1951 /* If team is NULL, all tasks are executed as 1952 GOMP_TASK_UNDEFERRED tasks and thus all children tasks of 1953 taskgroup and their descendant tasks will be finished 1954 by the time GOMP_taskgroup_end is called. */ 1955 if (team == NULL) 1956 return; 1957 task->taskgroup = gomp_taskgroup_init (task->taskgroup); 1958} 1959 1960void 1961GOMP_taskgroup_end (void) 1962{ 1963 struct gomp_thread *thr = gomp_thread (); 1964 struct gomp_team *team = thr->ts.team; 1965 struct gomp_task *task = thr->task; 1966 struct gomp_taskgroup *taskgroup; 1967 struct gomp_task *child_task = NULL; 1968 struct gomp_task *to_free = NULL; 1969 int do_wake = 0; 1970 1971 if (team == NULL) 1972 return; 1973 taskgroup = task->taskgroup; 1974 if (__builtin_expect (taskgroup == NULL, 0) 1975 && thr->ts.level == 0) 1976 { 1977 /* This can happen if GOMP_taskgroup_start is called when 1978 thr->ts.team == NULL, but inside of the taskgroup there 1979 is #pragma omp target nowait that creates an implicit 1980 team with a single thread. In this case, we want to wait 1981 for all outstanding tasks in this team. */ 1982 gomp_team_barrier_wait (&team->barrier); 1983 return; 1984 } 1985 1986 /* The acquire barrier on load of taskgroup->num_children here 1987 synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup. 1988 It is not necessary that we synchronize with other non-0 writes at 1989 this point, but we must ensure that all writes to memory by a 1990 child thread task work function are seen before we exit from 1991 GOMP_taskgroup_end. */ 1992 if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0) 1993 goto finish; 1994 1995 bool unused; 1996 gomp_mutex_lock (&team->task_lock); 1997 while (1) 1998 { 1999 bool cancelled = false; 2000 if (priority_queue_empty_p (&taskgroup->taskgroup_queue, 2001 MEMMODEL_RELAXED)) 2002 { 2003 if (taskgroup->num_children) 2004 { 2005 if (priority_queue_empty_p (&task->children_queue, 2006 MEMMODEL_RELAXED)) 2007 goto do_wait; 2008 child_task 2009 = priority_queue_next_task (PQ_CHILDREN, &task->children_queue, 2010 PQ_TEAM, &team->task_queue, 2011 &unused); 2012 } 2013 else 2014 { 2015 gomp_mutex_unlock (&team->task_lock); 2016 if (to_free) 2017 { 2018 gomp_finish_task (to_free); 2019 free (to_free); 2020 } 2021 goto finish; 2022 } 2023 } 2024 else 2025 child_task 2026 = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue, 2027 PQ_TEAM, &team->task_queue, &unused); 2028 if (child_task->kind == GOMP_TASK_WAITING) 2029 { 2030 cancelled 2031 = gomp_task_run_pre (child_task, child_task->parent, team); 2032 if (__builtin_expect (cancelled, 0)) 2033 { 2034 if (to_free) 2035 { 2036 gomp_finish_task (to_free); 2037 free (to_free); 2038 to_free = NULL; 2039 } 2040 goto finish_cancelled; 2041 } 2042 } 2043 else 2044 { 2045 child_task = NULL; 2046 do_wait: 2047 /* All tasks we are waiting for are either running in other 2048 threads, or they are tasks that have not had their 2049 dependencies met (so they're not even in the queue). Wait 2050 for them. */ 2051 taskgroup->in_taskgroup_wait = true; 2052 } 2053 gomp_mutex_unlock (&team->task_lock); 2054 if (do_wake) 2055 { 2056 gomp_team_barrier_wake (&team->barrier, do_wake); 2057 do_wake = 0; 2058 } 2059 if (to_free) 2060 { 2061 gomp_finish_task (to_free); 2062 free (to_free); 2063 to_free = NULL; 2064 } 2065 if (child_task) 2066 { 2067 thr->task = child_task; 2068 if (__builtin_expect (child_task->fn == NULL, 0)) 2069 { 2070 if (gomp_target_task_fn (child_task->fn_data)) 2071 { 2072 thr->task = task; 2073 gomp_mutex_lock (&team->task_lock); 2074 child_task->kind = GOMP_TASK_ASYNC_RUNNING; 2075 struct gomp_target_task *ttask 2076 = (struct gomp_target_task *) child_task->fn_data; 2077 /* If GOMP_PLUGIN_target_task_completion has run already 2078 in between gomp_target_task_fn and the mutex lock, 2079 perform the requeuing here. */ 2080 if (ttask->state == GOMP_TARGET_TASK_FINISHED) 2081 gomp_target_task_completion (team, child_task); 2082 else 2083 ttask->state = GOMP_TARGET_TASK_RUNNING; 2084 child_task = NULL; 2085 continue; 2086 } 2087 } 2088 else 2089 child_task->fn (child_task->fn_data); 2090 thr->task = task; 2091 } 2092 else 2093 gomp_sem_wait (&taskgroup->taskgroup_sem); 2094 gomp_mutex_lock (&team->task_lock); 2095 if (child_task) 2096 { 2097 if (child_task->detach_team) 2098 { 2099 assert (child_task->detach_team == team); 2100 child_task->kind = GOMP_TASK_DETACHED; 2101 ++team->task_detach_count; 2102 gomp_debug (0, 2103 "thread %d: task with event %p finished without " 2104 "completion event fulfilled in taskgroup\n", 2105 thr->ts.team_id, child_task); 2106 child_task = NULL; 2107 continue; 2108 } 2109 2110 finish_cancelled:; 2111 size_t new_tasks 2112 = gomp_task_run_post_handle_depend (child_task, team); 2113 gomp_task_run_post_remove_parent (child_task); 2114 gomp_clear_parent (&child_task->children_queue); 2115 gomp_task_run_post_remove_taskgroup (child_task); 2116 to_free = child_task; 2117 child_task = NULL; 2118 team->task_count--; 2119 if (new_tasks > 1) 2120 { 2121 do_wake = team->nthreads - team->task_running_count 2122 - !task->in_tied_task; 2123 if (do_wake > new_tasks) 2124 do_wake = new_tasks; 2125 } 2126 } 2127 } 2128 2129 finish: 2130 task->taskgroup = taskgroup->prev; 2131 gomp_sem_destroy (&taskgroup->taskgroup_sem); 2132 free (taskgroup); 2133} 2134 2135static inline __attribute__((always_inline)) void 2136gomp_reduction_register (uintptr_t *data, uintptr_t *old, uintptr_t *orig, 2137 unsigned nthreads) 2138{ 2139 size_t total_cnt = 0; 2140 uintptr_t *d = data; 2141 struct htab *old_htab = NULL, *new_htab; 2142 do 2143 { 2144 if (__builtin_expect (orig != NULL, 0)) 2145 { 2146 /* For worksharing task reductions, memory has been allocated 2147 already by some other thread that encountered the construct 2148 earlier. */ 2149 d[2] = orig[2]; 2150 d[6] = orig[6]; 2151 orig = (uintptr_t *) orig[4]; 2152 } 2153 else 2154 { 2155 size_t sz = d[1] * nthreads; 2156 /* Should use omp_alloc if d[3] is not -1. */ 2157 void *ptr = gomp_aligned_alloc (d[2], sz); 2158 memset (ptr, '\0', sz); 2159 d[2] = (uintptr_t) ptr; 2160 d[6] = d[2] + sz; 2161 } 2162 d[5] = 0; 2163 total_cnt += d[0]; 2164 if (d[4] == 0) 2165 { 2166 d[4] = (uintptr_t) old; 2167 break; 2168 } 2169 else 2170 d = (uintptr_t *) d[4]; 2171 } 2172 while (1); 2173 if (old && old[5]) 2174 { 2175 old_htab = (struct htab *) old[5]; 2176 total_cnt += htab_elements (old_htab); 2177 } 2178 new_htab = htab_create (total_cnt); 2179 if (old_htab) 2180 { 2181 /* Copy old hash table, like in htab_expand. */ 2182 hash_entry_type *p, *olimit; 2183 new_htab->n_elements = htab_elements (old_htab); 2184 olimit = old_htab->entries + old_htab->size; 2185 p = old_htab->entries; 2186 do 2187 { 2188 hash_entry_type x = *p; 2189 if (x != HTAB_EMPTY_ENTRY && x != HTAB_DELETED_ENTRY) 2190 *find_empty_slot_for_expand (new_htab, htab_hash (x)) = x; 2191 p++; 2192 } 2193 while (p < olimit); 2194 } 2195 d = data; 2196 do 2197 { 2198 size_t j; 2199 for (j = 0; j < d[0]; ++j) 2200 { 2201 uintptr_t *p = d + 7 + j * 3; 2202 p[2] = (uintptr_t) d; 2203 /* Ugly hack, hash_entry_type is defined for the task dependencies, 2204 which hash on the first element which is a pointer. We need 2205 to hash also on the first sizeof (uintptr_t) bytes which contain 2206 a pointer. Hide the cast from the compiler. */ 2207 hash_entry_type n; 2208 __asm ("" : "=g" (n) : "0" (p)); 2209 *htab_find_slot (&new_htab, n, INSERT) = n; 2210 } 2211 if (d[4] == (uintptr_t) old) 2212 break; 2213 else 2214 d = (uintptr_t *) d[4]; 2215 } 2216 while (1); 2217 d[5] = (uintptr_t) new_htab; 2218} 2219 2220static void 2221gomp_create_artificial_team (void) 2222{ 2223 struct gomp_thread *thr = gomp_thread (); 2224 struct gomp_task_icv *icv; 2225 struct gomp_team *team = gomp_new_team (1); 2226 struct gomp_task *task = thr->task; 2227 struct gomp_task **implicit_task = &task; 2228 icv = task ? &task->icv : &gomp_global_icv; 2229 team->prev_ts = thr->ts; 2230 thr->ts.team = team; 2231 thr->ts.team_id = 0; 2232 thr->ts.work_share = &team->work_shares[0]; 2233 thr->ts.last_work_share = NULL; 2234#ifdef HAVE_SYNC_BUILTINS 2235 thr->ts.single_count = 0; 2236#endif 2237 thr->ts.static_trip = 0; 2238 thr->task = &team->implicit_task[0]; 2239 gomp_init_task (thr->task, NULL, icv); 2240 while (*implicit_task 2241 && (*implicit_task)->kind != GOMP_TASK_IMPLICIT) 2242 implicit_task = &(*implicit_task)->parent; 2243 if (*implicit_task) 2244 { 2245 thr->task = *implicit_task; 2246 gomp_end_task (); 2247 free (*implicit_task); 2248 thr->task = &team->implicit_task[0]; 2249 } 2250#ifdef LIBGOMP_USE_PTHREADS 2251 else 2252 pthread_setspecific (gomp_thread_destructor, thr); 2253#endif 2254 if (implicit_task != &task) 2255 { 2256 *implicit_task = thr->task; 2257 thr->task = task; 2258 } 2259} 2260 2261/* The format of data is: 2262 data[0] cnt 2263 data[1] size 2264 data[2] alignment (on output array pointer) 2265 data[3] allocator (-1 if malloc allocator) 2266 data[4] next pointer 2267 data[5] used internally (htab pointer) 2268 data[6] used internally (end of array) 2269 cnt times 2270 ent[0] address 2271 ent[1] offset 2272 ent[2] used internally (pointer to data[0]) 2273 The entries are sorted by increasing offset, so that a binary 2274 search can be performed. Normally, data[8] is 0, exception is 2275 for worksharing construct task reductions in cancellable parallel, 2276 where at offset 0 there should be space for a pointer and an integer 2277 which are used internally. */ 2278 2279void 2280GOMP_taskgroup_reduction_register (uintptr_t *data) 2281{ 2282 struct gomp_thread *thr = gomp_thread (); 2283 struct gomp_team *team = thr->ts.team; 2284 struct gomp_task *task; 2285 unsigned nthreads; 2286 if (__builtin_expect (team == NULL, 0)) 2287 { 2288 /* The task reduction code needs a team and task, so for 2289 orphaned taskgroups just create the implicit team. */ 2290 gomp_create_artificial_team (); 2291 ialias_call (GOMP_taskgroup_start) (); 2292 team = thr->ts.team; 2293 } 2294 nthreads = team->nthreads; 2295 task = thr->task; 2296 gomp_reduction_register (data, task->taskgroup->reductions, NULL, nthreads); 2297 task->taskgroup->reductions = data; 2298} 2299 2300void 2301GOMP_taskgroup_reduction_unregister (uintptr_t *data) 2302{ 2303 uintptr_t *d = data; 2304 htab_free ((struct htab *) data[5]); 2305 do 2306 { 2307 gomp_aligned_free ((void *) d[2]); 2308 d = (uintptr_t *) d[4]; 2309 } 2310 while (d && !d[5]); 2311} 2312ialias (GOMP_taskgroup_reduction_unregister) 2313 2314/* For i = 0 to cnt-1, remap ptrs[i] which is either address of the 2315 original list item or address of previously remapped original list 2316 item to address of the private copy, store that to ptrs[i]. 2317 For i < cntorig, additionally set ptrs[cnt+i] to the address of 2318 the original list item. */ 2319 2320void 2321GOMP_task_reduction_remap (size_t cnt, size_t cntorig, void **ptrs) 2322{ 2323 struct gomp_thread *thr = gomp_thread (); 2324 struct gomp_task *task = thr->task; 2325 unsigned id = thr->ts.team_id; 2326 uintptr_t *data = task->taskgroup->reductions; 2327 uintptr_t *d; 2328 struct htab *reduction_htab = (struct htab *) data[5]; 2329 size_t i; 2330 for (i = 0; i < cnt; ++i) 2331 { 2332 hash_entry_type ent, n; 2333 __asm ("" : "=g" (ent) : "0" (ptrs + i)); 2334 n = htab_find (reduction_htab, ent); 2335 if (n) 2336 { 2337 uintptr_t *p; 2338 __asm ("" : "=g" (p) : "0" (n)); 2339 /* At this point, p[0] should be equal to (uintptr_t) ptrs[i], 2340 p[1] is the offset within the allocated chunk for each 2341 thread, p[2] is the array registered with 2342 GOMP_taskgroup_reduction_register, d[2] is the base of the 2343 allocated memory and d[1] is the size of the allocated chunk 2344 for one thread. */ 2345 d = (uintptr_t *) p[2]; 2346 ptrs[i] = (void *) (d[2] + id * d[1] + p[1]); 2347 if (__builtin_expect (i < cntorig, 0)) 2348 ptrs[cnt + i] = (void *) p[0]; 2349 continue; 2350 } 2351 d = data; 2352 while (d != NULL) 2353 { 2354 if ((uintptr_t) ptrs[i] >= d[2] && (uintptr_t) ptrs[i] < d[6]) 2355 break; 2356 d = (uintptr_t *) d[4]; 2357 } 2358 if (d == NULL) 2359 gomp_fatal ("couldn't find matching task_reduction or reduction with " 2360 "task modifier for %p", ptrs[i]); 2361 uintptr_t off = ((uintptr_t) ptrs[i] - d[2]) % d[1]; 2362 ptrs[i] = (void *) (d[2] + id * d[1] + off); 2363 if (__builtin_expect (i < cntorig, 0)) 2364 { 2365 size_t lo = 0, hi = d[0] - 1; 2366 while (lo <= hi) 2367 { 2368 size_t m = (lo + hi) / 2; 2369 if (d[7 + 3 * m + 1] < off) 2370 lo = m + 1; 2371 else if (d[7 + 3 * m + 1] == off) 2372 { 2373 ptrs[cnt + i] = (void *) d[7 + 3 * m]; 2374 break; 2375 } 2376 else 2377 hi = m - 1; 2378 } 2379 if (lo > hi) 2380 gomp_fatal ("couldn't find matching task_reduction or reduction " 2381 "with task modifier for %p", ptrs[i]); 2382 } 2383 } 2384} 2385 2386struct gomp_taskgroup * 2387gomp_parallel_reduction_register (uintptr_t *data, unsigned nthreads) 2388{ 2389 struct gomp_taskgroup *taskgroup = gomp_taskgroup_init (NULL); 2390 gomp_reduction_register (data, NULL, NULL, nthreads); 2391 taskgroup->reductions = data; 2392 return taskgroup; 2393} 2394 2395void 2396gomp_workshare_task_reduction_register (uintptr_t *data, uintptr_t *orig) 2397{ 2398 struct gomp_thread *thr = gomp_thread (); 2399 struct gomp_team *team = thr->ts.team; 2400 struct gomp_task *task = thr->task; 2401 unsigned nthreads = team->nthreads; 2402 gomp_reduction_register (data, task->taskgroup->reductions, orig, nthreads); 2403 task->taskgroup->reductions = data; 2404} 2405 2406void 2407gomp_workshare_taskgroup_start (void) 2408{ 2409 struct gomp_thread *thr = gomp_thread (); 2410 struct gomp_team *team = thr->ts.team; 2411 struct gomp_task *task; 2412 2413 if (team == NULL) 2414 { 2415 gomp_create_artificial_team (); 2416 team = thr->ts.team; 2417 } 2418 task = thr->task; 2419 task->taskgroup = gomp_taskgroup_init (task->taskgroup); 2420 task->taskgroup->workshare = true; 2421} 2422 2423void 2424GOMP_workshare_task_reduction_unregister (bool cancelled) 2425{ 2426 struct gomp_thread *thr = gomp_thread (); 2427 struct gomp_task *task = thr->task; 2428 struct gomp_team *team = thr->ts.team; 2429 uintptr_t *data = task->taskgroup->reductions; 2430 ialias_call (GOMP_taskgroup_end) (); 2431 if (thr->ts.team_id == 0) 2432 ialias_call (GOMP_taskgroup_reduction_unregister) (data); 2433 else 2434 htab_free ((struct htab *) data[5]); 2435 2436 if (!cancelled) 2437 gomp_team_barrier_wait (&team->barrier); 2438} 2439 2440int 2441omp_in_final (void) 2442{ 2443 struct gomp_thread *thr = gomp_thread (); 2444 return thr->task && thr->task->final_task; 2445} 2446 2447ialias (omp_in_final) 2448 2449void 2450omp_fulfill_event (omp_event_handle_t event) 2451{ 2452 struct gomp_task *task = (struct gomp_task *) event; 2453 if (!task->deferred_p) 2454 { 2455 if (gomp_sem_getcount (task->completion_sem) > 0) 2456 gomp_fatal ("omp_fulfill_event: %p event already fulfilled!\n", task); 2457 2458 gomp_debug (0, "omp_fulfill_event: %p event for undeferred task\n", 2459 task); 2460 gomp_sem_post (task->completion_sem); 2461 return; 2462 } 2463 2464 struct gomp_team *team = __atomic_load_n (&task->detach_team, 2465 MEMMODEL_RELAXED); 2466 if (!team) 2467 gomp_fatal ("omp_fulfill_event: %p event is invalid or has already " 2468 "been fulfilled!\n", task); 2469 2470 gomp_mutex_lock (&team->task_lock); 2471 if (task->kind != GOMP_TASK_DETACHED) 2472 { 2473 /* The task has not finished running yet. */ 2474 gomp_debug (0, 2475 "omp_fulfill_event: %p event fulfilled for unfinished " 2476 "task\n", task); 2477 __atomic_store_n (&task->detach_team, NULL, MEMMODEL_RELAXED); 2478 gomp_mutex_unlock (&team->task_lock); 2479 return; 2480 } 2481 2482 gomp_debug (0, "omp_fulfill_event: %p event fulfilled for finished task\n", 2483 task); 2484 size_t new_tasks = gomp_task_run_post_handle_depend (task, team); 2485 gomp_task_run_post_remove_parent (task); 2486 gomp_clear_parent (&task->children_queue); 2487 gomp_task_run_post_remove_taskgroup (task); 2488 team->task_count--; 2489 team->task_detach_count--; 2490 2491 int do_wake = 0; 2492 bool shackled_thread_p = team == gomp_thread ()->ts.team; 2493 if (new_tasks > 0) 2494 { 2495 /* Wake up threads to run new tasks. */ 2496 gomp_team_barrier_set_task_pending (&team->barrier); 2497 do_wake = team->nthreads - team->task_running_count; 2498 if (do_wake > new_tasks) 2499 do_wake = new_tasks; 2500 } 2501 2502 if (!shackled_thread_p 2503 && !do_wake 2504 && team->task_detach_count == 0 2505 && gomp_team_barrier_waiting_for_tasks (&team->barrier)) 2506 /* Ensure that at least one thread is woken up to signal that the 2507 barrier can finish. */ 2508 do_wake = 1; 2509 2510 /* If we are running in an unshackled thread, the team might vanish before 2511 gomp_team_barrier_wake is run if we release the lock first, so keep the 2512 lock for the call in that case. */ 2513 if (shackled_thread_p) 2514 gomp_mutex_unlock (&team->task_lock); 2515 if (do_wake) 2516 gomp_team_barrier_wake (&team->barrier, do_wake); 2517 if (!shackled_thread_p) 2518 gomp_mutex_unlock (&team->task_lock); 2519 2520 gomp_finish_task (task); 2521 free (task); 2522} 2523 2524ialias (omp_fulfill_event) 2525