task.c revision 1.6
1/* Copyright (C) 2007-2016 Free Software Foundation, Inc.
2   Contributed by Richard Henderson <rth@redhat.com>.
3
4   This file is part of the GNU Offloading and Multi Processing Library
5   (libgomp).
6
7   Libgomp is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3, or (at your option)
10   any later version.
11
12   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
13   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15   more details.
16
17   Under Section 7 of GPL version 3, you are granted additional
18   permissions described in the GCC Runtime Library Exception, version
19   3.1, as published by the Free Software Foundation.
20
21   You should have received a copy of the GNU General Public License and
22   a copy of the GCC Runtime Library Exception along with this program;
23   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24   <http://www.gnu.org/licenses/>.  */
25
26/* This file handles the maintenance of tasks in response to task
27   creation and termination.  */
28
29#include "libgomp.h"
30#include <stdlib.h>
31#include <string.h>
32#include "gomp-constants.h"
33
34typedef struct gomp_task_depend_entry *hash_entry_type;
35
36static inline void *
37htab_alloc (size_t size)
38{
39  return gomp_malloc (size);
40}
41
42static inline void
43htab_free (void *ptr)
44{
45  free (ptr);
46}
47
48#include "hashtab.h"
49
50static inline hashval_t
51htab_hash (hash_entry_type element)
52{
53  return hash_pointer (element->addr);
54}
55
56static inline bool
57htab_eq (hash_entry_type x, hash_entry_type y)
58{
59  return x->addr == y->addr;
60}
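
/* The definitions above specialize the generic hash table in hashtab.h
   for dependence tracking: entries are gomp_task_depend_entry pointers
   that are hashed and compared by the address they refer to, so every
   dependence on the same address lands in the same hash chain.  */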
61
62/* Create a new task data structure.  */
63
64void
65gomp_init_task (struct gomp_task *task, struct gomp_task *parent_task,
66		struct gomp_task_icv *prev_icv)
67{
68  /* It would seem that using memset here would be a win, but it turns
69     out that partially filling gomp_task allows us to keep the
70     overhead of task creation low.  In the nqueens-1.c test, for a
71     sufficiently large N, we drop the overhead from 5-6% to 1%.
72
73     Note that the nqueens-1.c test in serial mode is a good test to
74     benchmark the overhead of creating tasks as there are millions of
75     tiny tasks created that all run undeferred.  */
76  task->parent = parent_task;
77  task->icv = *prev_icv;
78  task->kind = GOMP_TASK_IMPLICIT;
79  task->taskwait = NULL;
80  task->in_tied_task = false;
81  task->final_task = false;
82  task->copy_ctors_done = false;
83  task->parent_depends_on = false;
84  priority_queue_init (&task->children_queue);
85  task->taskgroup = NULL;
86  task->dependers = NULL;
87  task->depend_hash = NULL;
88  task->depend_count = 0;
89}
90
91/* Clean up a task, after completing it.  */
92
93void
94gomp_end_task (void)
95{
96  struct gomp_thread *thr = gomp_thread ();
97  struct gomp_task *task = thr->task;
98
99  gomp_finish_task (task);
100  thr->task = task->parent;
101}
102
103/* Clear the parent field of every task in LIST.  */
104
105static inline void
106gomp_clear_parent_in_list (struct priority_list *list)
107{
108  struct priority_node *p = list->tasks;
109  if (p)
110    do
111      {
112	priority_node_to_task (PQ_CHILDREN, p)->parent = NULL;
113	p = p->next;
114      }
115    while (p != list->tasks);
116}
117
118/* Splay tree version of gomp_clear_parent_in_list.
119
120   Clear the parent field of every task in NODE within SP, and free
121   the node when done.  */
122
123static void
124gomp_clear_parent_in_tree (prio_splay_tree sp, prio_splay_tree_node node)
125{
126  if (!node)
127    return;
128  prio_splay_tree_node left = node->left, right = node->right;
129  gomp_clear_parent_in_list (&node->key.l);
130#if _LIBGOMP_CHECKING_
131  memset (node, 0xaf, sizeof (*node));
132#endif
133  /* No need to remove the node from the tree.  We're nuking
134     everything, so just free the nodes and our caller can clear the
135     entire splay tree.  */
136  free (node);
137  gomp_clear_parent_in_tree (sp, left);
138  gomp_clear_parent_in_tree (sp, right);
139}
140
141/* Clear the parent field of every task in Q and remove every task
142   from Q.  */
143
144static inline void
145gomp_clear_parent (struct priority_queue *q)
146{
147  if (priority_queue_multi_p (q))
148    {
149      gomp_clear_parent_in_tree (&q->t, q->t.root);
150      /* All the nodes have been cleared in gomp_clear_parent_in_tree.
151	 No need to remove anything.  We can just nuke everything.  */
152      q->t.root = NULL;
153    }
154  else
155    gomp_clear_parent_in_list (&q->l);
156}
157
158/* Helper function for GOMP_task and gomp_create_target_task.
159
160   For a TASK with in/out dependencies, fill in the various dependency
161   queues.  PARENT is the parent of said task.  DEPEND is as in
162   GOMP_task.  */
163
164static void
165gomp_task_handle_depend (struct gomp_task *task, struct gomp_task *parent,
166			 void **depend)
167{
168  size_t ndepend = (uintptr_t) depend[0];
169  size_t nout = (uintptr_t) depend[1];
170  size_t i;
171  hash_entry_type ent;
172
173  task->depend_count = ndepend;
174  task->num_dependees = 0;
175  if (parent->depend_hash == NULL)
176    parent->depend_hash = htab_create (2 * ndepend > 12 ? 2 * ndepend : 12);
177  for (i = 0; i < ndepend; i++)
178    {
179      task->depend[i].addr = depend[2 + i];
180      task->depend[i].next = NULL;
181      task->depend[i].prev = NULL;
182      task->depend[i].task = task;
183      task->depend[i].is_in = i >= nout;
184      task->depend[i].redundant = false;
185      task->depend[i].redundant_out = false;
186
187      hash_entry_type *slot = htab_find_slot (&parent->depend_hash,
188					      &task->depend[i], INSERT);
189      hash_entry_type out = NULL, last = NULL;
190      if (*slot)
191	{
192	  /* If this task has multiple depend clauses on the same address, all
193	     but the first one are redundant.  As inout/out come first, if any
194	     of them is inout/out, it will win, which is the right semantics.  */
195	  if ((*slot)->task == task)
196	    {
197	      task->depend[i].redundant = true;
198	      continue;
199	    }
200	  for (ent = *slot; ent; ent = ent->next)
201	    {
202	      if (ent->redundant_out)
203		break;
204
205	      last = ent;
206
207	      /* depend(in:...) doesn't depend on earlier depend(in:...).  */
208	      if (i >= nout && ent->is_in)
209		continue;
210
211	      if (!ent->is_in)
212		out = ent;
213
214	      struct gomp_task *tsk = ent->task;
215	      if (tsk->dependers == NULL)
216		{
217		  tsk->dependers
218		    = gomp_malloc (sizeof (struct gomp_dependers_vec)
219				   + 6 * sizeof (struct gomp_task *));
220		  tsk->dependers->n_elem = 1;
221		  tsk->dependers->allocated = 6;
222		  tsk->dependers->elem[0] = task;
223		  task->num_dependees++;
224		  continue;
225		}
226	      /* We already have some other dependency on tsk from earlier
227		 depend clause.  */
228	      else if (tsk->dependers->n_elem
229		       && (tsk->dependers->elem[tsk->dependers->n_elem - 1]
230			   == task))
231		continue;
232	      else if (tsk->dependers->n_elem == tsk->dependers->allocated)
233		{
234		  tsk->dependers->allocated
235		    = tsk->dependers->allocated * 2 + 2;
236		  tsk->dependers
237		    = gomp_realloc (tsk->dependers,
238				    sizeof (struct gomp_dependers_vec)
239				    + (tsk->dependers->allocated
240				       * sizeof (struct gomp_task *)));
241		}
242	      tsk->dependers->elem[tsk->dependers->n_elem++] = task;
243	      task->num_dependees++;
244	    }
245	  task->depend[i].next = *slot;
246	  (*slot)->prev = &task->depend[i];
247	}
248      *slot = &task->depend[i];
249
250      /* There is no need to store more than one depend({,in}out:) task per
251	 address in the hash table chain for the purpose of creation of
252	 deferred tasks, because each out depends on all earlier outs, thus it
253	 is enough to record just the last depend({,in}out:).  For depend(in:),
254	 we need to keep all of the previous ones not terminated yet, because
255	 a later depend({,in}out:) might need to depend on all of them.  So, if
256	 the new task's clause is depend({,in}out:), we know there is at most
257	 one other depend({,in}out:) clause in the list (out).  For
258	 non-deferred tasks we want to see all outs, so they are moved to the
259	 end of the chain; after the first redundant_out entry, all following
260	 entries should be redundant_out.  */
261      if (!task->depend[i].is_in && out)
262	{
263	  if (out != last)
264	    {
265	      out->next->prev = out->prev;
266	      out->prev->next = out->next;
267	      out->next = last->next;
268	      out->prev = last;
269	      last->next = out;
270	      if (out->next)
271		out->next->prev = out;
272	    }
273	  out->redundant_out = true;
274	}
275    }
276}
277
278/* Called when encountering an explicit task directive.  If IF_CLAUSE is
279   false, then we must not delay in executing the task.  If UNTIED is true,
280   then the task may be executed by any member of the team.
281
282   DEPEND is an array containing:
283	depend[0]: number of depend elements.
284	depend[1]: number of depend elements of type "out".
285	depend[2..N+1]: address of [1..N]th depend element.  */
286
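/* For illustration only (user code, not part of libgomp): for a construct
   such as

     int x = 0, y = 1;
     #pragma omp task depend(out: x) depend(in: y)
     x = y + 1;

   a compiler passes a DEPEND array laid out as described above, roughly

     void *depend[4] = { (void *) 2, (void *) 1, &x, &y };

   with the out/inout addresses first, and calls GOMP_task with
   GOMP_TASK_FLAG_DEPEND set in FLAGS.  The exact lowering is compiler
   specific; this sketch only shows the documented layout.  */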
287void
288GOMP_task (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
289	   long arg_size, long arg_align, bool if_clause, unsigned flags,
290	   void **depend, int priority)
291{
292  struct gomp_thread *thr = gomp_thread ();
293  struct gomp_team *team = thr->ts.team;
294
295#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
296  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
297     tied to one thread all the time.  This means UNTIED tasks must be
298     tied, and if CPYFN is non-NULL, IF(0) must be forced, as CPYFN
299     might be running on a different thread than FN.  */
300  if (cpyfn)
301    if_clause = false;
302  flags &= ~GOMP_TASK_FLAG_UNTIED;
303#endif
304
305  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
306  if (team
307      && (gomp_team_barrier_cancelled (&team->barrier)
308	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
309    return;
310
311  if ((flags & GOMP_TASK_FLAG_PRIORITY) == 0)
312    priority = 0;
313  else if (priority > gomp_max_task_priority_var)
314    priority = gomp_max_task_priority_var;
315
316  if (!if_clause || team == NULL
317      || (thr->task && thr->task->final_task)
318      || team->task_count > 64 * team->nthreads)
319    {
320      struct gomp_task task;
321
322	 with depend clauses, check whether there is a dependency.  If there
323	 is, we need to wait for those tasks.  There is no need to handle
324	 is, we need to wait for them.  There is no need to handle
325	 depend clauses for non-deferred tasks other than this, because
326	 the parent task is suspended until the child task finishes and thus
327	 it can't start further child tasks.  */
328      if ((flags & GOMP_TASK_FLAG_DEPEND)
329	  && thr->task && thr->task->depend_hash)
330	gomp_task_maybe_wait_for_dependencies (depend);
331
332      gomp_init_task (&task, thr->task, gomp_icv (false));
333      task.kind = GOMP_TASK_UNDEFERRED;
334      task.final_task = (thr->task && thr->task->final_task)
335			|| (flags & GOMP_TASK_FLAG_FINAL);
336      task.priority = priority;
337      if (thr->task)
338	{
339	  task.in_tied_task = thr->task->in_tied_task;
340	  task.taskgroup = thr->task->taskgroup;
341	}
342      thr->task = &task;
343      if (__builtin_expect (cpyfn != NULL, 0))
344	{
345	  char buf[arg_size + arg_align - 1];
346	  char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
347				& ~(uintptr_t) (arg_align - 1));
348	  cpyfn (arg, data);
349	  fn (arg);
350	}
351      else
352	fn (data);
353      /* Access to "children" is normally done inside a task_lock
354	 mutex region, but the only way this particular task.children
355	 can be set is if this thread's task work function (fn)
356	 creates children.  So since the setter is *this* thread, we
357	 need no barriers here when testing for non-NULL.  We can have
358	 task.children set by the current thread then changed by a
359	 child thread, but seeing a stale non-NULL value is not a
360	 problem.  Once past the task_lock acquisition, this thread
361	 will see the real value of task.children.  */
362      if (!priority_queue_empty_p (&task.children_queue, MEMMODEL_RELAXED))
363	{
364	  gomp_mutex_lock (&team->task_lock);
365	  gomp_clear_parent (&task.children_queue);
366	  gomp_mutex_unlock (&team->task_lock);
367	}
368      gomp_end_task ();
369    }
370  else
371    {
372      struct gomp_task *task;
373      struct gomp_task *parent = thr->task;
374      struct gomp_taskgroup *taskgroup = parent->taskgroup;
375      char *arg;
376      bool do_wake;
377      size_t depend_size = 0;
378
379      if (flags & GOMP_TASK_FLAG_DEPEND)
380	depend_size = ((uintptr_t) depend[0]
381		       * sizeof (struct gomp_task_depend_entry));
382      task = gomp_malloc (sizeof (*task) + depend_size
383			  + arg_size + arg_align - 1);
384      arg = (char *) (((uintptr_t) (task + 1) + depend_size + arg_align - 1)
385		      & ~(uintptr_t) (arg_align - 1));
386      gomp_init_task (task, parent, gomp_icv (false));
387      task->priority = priority;
388      task->kind = GOMP_TASK_UNDEFERRED;
389      task->in_tied_task = parent->in_tied_task;
390      task->taskgroup = taskgroup;
391      thr->task = task;
392      if (cpyfn)
393	{
394	  cpyfn (arg, data);
395	  task->copy_ctors_done = true;
396	}
397      else
398	memcpy (arg, data, arg_size);
399      thr->task = parent;
400      task->kind = GOMP_TASK_WAITING;
401      task->fn = fn;
402      task->fn_data = arg;
403      task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
404      gomp_mutex_lock (&team->task_lock);
405      /* If parallel or taskgroup has been cancelled, don't start new
406	 tasks.  */
407      if (__builtin_expect ((gomp_team_barrier_cancelled (&team->barrier)
408			     || (taskgroup && taskgroup->cancelled))
409			    && !task->copy_ctors_done, 0))
410	{
411	  gomp_mutex_unlock (&team->task_lock);
412	  gomp_finish_task (task);
413	  free (task);
414	  return;
415	}
416      if (taskgroup)
417	taskgroup->num_children++;
418      if (depend_size)
419	{
420	  gomp_task_handle_depend (task, parent, depend);
421	  if (task->num_dependees)
422	    {
423	      /* Tasks that depend on other tasks are not put into the
424		 various waiting queues, so we are done for now.  Said
425		 tasks are instead put into the queues via
426		 gomp_task_run_post_handle_dependers() after their
427		 dependencies have been satisfied.  After which, they
428		 can be picked up by the various scheduling
429		 points.  */
430	      gomp_mutex_unlock (&team->task_lock);
431	      return;
432	    }
433	}
434
435      priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
436			     task, priority,
437			     PRIORITY_INSERT_BEGIN,
438			     /*adjust_parent_depends_on=*/false,
439			     task->parent_depends_on);
440      if (taskgroup)
441	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
442			       task, priority,
443			       PRIORITY_INSERT_BEGIN,
444			       /*adjust_parent_depends_on=*/false,
445			       task->parent_depends_on);
446
447      priority_queue_insert (PQ_TEAM, &team->task_queue,
448			     task, priority,
449			     PRIORITY_INSERT_END,
450			     /*adjust_parent_depends_on=*/false,
451			     task->parent_depends_on);
452
453      ++team->task_count;
454      ++team->task_queued_count;
455      gomp_team_barrier_set_task_pending (&team->barrier);
456      do_wake = team->task_running_count + !parent->in_tied_task
457		< team->nthreads;
458      gomp_mutex_unlock (&team->task_lock);
459      if (do_wake)
460	gomp_team_barrier_wake (&team->barrier, 1);
461    }
462}
463
464ialias (GOMP_taskgroup_start)
465ialias (GOMP_taskgroup_end)
466
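/* taskloop.c is textually included twice below: once with TYPE set to
   long to provide GOMP_taskloop, and once with TYPE set to unsigned
   long long (and GOMP_taskloop renamed) to provide GOMP_taskloop_ull.  */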
467#define TYPE long
468#define UTYPE unsigned long
469#define TYPE_is_long 1
470#include "taskloop.c"
471#undef TYPE
472#undef UTYPE
473#undef TYPE_is_long
474
475#define TYPE unsigned long long
476#define UTYPE TYPE
477#define GOMP_taskloop GOMP_taskloop_ull
478#include "taskloop.c"
479#undef TYPE
480#undef UTYPE
481#undef GOMP_taskloop
482
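/* Move TASK, which must already be queued in HEAD, to the front of its
   priority list within HEAD.  TYPE identifies which of TASK's priority
   nodes corresponds to HEAD.  Used below when an asynchronous target
   task completes and needs to be rescheduled promptly.  */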
483static inline void
484priority_queue_move_task_first (enum priority_queue_type type,
485				struct priority_queue *head,
486				struct gomp_task *task)
487{
488#if _LIBGOMP_CHECKING_
489  if (!priority_queue_task_in_queue_p (type, head, task))
490    gomp_fatal ("Attempt to move first missing task %p", task);
491#endif
492  struct priority_list *list;
493  if (priority_queue_multi_p (head))
494    {
495      list = priority_queue_lookup_priority (head, task->priority);
496#if _LIBGOMP_CHECKING_
497      if (!list)
498	gomp_fatal ("Unable to find priority %d", task->priority);
499#endif
500    }
501  else
502    list = &head->l;
503  priority_list_remove (list, task_to_priority_node (type, task), 0);
504  priority_list_insert (type, list, task, task->priority,
505			PRIORITY_INSERT_BEGIN, type == PQ_CHILDREN,
506			task->parent_depends_on);
507}
508
509/* Actual body of GOMP_PLUGIN_target_task_completion that is executed
510   with team->task_lock held, or is executed in the thread that called
511   gomp_target_task_fn if GOMP_PLUGIN_target_task_completion has been
512   run before it acquires team->task_lock.  */
513
514static void
515gomp_target_task_completion (struct gomp_team *team, struct gomp_task *task)
516{
517  struct gomp_task *parent = task->parent;
518  if (parent)
519    priority_queue_move_task_first (PQ_CHILDREN, &parent->children_queue,
520				    task);
521
522  struct gomp_taskgroup *taskgroup = task->taskgroup;
523  if (taskgroup)
524    priority_queue_move_task_first (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
525				    task);
526
527  priority_queue_insert (PQ_TEAM, &team->task_queue, task, task->priority,
528			 PRIORITY_INSERT_BEGIN, false,
529			 task->parent_depends_on);
530  task->kind = GOMP_TASK_WAITING;
531  if (parent && parent->taskwait)
532    {
533      if (parent->taskwait->in_taskwait)
534	{
535	  /* One more task has had its dependencies met.
536	     Inform any waiters.  */
537	  parent->taskwait->in_taskwait = false;
538	  gomp_sem_post (&parent->taskwait->taskwait_sem);
539	}
540      else if (parent->taskwait->in_depend_wait)
541	{
542	  /* One more task has had its dependencies met.
543	     Inform any waiters.  */
544	  parent->taskwait->in_depend_wait = false;
545	  gomp_sem_post (&parent->taskwait->taskwait_sem);
546	}
547    }
548  if (taskgroup && taskgroup->in_taskgroup_wait)
549    {
550      /* One more task has had its dependencies met.
551	 Inform any waiters.  */
552      taskgroup->in_taskgroup_wait = false;
553      gomp_sem_post (&taskgroup->taskgroup_sem);
554    }
555
556  ++team->task_queued_count;
557  gomp_team_barrier_set_task_pending (&team->barrier);
558  /* I'm afraid this can't be done after releasing team->task_lock,
559     as gomp_target_task_completion is run from an unrelated thread and
560     therefore in between gomp_mutex_unlock and gomp_team_barrier_wake
561     the team could be gone already.  */
562  if (team->nthreads > team->task_running_count)
563    gomp_team_barrier_wake (&team->barrier, 1);
564}
565
566/* Signal that a target task TTASK has completed the asynchronously
567   running phase and should be requeued as a task to handle the
568   variable unmapping.  */
569
570void
571GOMP_PLUGIN_target_task_completion (void *data)
572{
573  struct gomp_target_task *ttask = (struct gomp_target_task *) data;
574  struct gomp_task *task = ttask->task;
575  struct gomp_team *team = ttask->team;
576
577  gomp_mutex_lock (&team->task_lock);
578  if (ttask->state == GOMP_TARGET_TASK_READY_TO_RUN)
579    {
580      ttask->state = GOMP_TARGET_TASK_FINISHED;
581      gomp_mutex_unlock (&team->task_lock);
582      return;
583    }
584  ttask->state = GOMP_TARGET_TASK_FINISHED;
585  gomp_target_task_completion (team, task);
586  gomp_mutex_unlock (&team->task_lock);
587}
588
589static void gomp_task_run_post_handle_depend_hash (struct gomp_task *);
590
591/* Called for nowait target tasks.  */
592
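/* For illustration only (user code, not part of libgomp): a construct
   such as

     #pragma omp target nowait map(tofrom: x)
     x++;

   reaches this function when offloading is in use, so that the mapping
   and kernel launch run as a deferred task (gomp_target_task_fn) and
   the device completion callback later requeues the task to perform
   the unmapping.  */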
593bool
594gomp_create_target_task (struct gomp_device_descr *devicep,
595			 void (*fn) (void *), size_t mapnum, void **hostaddrs,
596			 size_t *sizes, unsigned short *kinds,
597			 unsigned int flags, void **depend, void **args,
598			 enum gomp_target_task_state state)
599{
600  struct gomp_thread *thr = gomp_thread ();
601  struct gomp_team *team = thr->ts.team;
602
603  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
604  if (team
605      && (gomp_team_barrier_cancelled (&team->barrier)
606	  || (thr->task->taskgroup && thr->task->taskgroup->cancelled)))
607    return true;
608
609  struct gomp_target_task *ttask;
610  struct gomp_task *task;
611  struct gomp_task *parent = thr->task;
612  struct gomp_taskgroup *taskgroup = parent->taskgroup;
613  bool do_wake;
614  size_t depend_size = 0;
615  uintptr_t depend_cnt = 0;
616  size_t tgt_align = 0, tgt_size = 0;
617
618  if (depend != NULL)
619    {
620      depend_cnt = (uintptr_t) depend[0];
621      depend_size = depend_cnt * sizeof (struct gomp_task_depend_entry);
622    }
623  if (fn)
624    {
625      /* GOMP_MAP_FIRSTPRIVATE mappings need to be copied first, as they
626	 are firstprivate on the target task.  */
627      size_t i;
628      for (i = 0; i < mapnum; i++)
629	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
630	  {
631	    size_t align = (size_t) 1 << (kinds[i] >> 8);
632	    if (tgt_align < align)
633	      tgt_align = align;
634	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
635	    tgt_size += sizes[i];
636	  }
637      if (tgt_align)
638	tgt_size += tgt_align - 1;
639      else
640	tgt_size = 0;
641    }
642
643  task = gomp_malloc (sizeof (*task) + depend_size
644		      + sizeof (*ttask)
645		      + mapnum * (sizeof (void *) + sizeof (size_t)
646				  + sizeof (unsigned short))
647		      + tgt_size);
648  gomp_init_task (task, parent, gomp_icv (false));
649  task->priority = 0;
650  task->kind = GOMP_TASK_WAITING;
651  task->in_tied_task = parent->in_tied_task;
652  task->taskgroup = taskgroup;
653  ttask = (struct gomp_target_task *) &task->depend[depend_cnt];
654  ttask->devicep = devicep;
655  ttask->fn = fn;
656  ttask->mapnum = mapnum;
657  ttask->args = args;
658  memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *));
659  ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum];
660  memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t));
661  ttask->kinds = (unsigned short *) &ttask->sizes[mapnum];
662  memcpy (ttask->kinds, kinds, mapnum * sizeof (unsigned short));
663  if (tgt_align)
664    {
665      char *tgt = (char *) &ttask->kinds[mapnum];
666      size_t i;
667      uintptr_t al = (uintptr_t) tgt & (tgt_align - 1);
668      if (al)
669	tgt += tgt_align - al;
670      tgt_size = 0;
671      for (i = 0; i < mapnum; i++)
672	if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE)
673	  {
674	    size_t align = (size_t) 1 << (kinds[i] >> 8);
675	    tgt_size = (tgt_size + align - 1) & ~(align - 1);
676	    memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]);
677	    ttask->hostaddrs[i] = tgt + tgt_size;
678	    tgt_size = tgt_size + sizes[i];
679	  }
680    }
681  ttask->flags = flags;
682  ttask->state = state;
683  ttask->task = task;
684  ttask->team = team;
685  task->fn = NULL;
686  task->fn_data = ttask;
687  task->final_task = 0;
688  gomp_mutex_lock (&team->task_lock);
689  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
690  if (__builtin_expect (gomp_team_barrier_cancelled (&team->barrier)
691			|| (taskgroup && taskgroup->cancelled), 0))
692    {
693      gomp_mutex_unlock (&team->task_lock);
694      gomp_finish_task (task);
695      free (task);
696      return true;
697    }
698  if (depend_size)
699    {
700      gomp_task_handle_depend (task, parent, depend);
701      if (task->num_dependees)
702	{
703	  if (taskgroup)
704	    taskgroup->num_children++;
705	  gomp_mutex_unlock (&team->task_lock);
706	  return true;
707	}
708    }
709  if (state == GOMP_TARGET_TASK_DATA)
710    {
711      gomp_task_run_post_handle_depend_hash (task);
712      gomp_mutex_unlock (&team->task_lock);
713      gomp_finish_task (task);
714      free (task);
715      return false;
716    }
717  if (taskgroup)
718    taskgroup->num_children++;
719  /* For async offloading, if we don't need to wait for dependencies,
720     run the gomp_target_task_fn right away, essentially schedule the
721     mapping part of the task in the current thread.  */
722  if (devicep != NULL
723      && (devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400))
724    {
725      priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
726			     PRIORITY_INSERT_END,
727			     /*adjust_parent_depends_on=*/false,
728			     task->parent_depends_on);
729      if (taskgroup)
730	priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
731			       task, 0, PRIORITY_INSERT_END,
732			       /*adjust_parent_depends_on=*/false,
733			       task->parent_depends_on);
734      task->pnode[PQ_TEAM].next = NULL;
735      task->pnode[PQ_TEAM].prev = NULL;
736      task->kind = GOMP_TASK_TIED;
737      ++team->task_count;
738      gomp_mutex_unlock (&team->task_lock);
739
740      thr->task = task;
741      gomp_target_task_fn (task->fn_data);
742      thr->task = parent;
743
744      gomp_mutex_lock (&team->task_lock);
745      task->kind = GOMP_TASK_ASYNC_RUNNING;
746      /* If GOMP_PLUGIN_target_task_completion has run already
747	 in between gomp_target_task_fn and the mutex lock,
748	 perform the requeuing here.  */
749      if (ttask->state == GOMP_TARGET_TASK_FINISHED)
750	gomp_target_task_completion (team, task);
751      else
752	ttask->state = GOMP_TARGET_TASK_RUNNING;
753      gomp_mutex_unlock (&team->task_lock);
754      return true;
755    }
756  priority_queue_insert (PQ_CHILDREN, &parent->children_queue, task, 0,
757			 PRIORITY_INSERT_BEGIN,
758			 /*adjust_parent_depends_on=*/false,
759			 task->parent_depends_on);
760  if (taskgroup)
761    priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue, task, 0,
762			   PRIORITY_INSERT_BEGIN,
763			   /*adjust_parent_depends_on=*/false,
764			   task->parent_depends_on);
765  priority_queue_insert (PQ_TEAM, &team->task_queue, task, 0,
766			 PRIORITY_INSERT_END,
767			 /*adjust_parent_depends_on=*/false,
768			 task->parent_depends_on);
769  ++team->task_count;
770  ++team->task_queued_count;
771  gomp_team_barrier_set_task_pending (&team->barrier);
772  do_wake = team->task_running_count + !parent->in_tied_task
773	    < team->nthreads;
774  gomp_mutex_unlock (&team->task_lock);
775  if (do_wake)
776    gomp_team_barrier_wake (&team->barrier, 1);
777  return true;
778}
779
780/* Given a parent_depends_on task in LIST, move it to the front of its
781   priority so it is run as soon as possible.
782
783   Care is taken to update the list's LAST_PARENT_DEPENDS_ON field.
784
785   We rearrange the queue such that all parent_depends_on tasks are
786   first, and last_parent_depends_on points to the last such task we
787   rearranged.  For example, given the following tasks in a queue
788   where PD[123] are the parent_depends_on tasks:
789
790	task->children
791	|
792	V
793	C1 -> C2 -> C3 -> PD1 -> PD2 -> PD3 -> C4
794
795	We rearrange such that:
796
797	task->children
798	|	       +--- last_parent_depends_on
799	|	       |
800	V	       V
801	PD1 -> PD2 -> PD3 -> C1 -> C2 -> C3 -> C4.  */
802
803static inline void
804priority_list_upgrade_task (struct priority_list *list,
805			    struct priority_node *node)
806{
807  struct priority_node *last_parent_depends_on
808    = list->last_parent_depends_on;
809  if (last_parent_depends_on)
810    {
811      node->prev->next = node->next;
812      node->next->prev = node->prev;
813      node->prev = last_parent_depends_on;
814      node->next = last_parent_depends_on->next;
815      node->prev->next = node;
816      node->next->prev = node;
817    }
818  else if (node != list->tasks)
819    {
820      node->prev->next = node->next;
821      node->next->prev = node->prev;
822      node->prev = list->tasks->prev;
823      node->next = list->tasks;
824      list->tasks = node;
825      node->prev->next = node;
826      node->next->prev = node;
827    }
828  list->last_parent_depends_on = node;
829}
830
831/* Given a parent_depends_on TASK in its parent's children_queue, move
832   it to the front of its priority so it is run as soon as possible.
833
834   PARENT is passed as an optimization.
835
836   (This function could be defined in priority_queue.c, but we want it
837   inlined, and putting it in priority_queue.h is not an option, given
838   that gomp_task has not been properly defined at that point).  */
839
840static inline void
841priority_queue_upgrade_task (struct gomp_task *task,
842			     struct gomp_task *parent)
843{
844  struct priority_queue *head = &parent->children_queue;
845  struct priority_node *node = &task->pnode[PQ_CHILDREN];
846#if _LIBGOMP_CHECKING_
847  if (!task->parent_depends_on)
848    gomp_fatal ("priority_queue_upgrade_task: task must be a "
849		"parent_depends_on task");
850  if (!priority_queue_task_in_queue_p (PQ_CHILDREN, head, task))
851    gomp_fatal ("priority_queue_upgrade_task: cannot find task=%p", task);
852#endif
853  if (priority_queue_multi_p (head))
854    {
855      struct priority_list *list
856	= priority_queue_lookup_priority (head, task->priority);
857      priority_list_upgrade_task (list, node);
858    }
859  else
860    priority_list_upgrade_task (&head->l, node);
861}
862
863/* Given a CHILD_TASK in LIST that is about to be executed, move it out of
864   the way in LIST so that other tasks can be considered for
865   execution.  LIST contains tasks of type TYPE.
866
867   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
868   if applicable.  */
869
870static inline void
871priority_list_downgrade_task (enum priority_queue_type type,
872			      struct priority_list *list,
873			      struct gomp_task *child_task)
874{
875  struct priority_node *node = task_to_priority_node (type, child_task);
876  if (list->tasks == node)
877    list->tasks = node->next;
878  else if (node->next != list->tasks)
879    {
880      /* The task in NODE is about to become TIED and TIED tasks
881	 cannot come before WAITING tasks.  If we're about to
882	 leave the queue in such an indeterminate state, rewire
883	 things appropriately.  However, a TIED task at the end is
884	 perfectly fine.  */
885      struct gomp_task *next_task = priority_node_to_task (type, node->next);
886      if (next_task->kind == GOMP_TASK_WAITING)
887	{
888	  /* Remove from list.  */
889	  node->prev->next = node->next;
890	  node->next->prev = node->prev;
891	  /* Rewire at the end.  */
892	  node->next = list->tasks;
893	  node->prev = list->tasks->prev;
894	  list->tasks->prev->next = node;
895	  list->tasks->prev = node;
896	}
897    }
898
899  /* If the current task is the last_parent_depends_on for its
900     priority, adjust last_parent_depends_on appropriately.  */
901  if (__builtin_expect (child_task->parent_depends_on, 0)
902      && list->last_parent_depends_on == node)
903    {
904      struct gomp_task *prev_child = priority_node_to_task (type, node->prev);
905      if (node->prev != node
906	  && prev_child->kind == GOMP_TASK_WAITING
907	  && prev_child->parent_depends_on)
908	list->last_parent_depends_on = node->prev;
909      else
910	{
911	  /* There are no more parent_depends_on entries waiting
912	     to run, clear the list.  */
913	  list->last_parent_depends_on = NULL;
914	}
915    }
916}
917
918/* Given a TASK in HEAD that is about to be executed, move it out of
919   the way so that other tasks can be considered for execution.  HEAD
920   contains tasks of type TYPE.
921
922   Care is taken to update the queue's LAST_PARENT_DEPENDS_ON field
923   if applicable.
924
925   (This function could be defined in priority_queue.c, but we want it
926   inlined, and putting it in priority_queue.h is not an option, given
927   that gomp_task has not been properly defined at that point).  */
928
929static inline void
930priority_queue_downgrade_task (enum priority_queue_type type,
931			       struct priority_queue *head,
932			       struct gomp_task *task)
933{
934#if _LIBGOMP_CHECKING_
935  if (!priority_queue_task_in_queue_p (type, head, task))
936    gomp_fatal ("Attempt to downgrade missing task %p", task);
937#endif
938  if (priority_queue_multi_p (head))
939    {
940      struct priority_list *list
941	= priority_queue_lookup_priority (head, task->priority);
942      priority_list_downgrade_task (type, list, task);
943    }
944  else
945    priority_list_downgrade_task (type, &head->l, task);
946}
947
948/* Set up CHILD_TASK to execute.  This is done by setting the task to
949   TIED, and updating all relevant queues so that CHILD_TASK is no
950   longer chosen for scheduling.  Also, remove CHILD_TASK from the
951   overall team task queue entirely.
952
953   Return TRUE if task or its containing taskgroup has been
954   cancelled.  */
955
956static inline bool
957gomp_task_run_pre (struct gomp_task *child_task, struct gomp_task *parent,
958		   struct gomp_team *team)
959{
960#if _LIBGOMP_CHECKING_
961  if (child_task->parent)
962    priority_queue_verify (PQ_CHILDREN,
963			   &child_task->parent->children_queue, true);
964  if (child_task->taskgroup)
965    priority_queue_verify (PQ_TASKGROUP,
966			   &child_task->taskgroup->taskgroup_queue, false);
967  priority_queue_verify (PQ_TEAM, &team->task_queue, false);
968#endif
969
970  /* Task is about to go tied, move it out of the way.  */
971  if (parent)
972    priority_queue_downgrade_task (PQ_CHILDREN, &parent->children_queue,
973				   child_task);
974
975  /* Task is about to go tied, move it out of the way.  */
976  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
977  if (taskgroup)
978    priority_queue_downgrade_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
979				   child_task);
980
981  priority_queue_remove (PQ_TEAM, &team->task_queue, child_task,
982			 MEMMODEL_RELAXED);
983  child_task->pnode[PQ_TEAM].next = NULL;
984  child_task->pnode[PQ_TEAM].prev = NULL;
985  child_task->kind = GOMP_TASK_TIED;
986
987  if (--team->task_queued_count == 0)
988    gomp_team_barrier_clear_task_pending (&team->barrier);
989  if ((gomp_team_barrier_cancelled (&team->barrier)
990       || (taskgroup && taskgroup->cancelled))
991      && !child_task->copy_ctors_done)
992    return true;
993  return false;
994}
995
996static void
997gomp_task_run_post_handle_depend_hash (struct gomp_task *child_task)
998{
999  struct gomp_task *parent = child_task->parent;
1000  size_t i;
1001
1002  for (i = 0; i < child_task->depend_count; i++)
1003    if (!child_task->depend[i].redundant)
1004      {
1005	if (child_task->depend[i].next)
1006	  child_task->depend[i].next->prev = child_task->depend[i].prev;
1007	if (child_task->depend[i].prev)
1008	  child_task->depend[i].prev->next = child_task->depend[i].next;
1009	else
1010	  {
1011	    hash_entry_type *slot
1012	      = htab_find_slot (&parent->depend_hash, &child_task->depend[i],
1013				NO_INSERT);
1014	    if (*slot != &child_task->depend[i])
1015	      abort ();
1016	    if (child_task->depend[i].next)
1017	      *slot = child_task->depend[i].next;
1018	    else
1019	      htab_clear_slot (parent->depend_hash, slot);
1020	  }
1021      }
1022}
1023
1024/* After a CHILD_TASK has been run, adjust the dependency queue for
1025   each task that depends on CHILD_TASK, to record the fact that there
1026   is one less dependency to worry about.  If a task that depended on
1027   CHILD_TASK now has no dependencies, place it in the various queues
1028   so it gets scheduled to run.
1029
1030   TEAM is the team to which CHILD_TASK belongs.  */
1031
1032static size_t
1033gomp_task_run_post_handle_dependers (struct gomp_task *child_task,
1034				     struct gomp_team *team)
1035{
1036  struct gomp_task *parent = child_task->parent;
1037  size_t i, count = child_task->dependers->n_elem, ret = 0;
1038  for (i = 0; i < count; i++)
1039    {
1040      struct gomp_task *task = child_task->dependers->elem[i];
1041
1042      /* CHILD_TASK satisfies a dependency for TASK.  Keep track of
1043	 TASK's remaining dependencies.  Once TASK has no other
1044	 dependencies, put it into the various queues so it will get
1045	 scheduled for execution.  */
1046      if (--task->num_dependees != 0)
1047	continue;
1048
1049      struct gomp_taskgroup *taskgroup = task->taskgroup;
1050      if (parent)
1051	{
1052	  priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
1053				 task, task->priority,
1054				 PRIORITY_INSERT_BEGIN,
1055				 /*adjust_parent_depends_on=*/true,
1056				 task->parent_depends_on);
1057	  if (parent->taskwait)
1058	    {
1059	      if (parent->taskwait->in_taskwait)
1060		{
1061		  /* One more task has had its dependencies met.
1062		     Inform any waiters.  */
1063		  parent->taskwait->in_taskwait = false;
1064		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1065		}
1066	      else if (parent->taskwait->in_depend_wait)
1067		{
1068		  /* One more task has had its dependencies met.
1069		     Inform any waiters.  */
1070		  parent->taskwait->in_depend_wait = false;
1071		  gomp_sem_post (&parent->taskwait->taskwait_sem);
1072		}
1073	    }
1074	}
1075      if (taskgroup)
1076	{
1077	  priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1078				 task, task->priority,
1079				 PRIORITY_INSERT_BEGIN,
1080				 /*adjust_parent_depends_on=*/false,
1081				 task->parent_depends_on);
1082	  if (taskgroup->in_taskgroup_wait)
1083	    {
1084	      /* One more task has had its dependencies met.
1085		 Inform any waiters.  */
1086	      taskgroup->in_taskgroup_wait = false;
1087	      gomp_sem_post (&taskgroup->taskgroup_sem);
1088	    }
1089	}
1090      priority_queue_insert (PQ_TEAM, &team->task_queue,
1091			     task, task->priority,
1092			     PRIORITY_INSERT_END,
1093			     /*adjust_parent_depends_on=*/false,
1094			     task->parent_depends_on);
1095      ++team->task_count;
1096      ++team->task_queued_count;
1097      ++ret;
1098    }
1099  free (child_task->dependers);
1100  child_task->dependers = NULL;
1101  if (ret > 1)
1102    gomp_team_barrier_set_task_pending (&team->barrier);
1103  return ret;
1104}
1105
1106static inline size_t
1107gomp_task_run_post_handle_depend (struct gomp_task *child_task,
1108				  struct gomp_team *team)
1109{
1110  if (child_task->depend_count == 0)
1111    return 0;
1112
1113  /* If the parent is gone already, the hash table has been freed and
1114     nothing will use it anymore, so there is no need to remove anything.  */
1115  if (child_task->parent != NULL)
1116    gomp_task_run_post_handle_depend_hash (child_task);
1117
1118  if (child_task->dependers == NULL)
1119    return 0;
1120
1121  return gomp_task_run_post_handle_dependers (child_task, team);
1122}
1123
1124/* Remove CHILD_TASK from its parent.  */
1125
1126static inline void
1127gomp_task_run_post_remove_parent (struct gomp_task *child_task)
1128{
1129  struct gomp_task *parent = child_task->parent;
1130  if (parent == NULL)
1131    return;
1132
1133  /* If this was the last task the parent was depending on,
1134     synchronize with gomp_task_maybe_wait_for_dependencies so it can
1135     clean up and return.  */
1136  if (__builtin_expect (child_task->parent_depends_on, 0)
1137      && --parent->taskwait->n_depend == 0
1138      && parent->taskwait->in_depend_wait)
1139    {
1140      parent->taskwait->in_depend_wait = false;
1141      gomp_sem_post (&parent->taskwait->taskwait_sem);
1142    }
1143
1144  if (priority_queue_remove (PQ_CHILDREN, &parent->children_queue,
1145			     child_task, MEMMODEL_RELEASE)
1146      && parent->taskwait && parent->taskwait->in_taskwait)
1147    {
1148      parent->taskwait->in_taskwait = false;
1149      gomp_sem_post (&parent->taskwait->taskwait_sem);
1150    }
1151  child_task->pnode[PQ_CHILDREN].next = NULL;
1152  child_task->pnode[PQ_CHILDREN].prev = NULL;
1153}
1154
1155/* Remove CHILD_TASK from its taskgroup.  */
1156
1157static inline void
1158gomp_task_run_post_remove_taskgroup (struct gomp_task *child_task)
1159{
1160  struct gomp_taskgroup *taskgroup = child_task->taskgroup;
1161  if (taskgroup == NULL)
1162    return;
1163  bool empty = priority_queue_remove (PQ_TASKGROUP,
1164				      &taskgroup->taskgroup_queue,
1165				      child_task, MEMMODEL_RELAXED);
1166  child_task->pnode[PQ_TASKGROUP].next = NULL;
1167  child_task->pnode[PQ_TASKGROUP].prev = NULL;
1168  if (taskgroup->num_children > 1)
1169    --taskgroup->num_children;
1170  else
1171    {
1172      /* We access taskgroup->num_children in GOMP_taskgroup_end
1173	 outside of the task lock mutex region, so
1174	 outside of the task lock mutex region, so we
1175	 written by child_task->fn above is flushed
1176	 before the NULL is written.  */
1177      __atomic_store_n (&taskgroup->num_children, 0, MEMMODEL_RELEASE);
1178    }
1179  if (empty && taskgroup->in_taskgroup_wait)
1180    {
1181      taskgroup->in_taskgroup_wait = false;
1182      gomp_sem_post (&taskgroup->taskgroup_sem);
1183    }
1184}
1185
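/* Task scheduling point run from the team barrier: repeatedly pick
   queued tasks off team->task_queue and execute them until no runnable
   task is left, waking additional threads when newly satisfied
   dependencies produce more work.  STATE is this thread's barrier
   state.  */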
1186void
1187gomp_barrier_handle_tasks (gomp_barrier_state_t state)
1188{
1189  struct gomp_thread *thr = gomp_thread ();
1190  struct gomp_team *team = thr->ts.team;
1191  struct gomp_task *task = thr->task;
1192  struct gomp_task *child_task = NULL;
1193  struct gomp_task *to_free = NULL;
1194  int do_wake = 0;
1195
1196  gomp_mutex_lock (&team->task_lock);
1197  if (gomp_barrier_last_thread (state))
1198    {
1199      if (team->task_count == 0)
1200	{
1201	  gomp_team_barrier_done (&team->barrier, state);
1202	  gomp_mutex_unlock (&team->task_lock);
1203	  gomp_team_barrier_wake (&team->barrier, 0);
1204	  return;
1205	}
1206      gomp_team_barrier_set_waiting_for_tasks (&team->barrier);
1207    }
1208
1209  while (1)
1210    {
1211      bool cancelled = false;
1212      if (!priority_queue_empty_p (&team->task_queue, MEMMODEL_RELAXED))
1213	{
1214	  bool ignored;
1215	  child_task
1216	    = priority_queue_next_task (PQ_TEAM, &team->task_queue,
1217					PQ_IGNORED, NULL,
1218					&ignored);
1219	  cancelled = gomp_task_run_pre (child_task, child_task->parent,
1220					 team);
1221	  if (__builtin_expect (cancelled, 0))
1222	    {
1223	      if (to_free)
1224		{
1225		  gomp_finish_task (to_free);
1226		  free (to_free);
1227		  to_free = NULL;
1228		}
1229	      goto finish_cancelled;
1230	    }
1231	  team->task_running_count++;
1232	  child_task->in_tied_task = true;
1233	}
1234      gomp_mutex_unlock (&team->task_lock);
1235      if (do_wake)
1236	{
1237	  gomp_team_barrier_wake (&team->barrier, do_wake);
1238	  do_wake = 0;
1239	}
1240      if (to_free)
1241	{
1242	  gomp_finish_task (to_free);
1243	  free (to_free);
1244	  to_free = NULL;
1245	}
1246      if (child_task)
1247	{
1248	  thr->task = child_task;
1249	  if (__builtin_expect (child_task->fn == NULL, 0))
1250	    {
1251	      if (gomp_target_task_fn (child_task->fn_data))
1252		{
1253		  thr->task = task;
1254		  gomp_mutex_lock (&team->task_lock);
1255		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1256		  team->task_running_count--;
1257		  struct gomp_target_task *ttask
1258		    = (struct gomp_target_task *) child_task->fn_data;
1259		  /* If GOMP_PLUGIN_target_task_completion has run already
1260		     in between gomp_target_task_fn and the mutex lock,
1261		     perform the requeuing here.  */
1262		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1263		    gomp_target_task_completion (team, child_task);
1264		  else
1265		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1266		  child_task = NULL;
1267		  continue;
1268		}
1269	    }
1270	  else
1271	    child_task->fn (child_task->fn_data);
1272	  thr->task = task;
1273	}
1274      else
1275	return;
1276      gomp_mutex_lock (&team->task_lock);
1277      if (child_task)
1278	{
1279	 finish_cancelled:;
1280	  size_t new_tasks
1281	    = gomp_task_run_post_handle_depend (child_task, team);
1282	  gomp_task_run_post_remove_parent (child_task);
1283	  gomp_clear_parent (&child_task->children_queue);
1284	  gomp_task_run_post_remove_taskgroup (child_task);
1285	  to_free = child_task;
1286	  child_task = NULL;
1287	  if (!cancelled)
1288	    team->task_running_count--;
1289	  if (new_tasks > 1)
1290	    {
1291	      do_wake = team->nthreads - team->task_running_count;
1292	      if (do_wake > new_tasks)
1293		do_wake = new_tasks;
1294	    }
1295	  if (--team->task_count == 0
1296	      && gomp_team_barrier_waiting_for_tasks (&team->barrier))
1297	    {
1298	      gomp_team_barrier_done (&team->barrier, state);
1299	      gomp_mutex_unlock (&team->task_lock);
1300	      gomp_team_barrier_wake (&team->barrier, 0);
1301	      gomp_mutex_lock (&team->task_lock);
1302	    }
1303	}
1304    }
1305}
1306
1307/* Called when encountering a taskwait directive.
1308
1309   Wait for all children of the current task.  */
1310
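/* For illustration only (user code, not part of libgomp; work () is a
   hypothetical placeholder):

     #pragma omp task
     work ();
     #pragma omp taskwait

   lowers to a GOMP_task call followed by a call to GOMP_taskwait, which
   returns only after all children of the current task have completed,
   executing some of them in this thread if they are still queued.  */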
1311void
1312GOMP_taskwait (void)
1313{
1314  struct gomp_thread *thr = gomp_thread ();
1315  struct gomp_team *team = thr->ts.team;
1316  struct gomp_task *task = thr->task;
1317  struct gomp_task *child_task = NULL;
1318  struct gomp_task *to_free = NULL;
1319  struct gomp_taskwait taskwait;
1320  int do_wake = 0;
1321
1322  /* The acquire barrier on load of task->children here synchronizes
1323     with the write of a NULL in gomp_task_run_post_remove_parent.  It is
1324     not necessary that we synchronize with other non-NULL writes at
1325     this point, but we must ensure that all writes to memory by a
1326     child thread task work function are seen before we exit from
1327     GOMP_taskwait.  */
1328  if (task == NULL
1329      || priority_queue_empty_p (&task->children_queue, MEMMODEL_ACQUIRE))
1330    return;
1331
1332  memset (&taskwait, 0, sizeof (taskwait));
1333  bool child_q = false;
1334  gomp_mutex_lock (&team->task_lock);
1335  while (1)
1336    {
1337      bool cancelled = false;
1338      if (priority_queue_empty_p (&task->children_queue, MEMMODEL_RELAXED))
1339	{
1340	  bool destroy_taskwait = task->taskwait != NULL;
1341	  task->taskwait = NULL;
1342	  gomp_mutex_unlock (&team->task_lock);
1343	  if (to_free)
1344	    {
1345	      gomp_finish_task (to_free);
1346	      free (to_free);
1347	    }
1348	  if (destroy_taskwait)
1349	    gomp_sem_destroy (&taskwait.taskwait_sem);
1350	  return;
1351	}
1352      struct gomp_task *next_task
1353	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1354				    PQ_TEAM, &team->task_queue, &child_q);
1355      if (next_task->kind == GOMP_TASK_WAITING)
1356	{
1357	  child_task = next_task;
1358	  cancelled
1359	    = gomp_task_run_pre (child_task, task, team);
1360	  if (__builtin_expect (cancelled, 0))
1361	    {
1362	      if (to_free)
1363		{
1364		  gomp_finish_task (to_free);
1365		  free (to_free);
1366		  to_free = NULL;
1367		}
1368	      goto finish_cancelled;
1369	    }
1370	}
1371      else
1372	{
1373	/* All tasks we are waiting for are either running in other
1374	   threads, or they are tasks that have not had their
1375	   dependencies met (so they're not even in the queue).  Wait
1376	   for them.  */
1377	  if (task->taskwait == NULL)
1378	    {
1379	      taskwait.in_depend_wait = false;
1380	      gomp_sem_init (&taskwait.taskwait_sem, 0);
1381	      task->taskwait = &taskwait;
1382	    }
1383	  taskwait.in_taskwait = true;
1384	}
1385      gomp_mutex_unlock (&team->task_lock);
1386      if (do_wake)
1387	{
1388	  gomp_team_barrier_wake (&team->barrier, do_wake);
1389	  do_wake = 0;
1390	}
1391      if (to_free)
1392	{
1393	  gomp_finish_task (to_free);
1394	  free (to_free);
1395	  to_free = NULL;
1396	}
1397      if (child_task)
1398	{
1399	  thr->task = child_task;
1400	  if (__builtin_expect (child_task->fn == NULL, 0))
1401	    {
1402	      if (gomp_target_task_fn (child_task->fn_data))
1403		{
1404		  thr->task = task;
1405		  gomp_mutex_lock (&team->task_lock);
1406		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1407		  struct gomp_target_task *ttask
1408		    = (struct gomp_target_task *) child_task->fn_data;
1409		  /* If GOMP_PLUGIN_target_task_completion has run already
1410		     in between gomp_target_task_fn and the mutex lock,
1411		     perform the requeuing here.  */
1412		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1413		    gomp_target_task_completion (team, child_task);
1414		  else
1415		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1416		  child_task = NULL;
1417		  continue;
1418		}
1419	    }
1420	  else
1421	    child_task->fn (child_task->fn_data);
1422	  thr->task = task;
1423	}
1424      else
1425	gomp_sem_wait (&taskwait.taskwait_sem);
1426      gomp_mutex_lock (&team->task_lock);
1427      if (child_task)
1428	{
1429	 finish_cancelled:;
1430	  size_t new_tasks
1431	    = gomp_task_run_post_handle_depend (child_task, team);
1432
1433	  if (child_q)
1434	    {
1435	      priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1436				     child_task, MEMMODEL_RELAXED);
1437	      child_task->pnode[PQ_CHILDREN].next = NULL;
1438	      child_task->pnode[PQ_CHILDREN].prev = NULL;
1439	    }
1440
1441	  gomp_clear_parent (&child_task->children_queue);
1442
1443	  gomp_task_run_post_remove_taskgroup (child_task);
1444
1445	  to_free = child_task;
1446	  child_task = NULL;
1447	  team->task_count--;
1448	  if (new_tasks > 1)
1449	    {
1450	      do_wake = team->nthreads - team->task_running_count
1451			- !task->in_tied_task;
1452	      if (do_wake > new_tasks)
1453		do_wake = new_tasks;
1454	    }
1455	}
1456    }
1457}
1458
1459/* An undeferred task is about to run.  Wait for all tasks that this
1460   undeferred task depends on.
1461
1462   This is done by first putting all known ready dependencies
1463   (dependencies that have their own dependencies met) at the top of
1464   the scheduling queues.  Then we iterate through these imminently
1465   ready tasks (and possibly other high priority tasks), and run them.
1466   If we run out of ready dependencies to execute, we either wait for
1467   the remaining dependencies to finish, or wait for them to get
1468   scheduled so we can run them.
1469
1470   DEPEND is as in GOMP_task.  */
1471
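/* For illustration only (user code, not part of libgomp): this path is
   taken for an undeferred task with depend clauses, e.g.

     #pragma omp task if(0) depend(in: x)
     use (x);

   where the encountering thread must wait for earlier sibling tasks
   that wrote X before it can run the body itself.  use () is just a
   hypothetical placeholder.  */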
1472void
1473gomp_task_maybe_wait_for_dependencies (void **depend)
1474{
1475  struct gomp_thread *thr = gomp_thread ();
1476  struct gomp_task *task = thr->task;
1477  struct gomp_team *team = thr->ts.team;
1478  struct gomp_task_depend_entry elem, *ent = NULL;
1479  struct gomp_taskwait taskwait;
1480  size_t ndepend = (uintptr_t) depend[0];
1481  size_t nout = (uintptr_t) depend[1];
1482  size_t i;
1483  size_t num_awaited = 0;
1484  struct gomp_task *child_task = NULL;
1485  struct gomp_task *to_free = NULL;
1486  int do_wake = 0;
1487
1488  gomp_mutex_lock (&team->task_lock);
1489  for (i = 0; i < ndepend; i++)
1490    {
1491      elem.addr = depend[i + 2];
1492      ent = htab_find (task->depend_hash, &elem);
1493      for (; ent; ent = ent->next)
1494	if (i >= nout && ent->is_in)
1495	  continue;
1496	else
1497	  {
1498	    struct gomp_task *tsk = ent->task;
1499	    if (!tsk->parent_depends_on)
1500	      {
1501		tsk->parent_depends_on = true;
1502		++num_awaited;
1503		/* If dependency TSK itself has no dependencies and is
1504		   ready to run, move it up front so that we run it as
1505		   soon as possible.  */
1506		if (tsk->num_dependees == 0 && tsk->kind == GOMP_TASK_WAITING)
1507		  priority_queue_upgrade_task (tsk, task);
1508	      }
1509	  }
1510    }
1511  if (num_awaited == 0)
1512    {
1513      gomp_mutex_unlock (&team->task_lock);
1514      return;
1515    }
1516
1517  memset (&taskwait, 0, sizeof (taskwait));
1518  taskwait.n_depend = num_awaited;
1519  gomp_sem_init (&taskwait.taskwait_sem, 0);
1520  task->taskwait = &taskwait;
1521
1522  while (1)
1523    {
1524      bool cancelled = false;
1525      if (taskwait.n_depend == 0)
1526	{
1527	  task->taskwait = NULL;
1528	  gomp_mutex_unlock (&team->task_lock);
1529	  if (to_free)
1530	    {
1531	      gomp_finish_task (to_free);
1532	      free (to_free);
1533	    }
1534	  gomp_sem_destroy (&taskwait.taskwait_sem);
1535	  return;
1536	}
1537
1538      /* Theoretically when we have multiple priorities, we should
1539	 choose between the highest priority item in
1540	 task->children_queue and team->task_queue here, so we should
1541	 use priority_queue_next_task().  However, since we are
1542	 running an undeferred task, perhaps that makes all tasks it
1543	 depends on undeferred, thus a priority of INF?  This would
1544	 make it unnecessary to take anything into account here,
1545	 but the dependencies.
1546
1547	 On the other hand, if we want to use priority_queue_next_task(),
1548	 care should be taken to only use priority_queue_remove()
1549	 below if the task was actually removed from the children
1550	 queue.  */
1551      bool ignored;
1552      struct gomp_task *next_task
1553	= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1554				    PQ_IGNORED, NULL, &ignored);
1555
1556      if (next_task->kind == GOMP_TASK_WAITING)
1557	{
1558	  child_task = next_task;
1559	  cancelled
1560	    = gomp_task_run_pre (child_task, task, team);
1561	  if (__builtin_expect (cancelled, 0))
1562	    {
1563	      if (to_free)
1564		{
1565		  gomp_finish_task (to_free);
1566		  free (to_free);
1567		  to_free = NULL;
1568		}
1569	      goto finish_cancelled;
1570	    }
1571	}
1572      else
1573	/* All tasks we are waiting for are either running in other
1574	   threads, or they are tasks that have not had their
1575	   dependencies met (so they're not even in the queue).  Wait
1576	   for them.  */
1577	taskwait.in_depend_wait = true;
1578      gomp_mutex_unlock (&team->task_lock);
1579      if (do_wake)
1580	{
1581	  gomp_team_barrier_wake (&team->barrier, do_wake);
1582	  do_wake = 0;
1583	}
1584      if (to_free)
1585	{
1586	  gomp_finish_task (to_free);
1587	  free (to_free);
1588	  to_free = NULL;
1589	}
1590      if (child_task)
1591	{
1592	  thr->task = child_task;
1593	  if (__builtin_expect (child_task->fn == NULL, 0))
1594	    {
1595	      if (gomp_target_task_fn (child_task->fn_data))
1596		{
1597		  thr->task = task;
1598		  gomp_mutex_lock (&team->task_lock);
1599		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1600		  struct gomp_target_task *ttask
1601		    = (struct gomp_target_task *) child_task->fn_data;
1602		  /* If GOMP_PLUGIN_target_task_completion has run already
1603		     in between gomp_target_task_fn and the mutex lock,
1604		     perform the requeuing here.  */
1605		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1606		    gomp_target_task_completion (team, child_task);
1607		  else
1608		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1609		  child_task = NULL;
1610		  continue;
1611		}
1612	    }
1613	  else
1614	    child_task->fn (child_task->fn_data);
1615	  thr->task = task;
1616	}
1617      else
1618	gomp_sem_wait (&taskwait.taskwait_sem);
1619      gomp_mutex_lock (&team->task_lock);
1620      if (child_task)
1621	{
1622	 finish_cancelled:;
1623	  size_t new_tasks
1624	    = gomp_task_run_post_handle_depend (child_task, team);
1625	  if (child_task->parent_depends_on)
1626	    --taskwait.n_depend;
1627
1628	  priority_queue_remove (PQ_CHILDREN, &task->children_queue,
1629				 child_task, MEMMODEL_RELAXED);
1630	  child_task->pnode[PQ_CHILDREN].next = NULL;
1631	  child_task->pnode[PQ_CHILDREN].prev = NULL;
1632
1633	  gomp_clear_parent (&child_task->children_queue);
1634	  gomp_task_run_post_remove_taskgroup (child_task);
1635	  to_free = child_task;
1636	  child_task = NULL;
1637	  team->task_count--;
1638	  if (new_tasks > 1)
1639	    {
1640	      do_wake = team->nthreads - team->task_running_count
1641			- !task->in_tied_task;
1642	      if (do_wake > new_tasks)
1643		do_wake = new_tasks;
1644	    }
1645	}
1646    }
1647}
1648
1649/* Called when encountering a taskyield directive.  */
1650
1651void
1652GOMP_taskyield (void)
1653{
1654  /* Nothing at the moment.  */
1655}
1656
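/* Called when encountering the start of a taskgroup construct.  For
   illustration only (user code, not part of libgomp; work () is a
   hypothetical placeholder):

     #pragma omp taskgroup
     {
       #pragma omp task
       work ();
     }

   lowers to GOMP_taskgroup_start, the body, and a matching
   GOMP_taskgroup_end that waits for all tasks generated in the region
   and for their descendant tasks.  */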
1657void
1658GOMP_taskgroup_start (void)
1659{
1660  struct gomp_thread *thr = gomp_thread ();
1661  struct gomp_team *team = thr->ts.team;
1662  struct gomp_task *task = thr->task;
1663  struct gomp_taskgroup *taskgroup;
1664
1665  /* If team is NULL, all tasks are executed as
1666     GOMP_TASK_UNDEFERRED tasks and thus all child tasks of the
1667     taskgroup and their descendant tasks will be finished
1668     by the time GOMP_taskgroup_end is called.  */
1669  if (team == NULL)
1670    return;
1671  taskgroup = gomp_malloc (sizeof (struct gomp_taskgroup));
1672  taskgroup->prev = task->taskgroup;
1673  priority_queue_init (&taskgroup->taskgroup_queue);
1674  taskgroup->in_taskgroup_wait = false;
1675  taskgroup->cancelled = false;
1676  taskgroup->num_children = 0;
1677  gomp_sem_init (&taskgroup->taskgroup_sem, 0);
1678  task->taskgroup = taskgroup;
1679}
1680
1681void
1682GOMP_taskgroup_end (void)
1683{
1684  struct gomp_thread *thr = gomp_thread ();
1685  struct gomp_team *team = thr->ts.team;
1686  struct gomp_task *task = thr->task;
1687  struct gomp_taskgroup *taskgroup;
1688  struct gomp_task *child_task = NULL;
1689  struct gomp_task *to_free = NULL;
1690  int do_wake = 0;
1691
1692  if (team == NULL)
1693    return;
1694  taskgroup = task->taskgroup;
1695  if (__builtin_expect (taskgroup == NULL, 0)
1696      && thr->ts.level == 0)
1697    {
1698      /* This can happen if GOMP_taskgroup_start is called when
1699	 thr->ts.team == NULL, but inside of the taskgroup there
1700	 is #pragma omp target nowait that creates an implicit
1701	 team with a single thread.  In this case, we want to wait
1702	 for all outstanding tasks in this team.  */
1703      gomp_team_barrier_wait (&team->barrier);
1704      return;
1705    }
1706
1707  /* The acquire barrier on load of taskgroup->num_children here
1708     synchronizes with the write of 0 in gomp_task_run_post_remove_taskgroup.
1709     It is not necessary that we synchronize with other non-0 writes at
1710     this point, but we must ensure that all writes to memory by a
1711     child thread task work function are seen before we exit from
1712     GOMP_taskgroup_end.  */
1713  if (__atomic_load_n (&taskgroup->num_children, MEMMODEL_ACQUIRE) == 0)
1714    goto finish;
1715
1716  bool unused;
1717  gomp_mutex_lock (&team->task_lock);
1718  while (1)
1719    {
1720      bool cancelled = false;
1721      if (priority_queue_empty_p (&taskgroup->taskgroup_queue,
1722				  MEMMODEL_RELAXED))
1723	{
1724	  if (taskgroup->num_children)
1725	    {
1726	      if (priority_queue_empty_p (&task->children_queue,
1727					  MEMMODEL_RELAXED))
1728		goto do_wait;
1729	      child_task
1730		= priority_queue_next_task (PQ_CHILDREN, &task->children_queue,
1731					    PQ_TEAM, &team->task_queue,
1732					    &unused);
1733	    }
1734	  else
1735	    {
1736	      gomp_mutex_unlock (&team->task_lock);
1737	      if (to_free)
1738		{
1739		  gomp_finish_task (to_free);
1740		  free (to_free);
1741		}
1742	      goto finish;
1743	    }
1744	}
1745      else
1746	child_task
1747	  = priority_queue_next_task (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
1748				      PQ_TEAM, &team->task_queue, &unused);
1749      if (child_task->kind == GOMP_TASK_WAITING)
1750	{
1751	  cancelled
1752	    = gomp_task_run_pre (child_task, child_task->parent, team);
1753	  if (__builtin_expect (cancelled, 0))
1754	    {
1755	      if (to_free)
1756		{
1757		  gomp_finish_task (to_free);
1758		  free (to_free);
1759		  to_free = NULL;
1760		}
1761	      goto finish_cancelled;
1762	    }
1763	}
1764      else
1765	{
1766	  child_task = NULL;
1767	 do_wait:
1768	/* All tasks we are waiting for are either running in other
1769	   threads, or they are tasks that have not had their
1770	   dependencies met (so they're not even in the queue).  Wait
1771	   for them.  */
1772	  taskgroup->in_taskgroup_wait = true;
1773	}
1774      gomp_mutex_unlock (&team->task_lock);
1775      if (do_wake)
1776	{
1777	  gomp_team_barrier_wake (&team->barrier, do_wake);
1778	  do_wake = 0;
1779	}
1780      if (to_free)
1781	{
1782	  gomp_finish_task (to_free);
1783	  free (to_free);
1784	  to_free = NULL;
1785	}
1786      if (child_task)
1787	{
1788	  thr->task = child_task;
1789	  if (__builtin_expect (child_task->fn == NULL, 0))
1790	    {
1791	      if (gomp_target_task_fn (child_task->fn_data))
1792		{
1793		  thr->task = task;
1794		  gomp_mutex_lock (&team->task_lock);
1795		  child_task->kind = GOMP_TASK_ASYNC_RUNNING;
1796		  struct gomp_target_task *ttask
1797		    = (struct gomp_target_task *) child_task->fn_data;
1798		  /* If GOMP_PLUGIN_target_task_completion has run already
1799		     in between gomp_target_task_fn and the mutex lock,
1800		     perform the requeuing here.  */
1801		  if (ttask->state == GOMP_TARGET_TASK_FINISHED)
1802		    gomp_target_task_completion (team, child_task);
1803		  else
1804		    ttask->state = GOMP_TARGET_TASK_RUNNING;
1805		  child_task = NULL;
1806		  continue;
1807		}
1808	    }
1809	  else
1810	    child_task->fn (child_task->fn_data);
1811	  thr->task = task;
1812	}
1813      else
1814	gomp_sem_wait (&taskgroup->taskgroup_sem);
1815      gomp_mutex_lock (&team->task_lock);
1816      if (child_task)
1817	{
1818	 finish_cancelled:;
1819	  size_t new_tasks
1820	    = gomp_task_run_post_handle_depend (child_task, team);
1821	  gomp_task_run_post_remove_parent (child_task);
1822	  gomp_clear_parent (&child_task->children_queue);
1823	  gomp_task_run_post_remove_taskgroup (child_task);
1824	  to_free = child_task;
1825	  child_task = NULL;
1826	  team->task_count--;
1827	  if (new_tasks > 1)
1828	    {
1829	      do_wake = team->nthreads - team->task_running_count
1830			- !task->in_tied_task;
1831	      if (do_wake > new_tasks)
1832		do_wake = new_tasks;
1833	    }
1834	}
1835    }
1836
1837 finish:
1838  task->taskgroup = taskgroup->prev;
1839  gomp_sem_destroy (&taskgroup->taskgroup_sem);
1840  free (taskgroup);
1841}
1842
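/* Return nonzero if the calling task is a final task, implementing the
   omp_in_final library routine.  For illustration only (user code, not
   part of libgomp; serial_fallback is a hypothetical placeholder):

     #pragma omp task final(n > 30)
     if (omp_in_final ())
       serial_fallback (n);

   Inside such a task, and inside all of its descendant tasks,
   omp_in_final returns true.  */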
1843int
1844omp_in_final (void)
1845{
1846  struct gomp_thread *thr = gomp_thread ();
1847  return thr->task && thr->task->final_task;
1848}
1849
1850ialias (omp_in_final)
1851