/* Copyright (C) 2015-2020 Free Software Foundation, Inc.
   Contributed by Jakub Jelinek <jakub@redhat.com>.

   This file is part of the GNU Offloading and Multi Processing Library
   (libgomp).

   Libgomp is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

/* This file handles the taskloop construct.  It is included twice, once
   for the long and once for the unsigned long long variant.  */
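
/* For orientation, a sketch of how the two instantiations might look in
   the including translation unit.  The defines live outside this file;
   the exact names below are an illustrative assumption, not verbatim:

     #define TYPE long
     #define UTYPE unsigned long
     #define TYPE_is_long
     #include "taskloop.c"                (defines GOMP_taskloop)

     #define TYPE unsigned long long
     #define UTYPE unsigned long long
     #define GOMP_taskloop GOMP_taskloop_ull
     #include "taskloop.c"                (defines GOMP_taskloop_ull)

   TYPE and UTYPE select the signed/unsigned iteration arithmetic below,
   and TYPE_is_long gates the paths specific to the long variant.  */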

/* Called when encountering a taskloop construct.  FN is the task body,
   invoked once per generated task on an argument block whose first two
   TYPE fields hold that task's subrange; CPYFN, if non-NULL,
   copy-constructs each block from DATA.  FLAGS carries the
   GOMP_TASK_FLAG_* bits; depending on GOMP_TASK_FLAG_GRAINSIZE,
   NUM_TASKS is either the num_tasks or the grainsize clause value.
   START, END and STEP describe the iteration space.  */

void
GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
               long arg_size, long arg_align, unsigned flags,
               unsigned long num_tasks, int priority,
               TYPE start, TYPE end, TYPE step)
{
  struct gomp_thread *thr = gomp_thread ();
  struct gomp_team *team = thr->ts.team;

#ifdef HAVE_BROKEN_POSIX_SEMAPHORES
  /* If pthread_mutex_* is used for omp_*lock*, then each task must be
     tied to one thread all the time.  This means UNTIED tasks must be
     tied and if CPYFN is non-NULL IF(0) must be forced, as CPYFN
     might be running on a different thread than FN.  */
  if (cpyfn)
    flags &= ~GOMP_TASK_FLAG_IF;
  flags &= ~GOMP_TASK_FLAG_UNTIED;
#endif

  /* If parallel or taskgroup has been cancelled, don't start new tasks.  */
  if (team && gomp_team_barrier_cancelled (&team->barrier))
    {
    early_return:
      if ((flags & (GOMP_TASK_FLAG_NOGROUP | GOMP_TASK_FLAG_REDUCTION))
          == GOMP_TASK_FLAG_REDUCTION)
        {
          struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
          uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
          /* Tell callers GOMP_taskgroup_reduction_register has not been
             called.  */
          ptr[2] = 0;
        }
      return;
    }

#ifdef TYPE_is_long
  TYPE s = step;
  if (step > 0)
    {
      if (start >= end)
        goto early_return;
      s--;
    }
  else
    {
      if (start <= end)
        goto early_return;
      s++;
    }
  UTYPE n = (end - start + s) / step;
#else
  UTYPE n;
  if (flags & GOMP_TASK_FLAG_UP)
    {
      if (start >= end)
        goto early_return;
      n = (end - start + step - 1) / step;
    }
  else
    {
      if (start <= end)
        goto early_return;
      n = (start - end - step - 1) / -step;
    }
#endif
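
  /* Worked example of the iteration count: for start = 0, end = 10,
     step = 3 the loop visits 0, 3, 6, 9, and both computations above
     yield n = (10 - 0 + 2) / 3 = 4.  */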

  TYPE task_step = step;
  unsigned long nfirst = n;
  if (flags & GOMP_TASK_FLAG_GRAINSIZE)
    {
      unsigned long grainsize = num_tasks;
#ifdef TYPE_is_long
      num_tasks = n / grainsize;
#else
      UTYPE ndiv = n / grainsize;
      num_tasks = ndiv;
      if (num_tasks != ndiv)
        num_tasks = ~0UL;
#endif
      if (num_tasks <= 1)
        {
          num_tasks = 1;
          task_step = end - start;
        }
      else if (num_tasks >= grainsize
#ifndef TYPE_is_long
               && num_tasks != ~0UL
#endif
              )
        {
          UTYPE mul = num_tasks * grainsize;
          task_step = (TYPE) grainsize * step;
          if (mul != n)
            {
              task_step += step;
              nfirst = n - mul - 1;
            }
        }
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }
  else
    {
      if (num_tasks == 0)
        num_tasks = team ? team->nthreads : 1;
      if (num_tasks >= n)
        num_tasks = n;
      else
        {
          UTYPE div = n / num_tasks;
          UTYPE mod = n % num_tasks;
          task_step = (TYPE) div * step;
          if (mod)
            {
              task_step += step;
              nfirst = mod - 1;
            }
        }
    }
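
  /* Worked example of the split: n = 10 iterations over num_tasks = 3
     tasks gives div = 3, mod = 1, so task_step initially covers
     div + 1 = 4 iterations and nfirst = 0.  In the loops below,
     task_step -= step fires right after task nfirst, so task 0 gets 4
     iterations and tasks 1 and 2 get 3 each.  */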

  if (flags & GOMP_TASK_FLAG_NOGROUP)
    {
      if (__builtin_expect (gomp_cancel_var, 0)
          && thr->task
          && thr->task->taskgroup)
        {
          if (thr->task->taskgroup->cancelled)
            return;
          if (thr->task->taskgroup->workshare
              && thr->task->taskgroup->prev
              && thr->task->taskgroup->prev->cancelled)
            return;
        }
    }
  else
    {
      ialias_call (GOMP_taskgroup_start) ();
      if (flags & GOMP_TASK_FLAG_REDUCTION)
        {
          struct gomp_data_head { TYPE t1, t2; uintptr_t *ptr; };
          uintptr_t *ptr = ((struct gomp_data_head *) data)->ptr;
          ialias_call (GOMP_taskgroup_reduction_register) (ptr);
        }
    }

  if (priority > gomp_max_task_priority_var)
    priority = gomp_max_task_priority_var;

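  /* Run the tasks undeferred (sequentially, in this thread) if the if
     clause evaluated to false, if there is no team, if nested in a
     final task, or if so many tasks are queued already that deferring
     more would not help.  */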
  if ((flags & GOMP_TASK_FLAG_IF) == 0 || team == NULL
      || (thr->task && thr->task->final_task)
      || team->task_count + num_tasks > 64 * team->nthreads)
    {
      unsigned long i;
      if (__builtin_expect (cpyfn != NULL, 0))
        {
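          /* With a copy constructor, first run CPYFN to build each
             task's private copy of DATA (under that task's context),
             then invoke FN on every copy in a second pass.  */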
          struct gomp_task task[num_tasks];
          struct gomp_task *parent = thr->task;
          arg_size = (arg_size + arg_align - 1) & ~(arg_align - 1);
          char buf[num_tasks * arg_size + arg_align - 1];
          char *arg = (char *) (((uintptr_t) buf + arg_align - 1)
                                & ~(uintptr_t) (arg_align - 1));
          char *orig_arg = arg;
          for (i = 0; i < num_tasks; i++)
            {
              gomp_init_task (&task[i], parent, gomp_icv (false));
              task[i].priority = priority;
              task[i].kind = GOMP_TASK_UNDEFERRED;
              task[i].final_task = (thr->task && thr->task->final_task)
                                   || (flags & GOMP_TASK_FLAG_FINAL);
              if (thr->task)
                {
                  task[i].in_tied_task = thr->task->in_tied_task;
                  task[i].taskgroup = thr->task->taskgroup;
                }
              thr->task = &task[i];
              cpyfn (arg, data);
              arg += arg_size;
            }
          arg = orig_arg;
          for (i = 0; i < num_tasks; i++)
            {
              thr->task = &task[i];
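              /* The first two TYPE slots of the argument block hold
                 this task's subrange; after task nfirst the chunk
                 shrinks by one STEP.  */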
              ((TYPE *)arg)[0] = start;
              start += task_step;
              ((TYPE *)arg)[1] = start;
              if (i == nfirst)
                task_step -= step;
              fn (arg);
              arg += arg_size;
              if (!priority_queue_empty_p (&task[i].children_queue,
                                           MEMMODEL_RELAXED))
                {
                  gomp_mutex_lock (&team->task_lock);
                  gomp_clear_parent (&task[i].children_queue);
                  gomp_mutex_unlock (&team->task_lock);
                }
              gomp_end_task ();
            }
        }
      else
        for (i = 0; i < num_tasks; i++)
          {
            struct gomp_task task;

            gomp_init_task (&task, thr->task, gomp_icv (false));
            task.priority = priority;
            task.kind = GOMP_TASK_UNDEFERRED;
            task.final_task = (thr->task && thr->task->final_task)
                              || (flags & GOMP_TASK_FLAG_FINAL);
            if (thr->task)
              {
                task.in_tied_task = thr->task->in_tied_task;
                task.taskgroup = thr->task->taskgroup;
              }
            thr->task = &task;
            ((TYPE *)data)[0] = start;
            start += task_step;
            ((TYPE *)data)[1] = start;
            if (i == nfirst)
              task_step -= step;
            fn (data);
            if (!priority_queue_empty_p (&task.children_queue,
                                         MEMMODEL_RELAXED))
              {
                gomp_mutex_lock (&team->task_lock);
                gomp_clear_parent (&task.children_queue);
                gomp_mutex_unlock (&team->task_lock);
              }
            gomp_end_task ();
          }
    }
  else
    {
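      /* Deferred path: allocate and initialize all the tasks first,
         then publish them on the queues under one task_lock critical
         section.  */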
      struct gomp_task *tasks[num_tasks];
      struct gomp_task *parent = thr->task;
      struct gomp_taskgroup *taskgroup = parent->taskgroup;
      char *arg;
      int do_wake;
      unsigned long i;

      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task
            = gomp_malloc (sizeof (*task) + arg_size + arg_align - 1);
          tasks[i] = task;
          arg = (char *) (((uintptr_t) (task + 1) + arg_align - 1)
                          & ~(uintptr_t) (arg_align - 1));
          gomp_init_task (task, parent, gomp_icv (false));
          task->priority = priority;
          task->kind = GOMP_TASK_UNDEFERRED;
          task->in_tied_task = parent->in_tied_task;
          task->taskgroup = taskgroup;
          thr->task = task;
          if (cpyfn)
            {
              cpyfn (arg, data);
              task->copy_ctors_done = true;
            }
          else
            memcpy (arg, data, arg_size);
          ((TYPE *)arg)[0] = start;
          start += task_step;
          ((TYPE *)arg)[1] = start;
          if (i == nfirst)
            task_step -= step;
          thr->task = parent;
          task->kind = GOMP_TASK_WAITING;
          task->fn = fn;
          task->fn_data = arg;
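          /* GOMP_TASK_FLAG_FINAL is bit 1 of FLAGS, so the shift below
             yields 0 or 1 for the bitfield.  */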
          task->final_task = (flags & GOMP_TASK_FLAG_FINAL) >> 1;
        }
      gomp_mutex_lock (&team->task_lock);
      /* If parallel or taskgroup has been cancelled, don't start new
         tasks.  */
      if (__builtin_expect (gomp_cancel_var, 0)
          && cpyfn == NULL)
        {
          if (gomp_team_barrier_cancelled (&team->barrier))
            {
            do_cancel:
              gomp_mutex_unlock (&team->task_lock);
              for (i = 0; i < num_tasks; i++)
                {
                  gomp_finish_task (tasks[i]);
                  free (tasks[i]);
                }
              if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
                ialias_call (GOMP_taskgroup_end) ();
              return;
            }
          if (taskgroup)
            {
              if (taskgroup->cancelled)
                goto do_cancel;
              if (taskgroup->workshare
                  && taskgroup->prev
                  && taskgroup->prev->cancelled)
                goto do_cancel;
            }
        }
      if (taskgroup)
        taskgroup->num_children += num_tasks;
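      /* Each task goes on up to three priority queues: its parent's
         children queue, the enclosing taskgroup's queue (if any), and
         the team's queue, from which other threads pick up work.  */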
      for (i = 0; i < num_tasks; i++)
        {
          struct gomp_task *task = tasks[i];
          priority_queue_insert (PQ_CHILDREN, &parent->children_queue,
                                 task, priority,
                                 PRIORITY_INSERT_BEGIN,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          if (taskgroup)
            priority_queue_insert (PQ_TASKGROUP, &taskgroup->taskgroup_queue,
                                   task, priority, PRIORITY_INSERT_BEGIN,
                                   /*last_parent_depends_on=*/false,
                                   task->parent_depends_on);
          priority_queue_insert (PQ_TEAM, &team->task_queue, task, priority,
                                 PRIORITY_INSERT_END,
                                 /*last_parent_depends_on=*/false,
                                 task->parent_depends_on);
          ++team->task_count;
          ++team->task_queued_count;
        }
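      /* Flag pending tasks, then wake at most as many sleeping threads
         as there are new tasks, and no more than are currently idle.  */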
      gomp_team_barrier_set_task_pending (&team->barrier);
      if (team->task_running_count + !parent->in_tied_task
          < team->nthreads)
        {
          do_wake = team->nthreads - team->task_running_count
                    - !parent->in_tied_task;
          if ((unsigned long) do_wake > num_tasks)
            do_wake = num_tasks;
        }
      else
        do_wake = 0;
      gomp_mutex_unlock (&team->task_lock);
      if (do_wake)
        gomp_team_barrier_wake (&team->barrier, do_wake);
    }
  if ((flags & GOMP_TASK_FLAG_NOGROUP) == 0)
    ialias_call (GOMP_taskgroup_end) ();
}