omp-expand.c revision 1.3
1/* Expansion pass for OMP directives.  Outlines regions of certain OMP
2   directives to separate functions, converts others into explicit calls to the
3   runtime library (libgomp) and so forth.
4
5Copyright (C) 2005-2018 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "gomp-constants.h"
57#include "gimple-pretty-print.h"
58#include "hsa-common.h"
59#include "stringpool.h"
60#include "attribs.h"
61
62/* OMP region information.  Every parallel and workshare
63   directive is enclosed between two markers, the OMP_* directive
64   and a corresponding GIMPLE_OMP_RETURN statement.  */
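
/* As an illustrative example: for a nest like

     #pragma omp parallel
       #pragma omp for
	 ...

   the pass builds a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region; sibling constructs at the same nesting level are
   linked through NEXT, and ENTRY/EXIT record the blocks holding the directive
   and its matching GIMPLE_OMP_RETURN.  */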
65
66struct omp_region
67{
68  /* The enclosing region.  */
69  struct omp_region *outer;
70
71  /* First child region.  */
72  struct omp_region *inner;
73
74  /* Next peer region.  */
75  struct omp_region *next;
76
77  /* Block containing the omp directive as its last stmt.  */
78  basic_block entry;
79
80  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
81  basic_block exit;
82
83  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
84  basic_block cont;
85
86  /* If this is a combined parallel+workshare region, this is a list
87     of additional arguments needed by the combined parallel+workshare
88     library call.  */
89  vec<tree, va_gc> *ws_args;
90
91  /* The code for the omp directive of this region.  */
92  enum gimple_code type;
93
94  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
95  enum omp_clause_schedule_kind sched_kind;
96
97  /* Schedule modifiers.  */
98  unsigned char sched_modifiers;
99
100  /* True if this is a combined parallel+workshare region.  */
101  bool is_combined_parallel;
102
103  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104     a depend clause.  */
105  gomp_ordered *ord_stmt;
106};
107
108static struct omp_region *root_omp_region;
109static bool omp_any_child_fn_dumped;
110
111static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112				     bool = false);
113static gphi *find_phi_with_arg_on_edge (tree, edge);
114static void expand_omp (struct omp_region *region);
115
116/* Return true if REGION is a combined parallel+workshare region.  */
117
118static inline bool
119is_combined_parallel (struct omp_region *region)
120{
121  return region->is_combined_parallel;
122}
123
124/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125   is the immediate dominator of PAR_ENTRY_BB, return true if there
126   are no data dependencies that would prevent expanding the parallel
127   directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129   When expanding a combined parallel+workshare region, the call to
130   the child function may need additional arguments in the case of
131   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
132   computed out of variables passed in from the parent to the child
133   via 'struct .omp_data_s'.  For instance:
134
135	#pragma omp parallel for schedule (guided, i * 4)
136	for (j ...)
137
138   Is lowered into:
139
140	# BLOCK 2 (PAR_ENTRY_BB)
141	.omp_data_o.i = i;
142	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
143
144	# BLOCK 3 (WS_ENTRY_BB)
145	.omp_data_i = &.omp_data_o;
146	D.1667 = .omp_data_i->i;
147	D.1598 = D.1667 * 4;
148	#pragma omp for schedule (guided, D.1598)
149
150   When we outline the parallel region, the call to the child function
151   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152   that value is computed *after* the call site.  So, in principle we
153   cannot do the transformation.
154
155   To see whether the code in WS_ENTRY_BB blocks the combined
156   parallel+workshare call, we collect all the variables used in the
157   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
159   call.
160
161   FIXME.  If we had the SSA form built at this point, we could merely
162   hoist the code in block 3 into block 2 and be done with it.  But at
163   this point we don't have dataflow information and though we could
164   hack something up here, it is really not worth the aggravation.  */
165
166static bool
167workshare_safe_to_combine_p (basic_block ws_entry_bb)
168{
169  struct omp_for_data fd;
170  gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173    return true;
174
175  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176
177  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
178
179  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180    return false;
181  if (fd.iter_type != long_integer_type_node)
182    return false;
183
184  /* FIXME.  We give up too easily here.  If any of these arguments
185     are not constants, they will likely involve variables that have
186     been mapped into fields of .omp_data_s for sharing with the child
187     function.  With appropriate data flow, it would be possible to
188     see through this.  */
189  if (!is_gimple_min_invariant (fd.loop.n1)
190      || !is_gimple_min_invariant (fd.loop.n2)
191      || !is_gimple_min_invariant (fd.loop.step)
192      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193    return false;
194
195  return true;
196}
197
198/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199   presence (SIMD_SCHEDULE).  */
200
201static tree
202omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
203{
204  if (!simd_schedule)
205    return chunk_size;
206
207  poly_uint64 vf = omp_max_vf ();
208  if (known_eq (vf, 1U))
209    return chunk_size;
210
211  tree type = TREE_TYPE (chunk_size);
212  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213			    build_int_cst (type, vf - 1));
214  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215		      build_int_cst (type, -vf));
216}
217
218/* Collect additional arguments needed to emit a combined
219   parallel+workshare call.  WS_STMT is the workshare directive being
220   expanded.  */
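
/* As a rough sketch of how these arguments are used: for a combined
   parallel loop they are appended between NUM_THREADS and FLAGS in the
   GOMP_parallel_loop_* call that expand_parallel_call emits, e.g.

     GOMP_parallel_loop_dynamic (fn, data, num_threads,
				 n1, n2, step, chunk, flags);

   and for parallel sections the single argument is the section count
   passed to GOMP_parallel_sections.  */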
221
222static vec<tree, va_gc> *
223get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
224{
225  tree t;
226  location_t loc = gimple_location (ws_stmt);
227  vec<tree, va_gc> *ws_args;
228
229  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
230    {
231      struct omp_for_data fd;
232      tree n1, n2;
233
234      omp_extract_for_data (for_stmt, &fd, NULL);
235      n1 = fd.loop.n1;
236      n2 = fd.loop.n2;
237
238      if (gimple_omp_for_combined_into_p (for_stmt))
239	{
240	  tree innerc
241	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242			       OMP_CLAUSE__LOOPTEMP_);
243	  gcc_assert (innerc);
244	  n1 = OMP_CLAUSE_DECL (innerc);
245	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246				    OMP_CLAUSE__LOOPTEMP_);
247	  gcc_assert (innerc);
248	  n2 = OMP_CLAUSE_DECL (innerc);
249	}
250
251      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
252
253      t = fold_convert_loc (loc, long_integer_type_node, n1);
254      ws_args->quick_push (t);
255
256      t = fold_convert_loc (loc, long_integer_type_node, n2);
257      ws_args->quick_push (t);
258
259      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260      ws_args->quick_push (t);
261
262      if (fd.chunk_size)
263	{
264	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
266	  ws_args->quick_push (t);
267	}
268
269      return ws_args;
270    }
271  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
272    {
273      /* Number of sections is equal to the number of edges from the
274	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275	 the exit of the sections region.  */
276      basic_block bb = single_succ (gimple_bb (ws_stmt));
277      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278      vec_alloc (ws_args, 1);
279      ws_args->quick_push (t);
280      return ws_args;
281    }
282
283  gcc_unreachable ();
284}
285
286/* Discover whether REGION is a combined parallel+workshare region.  */
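
/* Informally: a "combined" region lets us emit, say, a single
   GOMP_parallel_loop_runtime call instead of a GOMP_parallel call whose
   body then starts the worksharing loop separately; see the
   BUILT_IN_GOMP_PARALLEL_LOOP_* selection in expand_parallel_call.  */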
287
288static void
289determine_parallel_type (struct omp_region *region)
290{
291  basic_block par_entry_bb, par_exit_bb;
292  basic_block ws_entry_bb, ws_exit_bb;
293
294  if (region == NULL || region->inner == NULL
295      || region->exit == NULL || region->inner->exit == NULL
296      || region->inner->cont == NULL)
297    return;
298
299  /* We only support parallel+for and parallel+sections.  */
300  if (region->type != GIMPLE_OMP_PARALLEL
301      || (region->inner->type != GIMPLE_OMP_FOR
302	  && region->inner->type != GIMPLE_OMP_SECTIONS))
303    return;
304
305  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306     WS_EXIT_BB -> PAR_EXIT_BB.  */
307  par_entry_bb = region->entry;
308  par_exit_bb = region->exit;
309  ws_entry_bb = region->inner->entry;
310  ws_exit_bb = region->inner->exit;
311
312  if (single_succ (par_entry_bb) == ws_entry_bb
313      && single_succ (ws_exit_bb) == par_exit_bb
314      && workshare_safe_to_combine_p (ws_entry_bb)
315      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316	  || (last_and_only_stmt (ws_entry_bb)
317	      && last_and_only_stmt (par_exit_bb))))
318    {
319      gimple *par_stmt = last_stmt (par_entry_bb);
320      gimple *ws_stmt = last_stmt (ws_entry_bb);
321
322      if (region->inner->type == GIMPLE_OMP_FOR)
323	{
324	  /* If this is a combined parallel loop, we need to determine
325	     whether or not to use the combined library calls.  There
326	     are two cases where we do not apply the transformation:
327	     static loops and any kind of ordered loop.  In the first
328	     case, we already open code the loop so there is no need
329	     to do anything else.  In the latter case, the combined
330	     parallel loop call would still need extra synchronization
331	     to implement ordered semantics, so there would not be any
332	     gain in using the combined call.  */
333	  tree clauses = gimple_omp_for_clauses (ws_stmt);
334	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335	  if (c == NULL
336	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337		  == OMP_CLAUSE_SCHEDULE_STATIC)
338	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
339	    {
340	      region->is_combined_parallel = false;
341	      region->inner->is_combined_parallel = false;
342	      return;
343	    }
344	}
345
346      region->is_combined_parallel = true;
347      region->inner->is_combined_parallel = true;
348      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
349    }
350}
351
352/* Debugging dumps for parallel regions.  */
353void dump_omp_region (FILE *, struct omp_region *, int);
354void debug_omp_region (struct omp_region *);
355void debug_all_omp_regions (void);
356
357/* Dump the parallel region tree rooted at REGION.  */
358
359void
360dump_omp_region (FILE *file, struct omp_region *region, int indent)
361{
362  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363	   gimple_code_name[region->type]);
364
365  if (region->inner)
366    dump_omp_region (file, region->inner, indent + 4);
367
368  if (region->cont)
369    {
370      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371	       region->cont->index);
372    }
373
374  if (region->exit)
375    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376	     region->exit->index);
377  else
378    fprintf (file, "%*s[no exit marker]\n", indent, "");
379
380  if (region->next)
381    dump_omp_region (file, region->next, indent);
382}
383
384DEBUG_FUNCTION void
385debug_omp_region (struct omp_region *region)
386{
387  dump_omp_region (stderr, region, 0);
388}
389
390DEBUG_FUNCTION void
391debug_all_omp_regions (void)
392{
393  dump_omp_region (stderr, root_omp_region, 0);
394}
395
396/* Create a new parallel region starting at STMT inside region PARENT.  */
397
398static struct omp_region *
399new_omp_region (basic_block bb, enum gimple_code type,
400		struct omp_region *parent)
401{
402  struct omp_region *region = XCNEW (struct omp_region);
403
404  region->outer = parent;
405  region->entry = bb;
406  region->type = type;
407
408  if (parent)
409    {
410      /* This is a nested region.  Add it to the list of inner
411	 regions in PARENT.  */
412      region->next = parent->inner;
413      parent->inner = region;
414    }
415  else
416    {
417      /* This is a toplevel region.  Add it to the list of toplevel
418	 regions in ROOT_OMP_REGION.  */
419      region->next = root_omp_region;
420      root_omp_region = region;
421    }
422
423  return region;
424}
425
426/* Release the memory associated with the region tree rooted at REGION.  */
427
428static void
429free_omp_region_1 (struct omp_region *region)
430{
431  struct omp_region *i, *n;
432
433  for (i = region->inner; i ; i = n)
434    {
435      n = i->next;
436      free_omp_region_1 (i);
437    }
438
439  free (region);
440}
441
442/* Release the memory for the entire omp region tree.  */
443
444void
445omp_free_regions (void)
446{
447  struct omp_region *r, *n;
448  for (r = root_omp_region; r ; r = n)
449    {
450      n = r->next;
451      free_omp_region_1 (r);
452    }
453  root_omp_region = NULL;
454}
455
456/* A convenience function to build an empty GIMPLE_COND with just the
457   condition.  */
458
459static gcond *
460gimple_build_cond_empty (tree cond)
461{
462  enum tree_code pred_code;
463  tree lhs, rhs;
464
465  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
467}
468
469/* Return true if a parallel REGION is within a declare target function or
470   within a target region and is not a part of a gridified target.  */
471
472static bool
473parallel_needs_hsa_kernel_p (struct omp_region *region)
474{
475  bool indirect = false;
476  for (region = region->outer; region; region = region->outer)
477    {
478      if (region->type == GIMPLE_OMP_PARALLEL)
479	indirect = true;
480      else if (region->type == GIMPLE_OMP_TARGET)
481	{
482	  gomp_target *tgt_stmt
483	    = as_a <gomp_target *> (last_stmt (region->entry));
484
485	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486			       OMP_CLAUSE__GRIDDIM_))
487	    return indirect;
488	  else
489	    return true;
490	}
491    }
492
493  if (lookup_attribute ("omp declare target",
494			DECL_ATTRIBUTES (current_function_decl)))
495    return true;
496
497  return false;
498}
499
500/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
501   Add CHILD_FNDECL to decl chain of the supercontext of the block
502   ENTRY_BLOCK - this is the block which originally contained the
503   code from which CHILD_FNDECL was created.
504
505   Together, these actions ensure that the debug info for the outlined
506   function will be emitted with the correct lexical scope.  */
507
508static void
509adjust_context_and_scope (struct omp_region *region, tree entry_block,
510			  tree child_fndecl)
511{
512  tree parent_fndecl = NULL_TREE;
513  gimple *entry_stmt;
514  /* OMP expansion expands inner regions before outer ones, so if
515     we e.g. have explicit task region nested in parallel region, when
516     expanding the task region current_function_decl will be the original
517     source function, but we actually want to use as context the child
518     function of the parallel.  */
519  for (region = region->outer;
520       region && parent_fndecl == NULL_TREE; region = region->outer)
521    switch (region->type)
522      {
523      case GIMPLE_OMP_PARALLEL:
524      case GIMPLE_OMP_TASK:
525	entry_stmt = last_stmt (region->entry);
526	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
527	break;
528      case GIMPLE_OMP_TARGET:
529	entry_stmt = last_stmt (region->entry);
530	parent_fndecl
531	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
532	break;
533      default:
534	break;
535      }
536
537  if (parent_fndecl == NULL_TREE)
538    parent_fndecl = current_function_decl;
539  DECL_CONTEXT (child_fndecl) = parent_fndecl;
540
541  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
542    {
543      tree b = BLOCK_SUPERCONTEXT (entry_block);
544      if (TREE_CODE (b) == BLOCK)
545        {
546	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
547	  BLOCK_VARS (b) = child_fndecl;
548	}
549    }
550}
551
552/* Build the function calls to GOMP_parallel_start etc to actually
553   generate the parallel operation.  REGION is the parallel region
554   being expanded.  BB is the block where to insert the code.  WS_ARGS
555   will be set if this is a call to a combined parallel+workshare
556   construct, it contains the list of additional arguments needed by
557   the workshare construct.  */
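
/* Schematically, the call emitted below for the non-combined case is

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   with NUM_THREADS equal to 0 (runtime default) unless a NUM_THREADS or
   IF clause says otherwise.  This is an illustrative sketch; for combined
   regions the argument list also carries WS_ARGS.  */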
558
559static void
560expand_parallel_call (struct omp_region *region, basic_block bb,
561		      gomp_parallel *entry_stmt,
562		      vec<tree, va_gc> *ws_args)
563{
564  tree t, t1, t2, val, cond, c, clauses, flags;
565  gimple_stmt_iterator gsi;
566  gimple *stmt;
567  enum built_in_function start_ix;
568  int start_ix2;
569  location_t clause_loc;
570  vec<tree, va_gc> *args;
571
572  clauses = gimple_omp_parallel_clauses (entry_stmt);
573
574  /* Determine what flavor of GOMP_parallel we will be
575     emitting.  */
576  start_ix = BUILT_IN_GOMP_PARALLEL;
577  if (is_combined_parallel (region))
578    {
579      switch (region->inner->type)
580	{
581	case GIMPLE_OMP_FOR:
582	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
583	  switch (region->inner->sched_kind)
584	    {
585	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
586	      start_ix2 = 3;
587	      break;
588	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
589	    case OMP_CLAUSE_SCHEDULE_GUIDED:
590	      if (region->inner->sched_modifiers
591		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
592		{
593		  start_ix2 = 3 + region->inner->sched_kind;
594		  break;
595		}
596	      /* FALLTHRU */
597	    default:
598	      start_ix2 = region->inner->sched_kind;
599	      break;
600	    }
601	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
602	  start_ix = (enum built_in_function) start_ix2;
603	  break;
604	case GIMPLE_OMP_SECTIONS:
605	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
606	  break;
607	default:
608	  gcc_unreachable ();
609	}
610    }
611
612  /* By default, the value of NUM_THREADS is zero (selected at run time)
613     and there is no conditional.  */
614  cond = NULL_TREE;
615  val = build_int_cst (unsigned_type_node, 0);
616  flags = build_int_cst (unsigned_type_node, 0);
617
618  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
619  if (c)
620    cond = OMP_CLAUSE_IF_EXPR (c);
621
622  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
623  if (c)
624    {
625      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
626      clause_loc = OMP_CLAUSE_LOCATION (c);
627    }
628  else
629    clause_loc = gimple_location (entry_stmt);
630
631  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
632  if (c)
633    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
634
635  /* Ensure 'val' is of the correct type.  */
636  val = fold_convert_loc (clause_loc, unsigned_type_node, val);
637
638  /* If we found the clause 'if (cond)', build either
639     (cond == 0) or (cond ? val : 1u).  */
640  if (cond)
641    {
642      cond = gimple_boolify (cond);
643
644      if (integer_zerop (val))
645	val = fold_build2_loc (clause_loc,
646			   EQ_EXPR, unsigned_type_node, cond,
647			   build_int_cst (TREE_TYPE (cond), 0));
648      else
649	{
650	  basic_block cond_bb, then_bb, else_bb;
651	  edge e, e_then, e_else;
652	  tree tmp_then, tmp_else, tmp_join, tmp_var;
653
654	  tmp_var = create_tmp_var (TREE_TYPE (val));
655	  if (gimple_in_ssa_p (cfun))
656	    {
657	      tmp_then = make_ssa_name (tmp_var);
658	      tmp_else = make_ssa_name (tmp_var);
659	      tmp_join = make_ssa_name (tmp_var);
660	    }
661	  else
662	    {
663	      tmp_then = tmp_var;
664	      tmp_else = tmp_var;
665	      tmp_join = tmp_var;
666	    }
667
668	  e = split_block_after_labels (bb);
669	  cond_bb = e->src;
670	  bb = e->dest;
671	  remove_edge (e);
672
673	  then_bb = create_empty_bb (cond_bb);
674	  else_bb = create_empty_bb (then_bb);
675	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
676	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
677
678	  stmt = gimple_build_cond_empty (cond);
679	  gsi = gsi_start_bb (cond_bb);
680	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
681
682	  gsi = gsi_start_bb (then_bb);
683	  expand_omp_build_assign (&gsi, tmp_then, val, true);
684
685	  gsi = gsi_start_bb (else_bb);
686	  expand_omp_build_assign (&gsi, tmp_else,
687				   build_int_cst (unsigned_type_node, 1),
688				   true);
689
690	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
691	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
692	  add_bb_to_loop (then_bb, cond_bb->loop_father);
693	  add_bb_to_loop (else_bb, cond_bb->loop_father);
694	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
695	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
696
697	  if (gimple_in_ssa_p (cfun))
698	    {
699	      gphi *phi = create_phi_node (tmp_join, bb);
700	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
701	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
702	    }
703
704	  val = tmp_join;
705	}
706
707      gsi = gsi_start_bb (bb);
708      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
709				      false, GSI_CONTINUE_LINKING);
710    }
711
712  gsi = gsi_last_nondebug_bb (bb);
713  t = gimple_omp_parallel_data_arg (entry_stmt);
714  if (t == NULL)
715    t1 = null_pointer_node;
716  else
717    t1 = build_fold_addr_expr (t);
718  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
719  t2 = build_fold_addr_expr (child_fndecl);
720
721  vec_alloc (args, 4 + vec_safe_length (ws_args));
722  args->quick_push (t2);
723  args->quick_push (t1);
724  args->quick_push (val);
725  if (ws_args)
726    args->splice (*ws_args);
727  args->quick_push (flags);
728
729  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
730			       builtin_decl_explicit (start_ix), args);
731
732  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
733			    false, GSI_CONTINUE_LINKING);
734
735  if (hsa_gen_requested_p ()
736      && parallel_needs_hsa_kernel_p (region))
737    {
738      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
739      hsa_register_kernel (child_cnode);
740    }
741}
742
743/* Build the function call to GOMP_task to actually
744   generate the task operation.  BB is the block where to insert the code.  */
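
/* Schematically the generated call is

     GOMP_task (child_fn, &.omp_data_o, cpyfn, arg_size, arg_align,
		if_cond, flags, depend, priority);

   (or GOMP_taskloop{,_ull} with the loop bounds and step appended for
   taskloop constructs); this sketch just mirrors the argument vector
   built at the end of the function.  */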
745
746static void
747expand_task_call (struct omp_region *region, basic_block bb,
748		  gomp_task *entry_stmt)
749{
750  tree t1, t2, t3;
751  gimple_stmt_iterator gsi;
752  location_t loc = gimple_location (entry_stmt);
753
754  tree clauses = gimple_omp_task_clauses (entry_stmt);
755
756  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
757  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
758  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
759  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
760  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
761  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
762
763  unsigned int iflags
764    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
765      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
766      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
767
768  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
769  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
770  tree num_tasks = NULL_TREE;
771  bool ull = false;
772  if (taskloop_p)
773    {
774      gimple *g = last_stmt (region->outer->entry);
775      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
776		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
777      struct omp_for_data fd;
778      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
779      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
780      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
781				OMP_CLAUSE__LOOPTEMP_);
782      startvar = OMP_CLAUSE_DECL (startvar);
783      endvar = OMP_CLAUSE_DECL (endvar);
784      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
785      if (fd.loop.cond_code == LT_EXPR)
786	iflags |= GOMP_TASK_FLAG_UP;
787      tree tclauses = gimple_omp_for_clauses (g);
788      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
789      if (num_tasks)
790	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
791      else
792	{
793	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
794	  if (num_tasks)
795	    {
796	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
797	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
798	    }
799	  else
800	    num_tasks = integer_zero_node;
801	}
802      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
803      if (ifc == NULL_TREE)
804	iflags |= GOMP_TASK_FLAG_IF;
805      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
806	iflags |= GOMP_TASK_FLAG_NOGROUP;
807      ull = fd.iter_type == long_long_unsigned_type_node;
808    }
809  else if (priority)
810    iflags |= GOMP_TASK_FLAG_PRIORITY;
811
812  tree flags = build_int_cst (unsigned_type_node, iflags);
813
814  tree cond = boolean_true_node;
815  if (ifc)
816    {
817      if (taskloop_p)
818	{
819	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
820	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
821			       build_int_cst (unsigned_type_node,
822					      GOMP_TASK_FLAG_IF),
823			       build_int_cst (unsigned_type_node, 0));
824	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
825				   flags, t);
826	}
827      else
828	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
829    }
830
831  if (finalc)
832    {
833      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
834      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
835			   build_int_cst (unsigned_type_node,
836					  GOMP_TASK_FLAG_FINAL),
837			   build_int_cst (unsigned_type_node, 0));
838      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
839    }
840  if (depend)
841    depend = OMP_CLAUSE_DECL (depend);
842  else
843    depend = build_int_cst (ptr_type_node, 0);
844  if (priority)
845    priority = fold_convert (integer_type_node,
846			     OMP_CLAUSE_PRIORITY_EXPR (priority));
847  else
848    priority = integer_zero_node;
849
850  gsi = gsi_last_nondebug_bb (bb);
851  tree t = gimple_omp_task_data_arg (entry_stmt);
852  if (t == NULL)
853    t2 = null_pointer_node;
854  else
855    t2 = build_fold_addr_expr_loc (loc, t);
856  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
857  t = gimple_omp_task_copy_fn (entry_stmt);
858  if (t == NULL)
859    t3 = null_pointer_node;
860  else
861    t3 = build_fold_addr_expr_loc (loc, t);
862
863  if (taskloop_p)
864    t = build_call_expr (ull
865			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
866			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
867			 11, t1, t2, t3,
868			 gimple_omp_task_arg_size (entry_stmt),
869			 gimple_omp_task_arg_align (entry_stmt), flags,
870			 num_tasks, priority, startvar, endvar, step);
871  else
872    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
873			 9, t1, t2, t3,
874			 gimple_omp_task_arg_size (entry_stmt),
875			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
876			 depend, priority);
877
878  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
879			    false, GSI_CONTINUE_LINKING);
880}
881
882/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
883
884static tree
885vec2chain (vec<tree, va_gc> *v)
886{
887  tree chain = NULL_TREE, t;
888  unsigned ix;
889
890  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
891    {
892      DECL_CHAIN (t) = chain;
893      chain = t;
894    }
895
896  return chain;
897}
898
899/* Remove barriers in REGION->EXIT's block.  Note that this is only
900   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
901   is an implicit barrier, any barrier that a workshare inside the
902   GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
903   can now be removed.  */
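
/* For example, in

     #pragma omp parallel
       {
	 #pragma omp for
	   ...        <-- implicit barrier here
       }              <-- implicit barrier here too

   the workshare's trailing barrier is redundant with the join barrier of
   the enclosing parallel, so its GIMPLE_OMP_RETURN can be marked nowait
   (subject to the task-related check below).  */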
904
905static void
906remove_exit_barrier (struct omp_region *region)
907{
908  gimple_stmt_iterator gsi;
909  basic_block exit_bb;
910  edge_iterator ei;
911  edge e;
912  gimple *stmt;
913  int any_addressable_vars = -1;
914
915  exit_bb = region->exit;
916
917  /* If the parallel region doesn't return, we don't have REGION->EXIT
918     block at all.  */
919  if (! exit_bb)
920    return;
921
922  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
923     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
924     statements that can appear in between are extremely limited -- no
925     memory operations at all.  Here, we allow nothing at all, so the
926     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
927  gsi = gsi_last_nondebug_bb (exit_bb);
928  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
929  gsi_prev_nondebug (&gsi);
930  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
931    return;
932
933  FOR_EACH_EDGE (e, ei, exit_bb->preds)
934    {
935      gsi = gsi_last_nondebug_bb (e->src);
936      if (gsi_end_p (gsi))
937	continue;
938      stmt = gsi_stmt (gsi);
939      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
940	  && !gimple_omp_return_nowait_p (stmt))
941	{
942	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
943	     in many cases.  If there could be tasks queued, the barrier
944	     might be needed to let the tasks run before some local
945	     variable of the parallel that the task uses as shared
946	     runs out of scope.  The task can be spawned either
947	     from within current function (this would be easy to check)
948	     or from some function it calls and gets passed an address
949	     of such a variable.  */
950	  if (any_addressable_vars < 0)
951	    {
952	      gomp_parallel *parallel_stmt
953		= as_a <gomp_parallel *> (last_stmt (region->entry));
954	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
955	      tree local_decls, block, decl;
956	      unsigned ix;
957
958	      any_addressable_vars = 0;
959	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
960		if (TREE_ADDRESSABLE (decl))
961		  {
962		    any_addressable_vars = 1;
963		    break;
964		  }
965	      for (block = gimple_block (stmt);
966		   !any_addressable_vars
967		   && block
968		   && TREE_CODE (block) == BLOCK;
969		   block = BLOCK_SUPERCONTEXT (block))
970		{
971		  for (local_decls = BLOCK_VARS (block);
972		       local_decls;
973		       local_decls = DECL_CHAIN (local_decls))
974		    if (TREE_ADDRESSABLE (local_decls))
975		      {
976			any_addressable_vars = 1;
977			break;
978		      }
979		  if (block == gimple_block (parallel_stmt))
980		    break;
981		}
982	    }
983	  if (!any_addressable_vars)
984	    gimple_omp_return_set_nowait (stmt);
985	}
986    }
987}
988
989static void
990remove_exit_barriers (struct omp_region *region)
991{
992  if (region->type == GIMPLE_OMP_PARALLEL)
993    remove_exit_barrier (region);
994
995  if (region->inner)
996    {
997      region = region->inner;
998      remove_exit_barriers (region);
999      while (region->next)
1000	{
1001	  region = region->next;
1002	  remove_exit_barriers (region);
1003	}
1004    }
1005}
1006
1007/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1008   calls.  These can't be declared as const functions, but
1009   within one parallel body they are constant, so they can be
1010   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1011   which are declared const.  Similarly for task body, except
1012   that in untied task omp_get_thread_num () can change at any task
1013   scheduling point.  */
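
/* For instance, within one parallel body

     a = omp_get_thread_num ();
     ...
     b = omp_get_thread_num ();

   both calls may be redirected to the const __builtin_omp_get_thread_num,
   letting later passes CSE them into a single read (illustrative only).  */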
1014
1015static void
1016optimize_omp_library_calls (gimple *entry_stmt)
1017{
1018  basic_block bb;
1019  gimple_stmt_iterator gsi;
1020  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1021  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1022  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1023  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1024  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1025		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1026					  OMP_CLAUSE_UNTIED) != NULL);
1027
1028  FOR_EACH_BB_FN (bb, cfun)
1029    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1030      {
1031	gimple *call = gsi_stmt (gsi);
1032	tree decl;
1033
1034	if (is_gimple_call (call)
1035	    && (decl = gimple_call_fndecl (call))
1036	    && DECL_EXTERNAL (decl)
1037	    && TREE_PUBLIC (decl)
1038	    && DECL_INITIAL (decl) == NULL)
1039	  {
1040	    tree built_in;
1041
1042	    if (DECL_NAME (decl) == thr_num_id)
1043	      {
1044		/* In #pragma omp task untied omp_get_thread_num () can change
1045		   during the execution of the task region.  */
1046		if (untied_task)
1047		  continue;
1048		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1049	      }
1050	    else if (DECL_NAME (decl) == num_thr_id)
1051	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1052	    else
1053	      continue;
1054
1055	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1056		|| gimple_call_num_args (call) != 0)
1057	      continue;
1058
1059	    if (flag_exceptions && !TREE_NOTHROW (decl))
1060	      continue;
1061
1062	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1063		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1064					TREE_TYPE (TREE_TYPE (built_in))))
1065	      continue;
1066
1067	    gimple_call_set_fndecl (call, built_in);
1068	  }
1069      }
1070}
1071
1072/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1073   regimplified.  */
1074
1075static tree
1076expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1077{
1078  tree t = *tp;
1079
1080  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1081  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1082    return t;
1083
1084  if (TREE_CODE (t) == ADDR_EXPR)
1085    recompute_tree_invariant_for_addr_expr (t);
1086
1087  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1088  return NULL_TREE;
1089}
1090
1091/* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1092
1093static void
1094expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1095			 bool after)
1096{
1097  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1098  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1099				   !after, after ? GSI_CONTINUE_LINKING
1100						 : GSI_SAME_STMT);
1101  gimple *stmt = gimple_build_assign (to, from);
1102  if (after)
1103    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1104  else
1105    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1106  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1107      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1108    {
1109      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1110      gimple_regimplify_operands (stmt, &gsi);
1111    }
1112}
1113
1114/* Expand the OpenMP parallel or task directive starting at REGION.  */
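
/* Roughly, for

     #pragma omp parallel shared (i)
       body;

   the body is outlined into a new function (cf. bar.omp_fn.0 in the example
   near the top of this file) taking a pointer to the shared data record, and
   the directive itself is replaced by a call expanded in expand_parallel_call
   or expand_task_call.  */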
1115
1116static void
1117expand_omp_taskreg (struct omp_region *region)
1118{
1119  basic_block entry_bb, exit_bb, new_bb;
1120  struct function *child_cfun;
1121  tree child_fn, block, t;
1122  gimple_stmt_iterator gsi;
1123  gimple *entry_stmt, *stmt;
1124  edge e;
1125  vec<tree, va_gc> *ws_args;
1126
1127  entry_stmt = last_stmt (region->entry);
1128  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1129  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1130
1131  entry_bb = region->entry;
1132  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1133    exit_bb = region->cont;
1134  else
1135    exit_bb = region->exit;
1136
1137  if (is_combined_parallel (region))
1138    ws_args = region->ws_args;
1139  else
1140    ws_args = NULL;
1141
1142  if (child_cfun->cfg)
1143    {
1144      /* Due to inlining, it may happen that we have already outlined
1145	 the region, in which case all we need to do is make the
1146	 sub-graph unreachable and emit the parallel call.  */
1147      edge entry_succ_e, exit_succ_e;
1148
1149      entry_succ_e = single_succ_edge (entry_bb);
1150
1151      gsi = gsi_last_nondebug_bb (entry_bb);
1152      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154      gsi_remove (&gsi, true);
1155
1156      new_bb = entry_bb;
1157      if (exit_bb)
1158	{
1159	  exit_succ_e = single_succ_edge (exit_bb);
1160	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1161	}
1162      remove_edge_and_dominated_blocks (entry_succ_e);
1163    }
1164  else
1165    {
1166      unsigned srcidx, dstidx, num;
1167
1168      /* If the parallel region needs data sent from the parent
1169	 function, then the very first statement (except possible
1170	 tree profile counter updates) of the parallel body
1171	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1172	 &.OMP_DATA_O is passed as an argument to the child function,
1173	 we need to replace it with the argument as seen by the child
1174	 function.
1175
1176	 In most cases, this will end up being the identity assignment
1177	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1178	 a function call that has been inlined, the original PARM_DECL
1179	 .OMP_DATA_I may have been converted into a different local
1180		 variable, in which case we need to keep the assignment.  */
1181      if (gimple_omp_taskreg_data_arg (entry_stmt))
1182	{
1183	  basic_block entry_succ_bb
1184	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185				       : FALLTHRU_EDGE (entry_bb)->dest;
1186	  tree arg;
1187	  gimple *parcopy_stmt = NULL;
1188
1189	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1190	    {
1191	      gimple *stmt;
1192
1193	      gcc_assert (!gsi_end_p (gsi));
1194	      stmt = gsi_stmt (gsi);
1195	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196		continue;
1197
1198	      if (gimple_num_ops (stmt) == 2)
1199		{
1200		  tree arg = gimple_assign_rhs1 (stmt);
1201
1202		  /* We're ignoring the subcode because we're
1203		     effectively doing a STRIP_NOPS.  */
1204
1205		  if (TREE_CODE (arg) == ADDR_EXPR
1206		      && TREE_OPERAND (arg, 0)
1207			== gimple_omp_taskreg_data_arg (entry_stmt))
1208		    {
1209		      parcopy_stmt = stmt;
1210		      break;
1211		    }
1212		}
1213	    }
1214
1215	  gcc_assert (parcopy_stmt != NULL);
1216	  arg = DECL_ARGUMENTS (child_fn);
1217
1218	  if (!gimple_in_ssa_p (cfun))
1219	    {
1220	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1221		gsi_remove (&gsi, true);
1222	      else
1223		{
1224		  /* ?? Is setting the subcode really necessary ??  */
1225		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227		}
1228	    }
1229	  else
1230	    {
1231	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1232	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233	      /* We'd like to set the rhs to the default def in the child_fn,
1234		 but it's too early to create ssa names in the child_fn.
1235		 Instead, we set the rhs to the parm.  In
1236		 move_sese_region_to_fn, we introduce a default def for the
1237		 parm, map the parm to its default def, and once we encounter
1238		 this stmt, replace the parm with the default def.  */
1239	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240	      update_stmt (parcopy_stmt);
1241	    }
1242	}
1243
1244      /* Declare local variables needed in CHILD_CFUN.  */
1245      block = DECL_INITIAL (child_fn);
1246      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247      /* The gimplifier could record temporaries in parallel/task block
1248	 rather than in containing function's local_decls chain,
1249	 which would mean cgraph missed finalizing them.  Do it now.  */
1250      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252	  varpool_node::finalize_decl (t);
1253      DECL_SAVED_TREE (child_fn) = NULL;
1254      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1255      gimple_set_body (child_fn, NULL);
1256      TREE_USED (block) = 1;
1257
1258      /* Reset DECL_CONTEXT on function arguments.  */
1259      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260	DECL_CONTEXT (t) = child_fn;
1261
1262      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263	 so that it can be moved to the child function.  */
1264      gsi = gsi_last_nondebug_bb (entry_bb);
1265      stmt = gsi_stmt (gsi);
1266      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268      e = split_block (entry_bb, stmt);
1269      gsi_remove (&gsi, true);
1270      entry_bb = e->dest;
1271      edge e2 = NULL;
1272      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274      else
1275	{
1276	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277	  gcc_assert (e2->dest == region->exit);
1278	  remove_edge (BRANCH_EDGE (entry_bb));
1279	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280	  gsi = gsi_last_nondebug_bb (region->exit);
1281	  gcc_assert (!gsi_end_p (gsi)
1282		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283	  gsi_remove (&gsi, true);
1284	}
1285
1286      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1287      if (exit_bb)
1288	{
1289	  gsi = gsi_last_nondebug_bb (exit_bb);
1290	  gcc_assert (!gsi_end_p (gsi)
1291		      && (gimple_code (gsi_stmt (gsi))
1292			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293	  stmt = gimple_build_return (NULL);
1294	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295	  gsi_remove (&gsi, true);
1296	}
1297
1298      /* Move the parallel region into CHILD_CFUN.  */
1299
1300      if (gimple_in_ssa_p (cfun))
1301	{
1302	  init_tree_ssa (child_cfun);
1303	  init_ssa_operands (child_cfun);
1304	  child_cfun->gimple_df->in_ssa_p = true;
1305	  block = NULL_TREE;
1306	}
1307      else
1308	block = gimple_block (entry_stmt);
1309
1310      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1311      if (exit_bb)
1312	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1313      if (e2)
1314	{
1315	  basic_block dest_bb = e2->dest;
1316	  if (!exit_bb)
1317	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1318	  remove_edge (e2);
1319	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1320	}
1321      /* When the OMP expansion process cannot guarantee an up-to-date
1322	 loop tree, arrange for the child function to fix up loops.  */
1323      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1324	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1325
1326      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1327      num = vec_safe_length (child_cfun->local_decls);
1328      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1329	{
1330	  t = (*child_cfun->local_decls)[srcidx];
1331	  if (DECL_CONTEXT (t) == cfun->decl)
1332	    continue;
1333	  if (srcidx != dstidx)
1334	    (*child_cfun->local_decls)[dstidx] = t;
1335	  dstidx++;
1336	}
1337      if (dstidx != num)
1338	vec_safe_truncate (child_cfun->local_decls, dstidx);
1339
1340      /* Inform the callgraph about the new function.  */
1341      child_cfun->curr_properties = cfun->curr_properties;
1342      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1343      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1344      cgraph_node *node = cgraph_node::get_create (child_fn);
1345      node->parallelized_function = 1;
1346      cgraph_node::add_new_function (child_fn, true);
1347
1348      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1349		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1350
1351      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1352	 fixed in a following pass.  */
1353      push_cfun (child_cfun);
1354      if (need_asm)
1355	assign_assembler_name_if_needed (child_fn);
1356
1357      if (optimize)
1358	optimize_omp_library_calls (entry_stmt);
1359      update_max_bb_count ();
1360      cgraph_edge::rebuild_edges ();
1361
1362      /* Some EH regions might become dead, see PR34608.  If
1363	 pass_cleanup_cfg isn't the first pass to happen with the
1364	 new child, these dead EH edges might cause problems.
1365	 Clean them up now.  */
1366      if (flag_exceptions)
1367	{
1368	  basic_block bb;
1369	  bool changed = false;
1370
1371	  FOR_EACH_BB_FN (bb, cfun)
1372	    changed |= gimple_purge_dead_eh_edges (bb);
1373	  if (changed)
1374	    cleanup_tree_cfg ();
1375	}
1376      if (gimple_in_ssa_p (cfun))
1377	update_ssa (TODO_update_ssa);
1378      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1379	verify_loop_structure ();
1380      pop_cfun ();
1381
1382      if (dump_file && !gimple_in_ssa_p (cfun))
1383	{
1384	  omp_any_child_fn_dumped = true;
1385	  dump_function_header (dump_file, child_fn, dump_flags);
1386	  dump_function_to_file (child_fn, dump_file, dump_flags);
1387	}
1388    }
1389
1390  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1391
1392  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1393    expand_parallel_call (region, new_bb,
1394			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1395  else
1396    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1397  if (gimple_in_ssa_p (cfun))
1398    update_ssa (TODO_update_ssa_only_virtuals);
1399}
1400
1401/* Information about members of an OpenACC collapsed loop nest.  */
1402
1403struct oacc_collapse
1404{
1405  tree base;  /* Base value.  */
1406  tree iters; /* Number of steps.  */
1407  tree step;  /* Step size.  */
1408  tree tile;  /* Tile increment (if tiled).  */
1409  tree outer; /* Tile iterator var. */
1410};
1411
1412/* Helper for expand_oacc_for.  Determine collapsed loop information.
1413   Fill in COUNTS array.  Emit any initialization code before GSI.
1414   Return the calculated outer loop bound of BOUND_TYPE.  */
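
/* Worked example (illustrative): for an upward-counting member loop with
   n1 = 0, n2 = 10, step = 3 the code below computes
     range = 10 - 0 = 10
     iters = (range - 1 + 3) / 3 = 4
   i.e. the iterations 0, 3, 6, 9, and the returned bound is the product of
   all member loops' ITERS values.  */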
1415
1416static tree
1417expand_oacc_collapse_init (const struct omp_for_data *fd,
1418			   gimple_stmt_iterator *gsi,
1419			   oacc_collapse *counts, tree bound_type,
1420			   location_t loc)
1421{
1422  tree tiling = fd->tiling;
1423  tree total = build_int_cst (bound_type, 1);
1424  int ix;
1425
1426  gcc_assert (integer_onep (fd->loop.step));
1427  gcc_assert (integer_zerop (fd->loop.n1));
1428
1429  /* When tiling, the first operand of the tile clause applies to the
1430     innermost loop, and we work outwards from there.  Seems
1431     backwards, but whatever.  */
1432  for (ix = fd->collapse; ix--;)
1433    {
1434      const omp_for_data_loop *loop = &fd->loops[ix];
1435
1436      tree iter_type = TREE_TYPE (loop->v);
1437      tree diff_type = iter_type;
1438      tree plus_type = iter_type;
1439
1440      gcc_assert (loop->cond_code == fd->loop.cond_code);
1441
1442      if (POINTER_TYPE_P (iter_type))
1443	plus_type = sizetype;
1444      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1445	diff_type = signed_type_for (diff_type);
1446      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1447	diff_type = integer_type_node;
1448
1449      if (tiling)
1450	{
1451	  tree num = build_int_cst (integer_type_node, fd->collapse);
1452	  tree loop_no = build_int_cst (integer_type_node, ix);
1453	  tree tile = TREE_VALUE (tiling);
1454	  gcall *call
1455	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1456					  /* gwv-outer=*/integer_zero_node,
1457					  /* gwv-inner=*/integer_zero_node);
1458
1459	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1460	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1461	  gimple_call_set_lhs (call, counts[ix].tile);
1462	  gimple_set_location (call, loc);
1463	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1464
1465	  tiling = TREE_CHAIN (tiling);
1466	}
1467      else
1468	{
1469	  counts[ix].tile = NULL;
1470	  counts[ix].outer = loop->v;
1471	}
1472
1473      tree b = loop->n1;
1474      tree e = loop->n2;
1475      tree s = loop->step;
1476      bool up = loop->cond_code == LT_EXPR;
1477      tree dir = build_int_cst (diff_type, up ? +1 : -1);
1478      bool negating;
1479      tree expr;
1480
1481      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1482				    true, GSI_SAME_STMT);
1483      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1484				    true, GSI_SAME_STMT);
1485
1486      /* Convert the step, avoiding possible unsigned->signed overflow.  */
1487      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1488      if (negating)
1489	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1490      s = fold_convert (diff_type, s);
1491      if (negating)
1492	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1493      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1494				    true, GSI_SAME_STMT);
1495
1496      /* Determine the range, avoiding possible unsigned->signed overflow.  */
1497      negating = !up && TYPE_UNSIGNED (iter_type);
1498      expr = fold_build2 (MINUS_EXPR, plus_type,
1499			  fold_convert (plus_type, negating ? b : e),
1500			  fold_convert (plus_type, negating ? e : b));
1501      expr = fold_convert (diff_type, expr);
1502      if (negating)
1503	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1504      tree range = force_gimple_operand_gsi
1505	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1506
1507      /* Determine number of iterations.  */
1508      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1509      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1510      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1511
1512      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1513					     true, GSI_SAME_STMT);
1514
1515      counts[ix].base = b;
1516      counts[ix].iters = iters;
1517      counts[ix].step = s;
1518
1519      total = fold_build2 (MULT_EXPR, bound_type, total,
1520			   fold_convert (bound_type, iters));
1521    }
1522
1523  return total;
1524}
1525
1526/* Emit initializers for collapsed loop members.  INNER is true if
1527   this is for the element loop of a TILE.  IVAR is the outer
1528   loop iteration variable, from which collapsed loop iteration values
1529   are calculated.  COUNTS array has been initialized by
1530   expand_oacc_collapse_init.  */
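
/* Illustrative decomposition for collapse == 2 with the inner loop having
   ITERS == 4: for outer iteration value ivar == 9 the code below recovers
   inner = 9 % 4 = 1 and outer = 9 / 4 = 2, each then scaled by the member
   loop's STEP and offset by its BASE (or by the tile iterator when INNER
   is true).  */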
1531
1532static void
1533expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1534			   gimple_stmt_iterator *gsi,
1535			   const oacc_collapse *counts, tree ivar)
1536{
1537  tree ivar_type = TREE_TYPE (ivar);
1538
1539  /*  The most rapidly changing iteration variable is the innermost
1540      one.  */
1541  for (int ix = fd->collapse; ix--;)
1542    {
1543      const omp_for_data_loop *loop = &fd->loops[ix];
1544      const oacc_collapse *collapse = &counts[ix];
1545      tree v = inner ? loop->v : collapse->outer;
1546      tree iter_type = TREE_TYPE (v);
1547      tree diff_type = TREE_TYPE (collapse->step);
1548      tree plus_type = iter_type;
1549      enum tree_code plus_code = PLUS_EXPR;
1550      tree expr;
1551
1552      if (POINTER_TYPE_P (iter_type))
1553	{
1554	  plus_code = POINTER_PLUS_EXPR;
1555	  plus_type = sizetype;
1556	}
1557
1558      expr = ivar;
1559      if (ix)
1560	{
1561	  tree mod = fold_convert (ivar_type, collapse->iters);
1562	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1563	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1564	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1565					   true, GSI_SAME_STMT);
1566	}
1567
1568      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1569			  collapse->step);
1570      expr = fold_build2 (plus_code, iter_type,
1571			  inner ? collapse->outer : collapse->base,
1572			  fold_convert (plus_type, expr));
1573      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1574				       true, GSI_SAME_STMT);
1575      gassign *ass = gimple_build_assign (v, expr);
1576      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1577    }
1578}
1579
1580/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1581   of the combined collapse > 1 loop constructs, generate code like:
1582	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1583	if (cond3 is <)
1584	  adj = STEP3 - 1;
1585	else
1586	  adj = STEP3 + 1;
1587	count3 = (adj + N32 - N31) / STEP3;
1588	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1589	if (cond2 is <)
1590	  adj = STEP2 - 1;
1591	else
1592	  adj = STEP2 + 1;
1593	count2 = (adj + N22 - N21) / STEP2;
1594	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1595	if (cond1 is <)
1596	  adj = STEP1 - 1;
1597	else
1598	  adj = STEP1 + 1;
1599	count1 = (adj + N12 - N11) / STEP1;
1600	count = count1 * count2 * count3;
1601   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1602	count = 0;
1603   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1604   of the combined loop constructs, just initialize COUNTS array
1605   from the _looptemp_ clauses.  */
1606
1607/* NOTE: It *could* be better to moosh all of the BBs together,
1608   creating one larger BB with all the computation and the unexpected
1609   jump at the end.  I.e.
1610
1611   bool zero3, zero2, zero1, zero;
1612
1613   zero3 = N32 c3 N31;
1614   count3 = (N32 - N31) /[cl] STEP3;
1615   zero2 = N22 c2 N21;
1616   count2 = (N22 - N21) /[cl] STEP2;
1617   zero1 = N12 c1 N11;
1618   count1 = (N12 - N11) /[cl] STEP1;
1619   zero = zero3 || zero2 || zero1;
1620   count = count1 * count2 * count3;
1621   if (__builtin_expect(zero, false)) goto zero_iter_bb;
1622
1623   After all, we expect the zero=false, and thus we expect to have to
1624   evaluate all of the comparison expressions, so short-circuiting
1625   oughtn't be a win.  Since the condition isn't protecting a
1626   denominator, we're not concerned about divide-by-zero, so we can
1627   fully evaluate count even if a numerator turned out to be wrong.
1628
1629   It seems like putting this all together would create much better
1630   scheduling opportunities, and less pressure on the chip's branch
1631   predictor.  */
1632
1633static void
1634expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1635			    basic_block &entry_bb, tree *counts,
1636			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1637			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1638			    basic_block &l2_dom_bb)
1639{
1640  tree t, type = TREE_TYPE (fd->loop.v);
1641  edge e, ne;
1642  int i;
1643
1644  /* Collapsed loops need work for expansion into SSA form.  */
1645  gcc_assert (!gimple_in_ssa_p (cfun));
1646
1647  if (gimple_omp_for_combined_into_p (fd->for_stmt)
1648      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1649    {
1650      gcc_assert (fd->ordered == 0);
1651      /* First two _looptemp_ clauses are for istart/iend, counts[0]
1652	 isn't supposed to be handled, as the inner loop doesn't
1653	 use it.  */
1654      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1655				     OMP_CLAUSE__LOOPTEMP_);
1656      gcc_assert (innerc);
1657      for (i = 0; i < fd->collapse; i++)
1658	{
1659	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1660				    OMP_CLAUSE__LOOPTEMP_);
1661	  gcc_assert (innerc);
1662	  if (i)
1663	    counts[i] = OMP_CLAUSE_DECL (innerc);
1664	  else
1665	    counts[0] = NULL_TREE;
1666	}
1667      return;
1668    }
1669
1670  for (i = fd->collapse; i < fd->ordered; i++)
1671    {
1672      tree itype = TREE_TYPE (fd->loops[i].v);
1673      counts[i] = NULL_TREE;
1674      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1675		       fold_convert (itype, fd->loops[i].n1),
1676		       fold_convert (itype, fd->loops[i].n2));
1677      if (t && integer_zerop (t))
1678	{
1679	  for (i = fd->collapse; i < fd->ordered; i++)
1680	    counts[i] = build_int_cst (type, 0);
1681	  break;
1682	}
1683    }
1684  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1685    {
1686      tree itype = TREE_TYPE (fd->loops[i].v);
1687
1688      if (i >= fd->collapse && counts[i])
1689	continue;
1690      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1691	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1692				fold_convert (itype, fd->loops[i].n1),
1693				fold_convert (itype, fd->loops[i].n2)))
1694	      == NULL_TREE || !integer_onep (t)))
1695	{
1696	  gcond *cond_stmt;
1697	  tree n1, n2;
1698	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1699	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1700					 true, GSI_SAME_STMT);
1701	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1702	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1703					 true, GSI_SAME_STMT);
1704	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1705					 NULL_TREE, NULL_TREE);
1706	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1707	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1708			 expand_omp_regimplify_p, NULL, NULL)
1709	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1710			    expand_omp_regimplify_p, NULL, NULL))
1711	    {
1712	      *gsi = gsi_for_stmt (cond_stmt);
1713	      gimple_regimplify_operands (cond_stmt, gsi);
1714	    }
1715	  e = split_block (entry_bb, cond_stmt);
1716	  basic_block &zero_iter_bb
1717	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1718	  int &first_zero_iter
1719	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1720	  if (zero_iter_bb == NULL)
1721	    {
1722	      gassign *assign_stmt;
1723	      first_zero_iter = i;
1724	      zero_iter_bb = create_empty_bb (entry_bb);
1725	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1726	      *gsi = gsi_after_labels (zero_iter_bb);
1727	      if (i < fd->collapse)
1728		assign_stmt = gimple_build_assign (fd->loop.n2,
1729						   build_zero_cst (type));
1730	      else
1731		{
1732		  counts[i] = create_tmp_reg (type, ".count");
1733		  assign_stmt
1734		    = gimple_build_assign (counts[i], build_zero_cst (type));
1735		}
1736	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1737	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1738				       entry_bb);
1739	    }
1740	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1741	  ne->probability = profile_probability::very_unlikely ();
1742	  e->flags = EDGE_TRUE_VALUE;
1743	  e->probability = ne->probability.invert ();
1744	  if (l2_dom_bb == NULL)
1745	    l2_dom_bb = entry_bb;
1746	  entry_bb = e->dest;
1747	  *gsi = gsi_last_nondebug_bb (entry_bb);
1748	}
1749
1750      if (POINTER_TYPE_P (itype))
1751	itype = signed_type_for (itype);
1752      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1753				 ? -1 : 1));
1754      t = fold_build2 (PLUS_EXPR, itype,
1755		       fold_convert (itype, fd->loops[i].step), t);
1756      t = fold_build2 (PLUS_EXPR, itype, t,
1757		       fold_convert (itype, fd->loops[i].n2));
1758      t = fold_build2 (MINUS_EXPR, itype, t,
1759		       fold_convert (itype, fd->loops[i].n1));
1760      /* ?? We could probably use CEIL_DIV_EXPR instead of
1761	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1762	 generate the same code in the end because generically we
1763	 don't know that the values involved must be negative for
1764	 GT??  */
1765      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1766	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1767			 fold_build1 (NEGATE_EXPR, itype, t),
1768			 fold_build1 (NEGATE_EXPR, itype,
1769				      fold_convert (itype,
1770						    fd->loops[i].step)));
1771      else
1772	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1773			 fold_convert (itype, fd->loops[i].step));
1774      t = fold_convert (type, t);
1775      if (TREE_CODE (t) == INTEGER_CST)
1776	counts[i] = t;
1777      else
1778	{
1779	  if (i < fd->collapse || i != first_zero_iter2)
1780	    counts[i] = create_tmp_reg (type, ".count");
1781	  expand_omp_build_assign (gsi, counts[i], t);
1782	}
1783      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1784	{
1785	  if (i == 0)
1786	    t = counts[0];
1787	  else
1788	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1789	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1790	}
1791    }
1792}
1793
1794/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1795	T = V;
1796	V3 = N31 + (T % count3) * STEP3;
1797	T = T / count3;
1798	V2 = N21 + (T % count2) * STEP2;
1799	T = T / count2;
1800	V1 = N11 + T * STEP1;
1801   if this loop doesn't have an inner loop construct combined with it.
1802   If it does have an inner loop construct combined with it and the
1803   iteration count isn't known constant, store values from counts array
1804   into its _looptemp_ temporaries instead.  */
1805
1806static void
1807expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1808			  tree *counts, gimple *inner_stmt, tree startvar)
1809{
1810  int i;
1811  if (gimple_omp_for_combined_p (fd->for_stmt))
1812    {
1813      /* If fd->loop.n2 is constant, then no propagation of the counts
1814	 is needed, they are constant.  */
1815      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1816	return;
1817
1818      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1819		     ? gimple_omp_taskreg_clauses (inner_stmt)
1820		     : gimple_omp_for_clauses (inner_stmt);
1821      /* First two _looptemp_ clauses are for istart/iend, counts[0]
1822	 isn't supposed to be handled, as the inner loop doesn't
1823	 use it.  */
1824      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1825      gcc_assert (innerc);
1826      for (i = 0; i < fd->collapse; i++)
1827	{
1828	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1829				    OMP_CLAUSE__LOOPTEMP_);
1830	  gcc_assert (innerc);
1831	  if (i)
1832	    {
1833	      tree tem = OMP_CLAUSE_DECL (innerc);
1834	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1835	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1836					    false, GSI_CONTINUE_LINKING);
1837	      gassign *stmt = gimple_build_assign (tem, t);
1838	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1839	    }
1840	}
1841      return;
1842    }
1843
1844  tree type = TREE_TYPE (fd->loop.v);
1845  tree tem = create_tmp_reg (type, ".tem");
1846  gassign *stmt = gimple_build_assign (tem, startvar);
1847  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1848
1849  for (i = fd->collapse - 1; i >= 0; i--)
1850    {
1851      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1852      itype = vtype;
1853      if (POINTER_TYPE_P (vtype))
1854	itype = signed_type_for (vtype);
1855      if (i != 0)
1856	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1857      else
1858	t = tem;
1859      t = fold_convert (itype, t);
1860      t = fold_build2 (MULT_EXPR, itype, t,
1861		       fold_convert (itype, fd->loops[i].step));
1862      if (POINTER_TYPE_P (vtype))
1863	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1864      else
1865	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1866      t = force_gimple_operand_gsi (gsi, t,
1867				    DECL_P (fd->loops[i].v)
1868				    && TREE_ADDRESSABLE (fd->loops[i].v),
1869				    NULL_TREE, false,
1870				    GSI_CONTINUE_LINKING);
1871      stmt = gimple_build_assign (fd->loops[i].v, t);
1872      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1873      if (i != 0)
1874	{
1875	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1876	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1877					false, GSI_CONTINUE_LINKING);
1878	  stmt = gimple_build_assign (tem, t);
1879	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1880	}
1881    }
1882}
1883
1884/* Helper function for expand_omp_for_*.  Generate code like:
1885    L10:
1886	V3 += STEP3;
1887	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1888    L11:
1889	V3 = N31;
1890	V2 += STEP2;
1891	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1892    L12:
1893	V2 = N21;
1894	V1 += STEP1;
1895	goto BODY_BB;  */
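
/* For a collapse(2) nest over I (outer) and J (inner, running from N21 to
   N22 by STEP2) this reduces to the two-rung ladder (editorial sketch):

    L10:
	J += STEP2;
	if (J cond2 N22) goto BODY_BB; else goto L11;
    L11:
	J = N21;
	I += STEP1;
	goto BODY_BB;

   The outermost variable needs no bounds check here because the caller's
   driver loop already limits the total number of iterations.  */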
1896
1897static basic_block
1898extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1899			     basic_block body_bb)
1900{
1901  basic_block last_bb, bb, collapse_bb = NULL;
1902  int i;
1903  gimple_stmt_iterator gsi;
1904  edge e;
1905  tree t;
1906  gimple *stmt;
1907
1908  last_bb = cont_bb;
1909  for (i = fd->collapse - 1; i >= 0; i--)
1910    {
1911      tree vtype = TREE_TYPE (fd->loops[i].v);
1912
1913      bb = create_empty_bb (last_bb);
1914      add_bb_to_loop (bb, last_bb->loop_father);
1915      gsi = gsi_start_bb (bb);
1916
1917      if (i < fd->collapse - 1)
1918	{
1919	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1920	  e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1921
1922	  t = fd->loops[i + 1].n1;
1923	  t = force_gimple_operand_gsi (&gsi, t,
1924					DECL_P (fd->loops[i + 1].v)
1925					&& TREE_ADDRESSABLE (fd->loops[i
1926								       + 1].v),
1927					NULL_TREE, false,
1928					GSI_CONTINUE_LINKING);
1929	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1930	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1931	}
1932      else
1933	collapse_bb = bb;
1934
1935      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1936
1937      if (POINTER_TYPE_P (vtype))
1938	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1939      else
1940	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1941      t = force_gimple_operand_gsi (&gsi, t,
1942				    DECL_P (fd->loops[i].v)
1943				    && TREE_ADDRESSABLE (fd->loops[i].v),
1944				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1945      stmt = gimple_build_assign (fd->loops[i].v, t);
1946      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1947
1948      if (i > 0)
1949	{
1950	  t = fd->loops[i].n2;
1951	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1952					false, GSI_CONTINUE_LINKING);
1953	  tree v = fd->loops[i].v;
1954	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1955	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1956					  false, GSI_CONTINUE_LINKING);
1957	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1958	  stmt = gimple_build_cond_empty (t);
1959	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1960	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1961			 expand_omp_regimplify_p, NULL, NULL)
1962	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1963			    expand_omp_regimplify_p, NULL, NULL))
1964	    gimple_regimplify_operands (stmt, &gsi);
1965	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1966	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1967	}
1968      else
1969	make_edge (bb, body_bb, EDGE_FALLTHRU);
1970      last_bb = bb;
1971    }
1972
1973  return collapse_bb;
1974}
1975
1976/* Expand #pragma omp ordered depend(source).  */
1977
1978static void
1979expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1980			   tree *counts, location_t loc)
1981{
1982  enum built_in_function source_ix
1983    = fd->iter_type == long_integer_type_node
1984      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1985  gimple *g
1986    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1987			 build_fold_addr_expr (counts[fd->ordered]));
1988  gimple_set_location (g, loc);
1989  gsi_insert_before (gsi, g, GSI_SAME_STMT);
1990}
1991
1992/* Expand a single depend from #pragma omp ordered depend(sink:...).  */
1993
1994static void
1995expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1996			 tree *counts, tree c, location_t loc)
1997{
1998  auto_vec<tree, 10> args;
1999  enum built_in_function sink_ix
2000    = fd->iter_type == long_integer_type_node
2001      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2002  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2003  int i;
2004  gimple_stmt_iterator gsi2 = *gsi;
2005  bool warned_step = false;
2006
2007  for (i = 0; i < fd->ordered; i++)
2008    {
2009      tree step = NULL_TREE;
2010      off = TREE_PURPOSE (deps);
2011      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2012	{
2013	  step = TREE_OPERAND (off, 1);
2014	  off = TREE_OPERAND (off, 0);
2015	}
2016      if (!integer_zerop (off))
2017	{
2018	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2019		      || fd->loops[i].cond_code == GT_EXPR);
2020	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2021	  if (step)
2022	    {
2023	      /* Non-simple Fortran DO loops.  If step is variable,
2024		 we don't even know the direction at compile time, so
2025		 we can't warn.  */
2026	      if (TREE_CODE (step) != INTEGER_CST)
2027		break;
2028	      forward = tree_int_cst_sgn (step) != -1;
2029	    }
2030	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2031	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2032				"lexically later iteration");
2033	  break;
2034	}
2035      deps = TREE_CHAIN (deps);
2036    }
2037  /* If all offsets corresponding to the collapsed loops are zero,
2038     this depend clause can be ignored.  FIXME: but there is still a
2039     flush needed.  We need to emit one __sync_synchronize () for it
2040     though (perhaps conditionally)?  Solve this together with the
2041     conservative dependence folding optimization.
2042  if (i >= fd->collapse)
2043    return;  */
2044
2045  deps = OMP_CLAUSE_DECL (c);
2046  gsi_prev (&gsi2);
2047  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2048  edge e2 = split_block_after_labels (e1->dest);
2049
2050  gsi2 = gsi_after_labels (e1->dest);
2051  *gsi = gsi_last_bb (e1->src);
2052  for (i = 0; i < fd->ordered; i++)
2053    {
2054      tree itype = TREE_TYPE (fd->loops[i].v);
2055      tree step = NULL_TREE;
2056      tree orig_off = NULL_TREE;
2057      if (POINTER_TYPE_P (itype))
2058	itype = sizetype;
2059      if (i)
2060	deps = TREE_CHAIN (deps);
2061      off = TREE_PURPOSE (deps);
2062      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2063	{
2064	  step = TREE_OPERAND (off, 1);
2065	  off = TREE_OPERAND (off, 0);
2066	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2067		      && integer_onep (fd->loops[i].step)
2068		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2069	}
2070      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2071      if (step)
2072	{
2073	  off = fold_convert_loc (loc, itype, off);
2074	  orig_off = off;
2075	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2076	}
2077
2078      if (integer_zerop (off))
2079	t = boolean_true_node;
2080      else
2081	{
2082	  tree a;
2083	  tree co = fold_convert_loc (loc, itype, off);
2084	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2085	    {
2086	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2087		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2088	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2089				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2090				   co);
2091	    }
2092	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2093	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2094				 fd->loops[i].v, co);
2095	  else
2096	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097				 fd->loops[i].v, co);
2098	  if (step)
2099	    {
2100	      tree t1, t2;
2101	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2102		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2103				      fd->loops[i].n1);
2104	      else
2105		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2106				      fd->loops[i].n2);
2107	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2108		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109				      fd->loops[i].n2);
2110	      else
2111		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2112				      fd->loops[i].n1);
2113	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2114				   step, build_int_cst (TREE_TYPE (step), 0));
2115	      if (TREE_CODE (step) != INTEGER_CST)
2116		{
2117		  t1 = unshare_expr (t1);
2118		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2119						 false, GSI_CONTINUE_LINKING);
2120		  t2 = unshare_expr (t2);
2121		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2122						 false, GSI_CONTINUE_LINKING);
2123		}
2124	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2125				   t, t2, t1);
2126	    }
2127	  else if (fd->loops[i].cond_code == LT_EXPR)
2128	    {
2129	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2130		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2131				     fd->loops[i].n1);
2132	      else
2133		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2134				     fd->loops[i].n2);
2135	    }
2136	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2137	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2138				 fd->loops[i].n2);
2139	  else
2140	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2141				 fd->loops[i].n1);
2142	}
2143      if (cond)
2144	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2145      else
2146	cond = t;
2147
2148      off = fold_convert_loc (loc, itype, off);
2149
2150      if (step
2151	  || (fd->loops[i].cond_code == LT_EXPR
2152	      ? !integer_onep (fd->loops[i].step)
2153	      : !integer_minus_onep (fd->loops[i].step)))
2154	{
2155	  if (step == NULL_TREE
2156	      && TYPE_UNSIGNED (itype)
2157	      && fd->loops[i].cond_code == GT_EXPR)
2158	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2159				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2160						  s));
2161	  else
2162	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2163				 orig_off ? orig_off : off, s);
2164	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2165			       build_int_cst (itype, 0));
2166	  if (integer_zerop (t) && !warned_step)
2167	    {
2168	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2169				  "in the iteration space");
2170	      warned_step = true;
2171	    }
2172	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2173				  cond, t);
2174	}
2175
2176      if (i <= fd->collapse - 1 && fd->collapse > 1)
2177	t = fd->loop.v;
2178      else if (counts[i])
2179	t = counts[i];
2180      else
2181	{
2182	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2183			       fd->loops[i].v, fd->loops[i].n1);
2184	  t = fold_convert_loc (loc, fd->iter_type, t);
2185	}
2186      if (step)
2187	/* OFF was already divided by STEP above.  */;
2188      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2189	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2190			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2191						s));
2192      else
2193	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2194      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2195	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2196      off = fold_convert_loc (loc, fd->iter_type, off);
2197      if (i <= fd->collapse - 1 && fd->collapse > 1)
2198	{
2199	  if (i)
2200	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2201				   off);
2202	  if (i < fd->collapse - 1)
2203	    {
2204	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2205				      counts[i]);
2206	      continue;
2207	    }
2208	}
2209      off = unshare_expr (off);
2210      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2211      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2212				    true, GSI_SAME_STMT);
2213      args.safe_push (t);
2214    }
2215  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2216  gimple_set_location (g, loc);
2217  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2218
2219  cond = unshare_expr (cond);
2220  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2221				   GSI_CONTINUE_LINKING);
2222  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2223  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2224  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2225  e1->probability = e3->probability.invert ();
2226  e1->flags = EDGE_TRUE_VALUE;
2227  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2228
2229  *gsi = gsi_after_labels (e2->dest);
2230}
2231
2232/* Expand all #pragma omp ordered depend(source) and
2233   #pragma omp ordered depend(sink:...) constructs in the current
2234   #pragma omp for ordered(n) region.  */
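
/* For reference, a typical doacross loop handled here looks like
   (illustrative source, not emitted code):

	#pragma omp for ordered(2)
	for (i = 1; i < n; i++)
	  for (j = 1; j < m; j++)
	    {
	      #pragma omp ordered depend(sink: i-1,j) depend(sink: i,j-1)
	      a[i][j] = f (a[i-1][j], a[i][j-1]);
	      #pragma omp ordered depend(source)
	    }

   Each depend(sink:...) clause is expanded into a guarded
   GOMP_doacross_wait (or GOMP_doacross_ull_wait) call, and the
   depend(source) into a GOMP_doacross_post (or GOMP_doacross_ull_post)
   call publishing the current iteration's counters.  */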
2235
2236static void
2237expand_omp_ordered_source_sink (struct omp_region *region,
2238				struct omp_for_data *fd, tree *counts,
2239				basic_block cont_bb)
2240{
2241  struct omp_region *inner;
2242  int i;
2243  for (i = fd->collapse - 1; i < fd->ordered; i++)
2244    if (i == fd->collapse - 1 && fd->collapse > 1)
2245      counts[i] = NULL_TREE;
2246    else if (i >= fd->collapse && !cont_bb)
2247      counts[i] = build_zero_cst (fd->iter_type);
2248    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2249	     && integer_onep (fd->loops[i].step))
2250      counts[i] = NULL_TREE;
2251    else
2252      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2253  tree atype
2254    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2255  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2256  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2257
2258  for (inner = region->inner; inner; inner = inner->next)
2259    if (inner->type == GIMPLE_OMP_ORDERED)
2260      {
2261	gomp_ordered *ord_stmt = inner->ord_stmt;
2262	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2263	location_t loc = gimple_location (ord_stmt);
2264	tree c;
2265	for (c = gimple_omp_ordered_clauses (ord_stmt);
2266	     c; c = OMP_CLAUSE_CHAIN (c))
2267	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2268	    break;
2269	if (c)
2270	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2271	for (c = gimple_omp_ordered_clauses (ord_stmt);
2272	     c; c = OMP_CLAUSE_CHAIN (c))
2273	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2274	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2275	gsi_remove (&gsi, true);
2276      }
2277}
2278
2279/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2280   collapsed.  */
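
/* E.g. (editorial sketch) for the ordered(2) example above without a
   collapse clause, only the outer I loop is divided among the threads;
   the inner J loop is rebuilt here as an ordinary sequential loop around
   the body, roughly

	for (J = N21; J cond2 N22; J += STEP2)
	  {
	    .orditera[1] = iteration-number-of-J;
	    BODY;
	  }

   where .orditera is the counts[fd->ordered] array that the doacross
   runtime calls read.  */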
2281
2282static basic_block
2283expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2284			      basic_block cont_bb, basic_block body_bb,
2285			      bool ordered_lastprivate)
2286{
2287  if (fd->ordered == fd->collapse)
2288    return cont_bb;
2289
2290  if (!cont_bb)
2291    {
2292      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2293      for (int i = fd->collapse; i < fd->ordered; i++)
2294	{
2295	  tree type = TREE_TYPE (fd->loops[i].v);
2296	  tree n1 = fold_convert (type, fd->loops[i].n1);
2297	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2298	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2299			      size_int (i - fd->collapse + 1),
2300			      NULL_TREE, NULL_TREE);
2301	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2302	}
2303      return NULL;
2304    }
2305
2306  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2307    {
2308      tree t, type = TREE_TYPE (fd->loops[i].v);
2309      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2310      expand_omp_build_assign (&gsi, fd->loops[i].v,
2311			       fold_convert (type, fd->loops[i].n1));
2312      if (counts[i])
2313	expand_omp_build_assign (&gsi, counts[i],
2314				 build_zero_cst (fd->iter_type));
2315      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2316			  size_int (i - fd->collapse + 1),
2317			  NULL_TREE, NULL_TREE);
2318      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2319      if (!gsi_end_p (gsi))
2320	gsi_prev (&gsi);
2321      else
2322	gsi = gsi_last_bb (body_bb);
2323      edge e1 = split_block (body_bb, gsi_stmt (gsi));
2324      basic_block new_body = e1->dest;
2325      if (body_bb == cont_bb)
2326	cont_bb = new_body;
2327      edge e2 = NULL;
2328      basic_block new_header;
2329      if (EDGE_COUNT (cont_bb->preds) > 0)
2330	{
2331	  gsi = gsi_last_bb (cont_bb);
2332	  if (POINTER_TYPE_P (type))
2333	    t = fold_build_pointer_plus (fd->loops[i].v,
2334					 fold_convert (sizetype,
2335						       fd->loops[i].step));
2336	  else
2337	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2338			     fold_convert (type, fd->loops[i].step));
2339	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2340	  if (counts[i])
2341	    {
2342	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2343			       build_int_cst (fd->iter_type, 1));
2344	      expand_omp_build_assign (&gsi, counts[i], t);
2345	      t = counts[i];
2346	    }
2347	  else
2348	    {
2349	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2350			       fd->loops[i].v, fd->loops[i].n1);
2351	      t = fold_convert (fd->iter_type, t);
2352	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2353					    true, GSI_SAME_STMT);
2354	    }
2355	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2356			 size_int (i - fd->collapse + 1),
2357			 NULL_TREE, NULL_TREE);
2358	  expand_omp_build_assign (&gsi, aref, t);
2359	  gsi_prev (&gsi);
2360	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2361	  new_header = e2->dest;
2362	}
2363      else
2364	new_header = cont_bb;
2365      gsi = gsi_after_labels (new_header);
2366      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2367					 true, GSI_SAME_STMT);
2368      tree n2
2369	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2370				    true, NULL_TREE, true, GSI_SAME_STMT);
2371      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2372      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2373      edge e3 = split_block (new_header, gsi_stmt (gsi));
2374      cont_bb = e3->dest;
2375      remove_edge (e1);
2376      make_edge (body_bb, new_header, EDGE_FALLTHRU);
2377      e3->flags = EDGE_FALSE_VALUE;
2378      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2379      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2380      e1->probability = e3->probability.invert ();
2381
2382      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2383      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2384
2385      if (e2)
2386	{
2387	  struct loop *loop = alloc_loop ();
2388	  loop->header = new_header;
2389	  loop->latch = e2->src;
2390	  add_loop (loop, body_bb->loop_father);
2391	}
2392    }
2393
2394  /* If there are any lastprivate clauses and it is possible some loops
2395     might have zero iterations, ensure all the decls are initialized,
2396     otherwise we could crash evaluating C++ class iterators with lastprivate
2397     clauses.  */
2398  bool need_inits = false;
2399  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2400    if (need_inits)
2401      {
2402	tree type = TREE_TYPE (fd->loops[i].v);
2403	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2404	expand_omp_build_assign (&gsi, fd->loops[i].v,
2405				 fold_convert (type, fd->loops[i].n1));
2406      }
2407    else
2408      {
2409	tree type = TREE_TYPE (fd->loops[i].v);
2410	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2411				      boolean_type_node,
2412				      fold_convert (type, fd->loops[i].n1),
2413				      fold_convert (type, fd->loops[i].n2));
2414	if (!integer_onep (this_cond))
2415	  need_inits = true;
2416      }
2417
2418  return cont_bb;
2419}
2420
2421/* A subroutine of expand_omp_for.  Generate code for a parallel
2422   loop with any schedule.  Given parameters:
2423
2424	for (V = N1; V cond N2; V += STEP) BODY;
2425
2426   where COND is "<" or ">", we generate pseudocode
2427
2428	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2429	if (more) goto L0; else goto L3;
2430    L0:
2431	V = istart0;
2432	iend = iend0;
2433    L1:
2434	BODY;
2435	V += STEP;
2436	if (V cond iend) goto L1; else goto L2;
2437    L2:
2438	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2439    L3:
2440
2441    If this is a combined omp parallel loop, instead of the call to
2442    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2443    If this is gimple_omp_for_combined_p loop, then instead of assigning
2444    V and iend in L0 we assign the first two _looptemp_ clause decls of the
2445    inner GIMPLE_OMP_FOR and V += STEP; and
2446    if (V cond iend) goto L1; else goto L2; are removed.
2447
2448    For collapsed loops, given parameters:
2449      collapse(3)
2450      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2451	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2452	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2453	    BODY;
2454
2455    we generate pseudocode
2456
2457	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2458	if (cond3 is <)
2459	  adj = STEP3 - 1;
2460	else
2461	  adj = STEP3 + 1;
2462	count3 = (adj + N32 - N31) / STEP3;
2463	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2464	if (cond2 is <)
2465	  adj = STEP2 - 1;
2466	else
2467	  adj = STEP2 + 1;
2468	count2 = (adj + N22 - N21) / STEP2;
2469	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2470	if (cond1 is <)
2471	  adj = STEP1 - 1;
2472	else
2473	  adj = STEP1 + 1;
2474	count1 = (adj + N12 - N11) / STEP1;
2475	count = count1 * count2 * count3;
2476	goto Z1;
2477    Z0:
2478	count = 0;
2479    Z1:
2480	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2481	if (more) goto L0; else goto L3;
2482    L0:
2483	V = istart0;
2484	T = V;
2485	V3 = N31 + (T % count3) * STEP3;
2486	T = T / count3;
2487	V2 = N21 + (T % count2) * STEP2;
2488	T = T / count2;
2489	V1 = N11 + T * STEP1;
2490	iend = iend0;
2491    L1:
2492	BODY;
2493	V += 1;
2494	if (V < iend) goto L10; else goto L2;
2495    L10:
2496	V3 += STEP3;
2497	if (V3 cond3 N32) goto L1; else goto L11;
2498    L11:
2499	V3 = N31;
2500	V2 += STEP2;
2501	if (V2 cond2 N22) goto L1; else goto L12;
2502    L12:
2503	V2 = N21;
2504	V1 += STEP1;
2505	goto L1;
2506    L2:
2507	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2508    L3:
2509
2510      */
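
/* As one concrete (illustrative) instance: for

	#pragma omp for schedule(dynamic, 4)
	for (i = 0; i < n; i++)
	  body (i);

   START_FN/NEXT_FN are the dynamic-schedule entry points and the
   pseudocode above corresponds roughly to

	if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
	  do
	    for (i = istart0; i < iend0; i++)
	      body (i);
	  while (GOMP_loop_dynamic_next (&istart0, &iend0));
	GOMP_loop_end ();

   assuming a long iteration type; the GOMP_loop_ull_* variants are used
   for unsigned long long iteration types.  */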
2511
2512static void
2513expand_omp_for_generic (struct omp_region *region,
2514			struct omp_for_data *fd,
2515			enum built_in_function start_fn,
2516			enum built_in_function next_fn,
2517			gimple *inner_stmt)
2518{
2519  tree type, istart0, iend0, iend;
2520  tree t, vmain, vback, bias = NULL_TREE;
2521  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2522  basic_block l2_bb = NULL, l3_bb = NULL;
2523  gimple_stmt_iterator gsi;
2524  gassign *assign_stmt;
2525  bool in_combined_parallel = is_combined_parallel (region);
2526  bool broken_loop = region->cont == NULL;
2527  edge e, ne;
2528  tree *counts = NULL;
2529  int i;
2530  bool ordered_lastprivate = false;
2531
2532  gcc_assert (!broken_loop || !in_combined_parallel);
2533  gcc_assert (fd->iter_type == long_integer_type_node
2534	      || !in_combined_parallel);
2535
2536  entry_bb = region->entry;
2537  cont_bb = region->cont;
2538  collapse_bb = NULL;
2539  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2540  gcc_assert (broken_loop
2541	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2542  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2543  l1_bb = single_succ (l0_bb);
2544  if (!broken_loop)
2545    {
2546      l2_bb = create_empty_bb (cont_bb);
2547      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2548		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2549		      == l1_bb));
2550      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2551    }
2552  else
2553    l2_bb = NULL;
2554  l3_bb = BRANCH_EDGE (entry_bb)->dest;
2555  exit_bb = region->exit;
2556
2557  gsi = gsi_last_nondebug_bb (entry_bb);
2558
2559  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2560  if (fd->ordered
2561      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2562			  OMP_CLAUSE_LASTPRIVATE))
2563    ordered_lastprivate = true;
2564  if (fd->collapse > 1 || fd->ordered)
2565    {
2566      int first_zero_iter1 = -1, first_zero_iter2 = -1;
2567      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2568
2569      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2570      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2571				  zero_iter1_bb, first_zero_iter1,
2572				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2573
2574      if (zero_iter1_bb)
2575	{
2576	  /* Some counts[i] vars might be uninitialized if
2577	     some loop has zero iterations.  But the body shouldn't
2578	     be executed in that case, so just avoid uninit warnings.  */
2579	  for (i = first_zero_iter1;
2580	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2581	    if (SSA_VAR_P (counts[i]))
2582	      TREE_NO_WARNING (counts[i]) = 1;
2583	  gsi_prev (&gsi);
2584	  e = split_block (entry_bb, gsi_stmt (gsi));
2585	  entry_bb = e->dest;
2586	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2587	  gsi = gsi_last_nondebug_bb (entry_bb);
2588	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2589				   get_immediate_dominator (CDI_DOMINATORS,
2590							    zero_iter1_bb));
2591	}
2592      if (zero_iter2_bb)
2593	{
2594	  /* Some counts[i] vars might be uninitialized if
2595	     some loop has zero iterations.  But the body shouldn't
2596	     be executed in that case, so just avoid uninit warnings.  */
2597	  for (i = first_zero_iter2; i < fd->ordered; i++)
2598	    if (SSA_VAR_P (counts[i]))
2599	      TREE_NO_WARNING (counts[i]) = 1;
2600	  if (zero_iter1_bb)
2601	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2602	  else
2603	    {
2604	      gsi_prev (&gsi);
2605	      e = split_block (entry_bb, gsi_stmt (gsi));
2606	      entry_bb = e->dest;
2607	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2608	      gsi = gsi_last_nondebug_bb (entry_bb);
2609	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2610				       get_immediate_dominator
2611					 (CDI_DOMINATORS, zero_iter2_bb));
2612	    }
2613	}
2614      if (fd->collapse == 1)
2615	{
2616	  counts[0] = fd->loop.n2;
2617	  fd->loop = fd->loops[0];
2618	}
2619    }
2620
2621  type = TREE_TYPE (fd->loop.v);
2622  istart0 = create_tmp_var (fd->iter_type, ".istart0");
2623  iend0 = create_tmp_var (fd->iter_type, ".iend0");
2624  TREE_ADDRESSABLE (istart0) = 1;
2625  TREE_ADDRESSABLE (iend0) = 1;
2626
2627  /* See if we need to bias by LLONG_MIN.  */
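  /* For instance (illustrative), a signed long long loop running from
     -4 to 4 has bounds that would compare in the wrong order once cast
     to the unsigned long long iteration type handed to the runtime;
     adding the bias (TYPE_MIN_VALUE of the original type, i.e. 2^63 when
     reinterpreted as unsigned) to both bounds keeps them ordered, and the
     same bias is subtracted from istart0/iend0 again below.  */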
2628  if (fd->iter_type == long_long_unsigned_type_node
2629      && TREE_CODE (type) == INTEGER_TYPE
2630      && !TYPE_UNSIGNED (type)
2631      && fd->ordered == 0)
2632    {
2633      tree n1, n2;
2634
2635      if (fd->loop.cond_code == LT_EXPR)
2636	{
2637	  n1 = fd->loop.n1;
2638	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2639	}
2640      else
2641	{
2642	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2643	  n2 = fd->loop.n1;
2644	}
2645      if (TREE_CODE (n1) != INTEGER_CST
2646	  || TREE_CODE (n2) != INTEGER_CST
2647	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2648	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2649    }
2650
2651  gimple_stmt_iterator gsif = gsi;
2652  gsi_prev (&gsif);
2653
2654  tree arr = NULL_TREE;
2655  if (in_combined_parallel)
2656    {
2657      gcc_assert (fd->ordered == 0);
2658      /* In a combined parallel loop, emit a call to
2659	 GOMP_loop_foo_next.  */
2660      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2661			   build_fold_addr_expr (istart0),
2662			   build_fold_addr_expr (iend0));
2663    }
2664  else
2665    {
2666      tree t0, t1, t2, t3, t4;
2667      /* If this is not a combined parallel loop, emit a call to
2668	 GOMP_loop_foo_start in ENTRY_BB.  */
2669      t4 = build_fold_addr_expr (iend0);
2670      t3 = build_fold_addr_expr (istart0);
2671      if (fd->ordered)
2672	{
2673	  t0 = build_int_cst (unsigned_type_node,
2674			      fd->ordered - fd->collapse + 1);
2675	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2676							fd->ordered
2677							- fd->collapse + 1),
2678				".omp_counts");
2679	  DECL_NAMELESS (arr) = 1;
2680	  TREE_ADDRESSABLE (arr) = 1;
2681	  TREE_STATIC (arr) = 1;
2682	  vec<constructor_elt, va_gc> *v;
2683	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2684	  int idx;
2685
2686	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2687	    {
2688	      tree c;
2689	      if (idx == 0 && fd->collapse > 1)
2690		c = fd->loop.n2;
2691	      else
2692		c = counts[idx + fd->collapse - 1];
2693	      tree purpose = size_int (idx);
2694	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2695	      if (TREE_CODE (c) != INTEGER_CST)
2696		TREE_STATIC (arr) = 0;
2697	    }
2698
2699	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2700	  if (!TREE_STATIC (arr))
2701	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2702						    void_type_node, arr),
2703				      true, NULL_TREE, true, GSI_SAME_STMT);
2704	  t1 = build_fold_addr_expr (arr);
2705	  t2 = NULL_TREE;
2706	}
2707      else
2708	{
2709	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2710	  t1 = fd->loop.n2;
2711	  t0 = fd->loop.n1;
2712	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2713	    {
2714	      tree innerc
2715		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2716				   OMP_CLAUSE__LOOPTEMP_);
2717	      gcc_assert (innerc);
2718	      t0 = OMP_CLAUSE_DECL (innerc);
2719	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2720					OMP_CLAUSE__LOOPTEMP_);
2721	      gcc_assert (innerc);
2722	      t1 = OMP_CLAUSE_DECL (innerc);
2723	    }
2724	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2725	      && TYPE_PRECISION (TREE_TYPE (t0))
2726		 != TYPE_PRECISION (fd->iter_type))
2727	    {
2728	      /* Avoid casting pointers to integer of a different size.  */
2729	      tree itype = signed_type_for (type);
2730	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2731	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2732	    }
2733	  else
2734	    {
2735	      t1 = fold_convert (fd->iter_type, t1);
2736	      t0 = fold_convert (fd->iter_type, t0);
2737	    }
2738	  if (bias)
2739	    {
2740	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2741	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2742	    }
2743	}
2744      if (fd->iter_type == long_integer_type_node || fd->ordered)
2745	{
2746	  if (fd->chunk_size)
2747	    {
2748	      t = fold_convert (fd->iter_type, fd->chunk_size);
2749	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2750	      if (fd->ordered)
2751		t = build_call_expr (builtin_decl_explicit (start_fn),
2752				     5, t0, t1, t, t3, t4);
2753	      else
2754		t = build_call_expr (builtin_decl_explicit (start_fn),
2755				     6, t0, t1, t2, t, t3, t4);
2756	    }
2757	  else if (fd->ordered)
2758	    t = build_call_expr (builtin_decl_explicit (start_fn),
2759				 4, t0, t1, t3, t4);
2760	  else
2761	    t = build_call_expr (builtin_decl_explicit (start_fn),
2762				 5, t0, t1, t2, t3, t4);
2763	}
2764      else
2765	{
2766	  tree t5;
2767	  tree c_bool_type;
2768	  tree bfn_decl;
2769
2770	  /* The GOMP_loop_ull_*start functions have additional boolean
2771	     argument, true for < loops and false for > loops.
2772	     In Fortran, the C bool type can be different from
2773	     boolean_type_node.  */
2774	  bfn_decl = builtin_decl_explicit (start_fn);
2775	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2776	  t5 = build_int_cst (c_bool_type,
2777			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2778	  if (fd->chunk_size)
2779	    {
2780	      tree bfn_decl = builtin_decl_explicit (start_fn);
2781	      t = fold_convert (fd->iter_type, fd->chunk_size);
2782	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2783	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2784	    }
2785	  else
2786	    t = build_call_expr (builtin_decl_explicit (start_fn),
2787				 6, t5, t0, t1, t2, t3, t4);
2788	}
2789    }
2790  if (TREE_TYPE (t) != boolean_type_node)
2791    t = fold_build2 (NE_EXPR, boolean_type_node,
2792		     t, build_int_cst (TREE_TYPE (t), 0));
2793  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2794				true, GSI_SAME_STMT);
2795  if (arr && !TREE_STATIC (arr))
2796    {
2797      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2798      TREE_THIS_VOLATILE (clobber) = 1;
2799      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2800			 GSI_SAME_STMT);
2801    }
2802  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2803
2804  /* Remove the GIMPLE_OMP_FOR statement.  */
2805  gsi_remove (&gsi, true);
2806
2807  if (gsi_end_p (gsif))
2808    gsif = gsi_after_labels (gsi_bb (gsif));
2809  gsi_next (&gsif);
2810
2811  /* Iteration setup for sequential loop goes in L0_BB.  */
2812  tree startvar = fd->loop.v;
2813  tree endvar = NULL_TREE;
2814
2815  if (gimple_omp_for_combined_p (fd->for_stmt))
2816    {
2817      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2818		  && gimple_omp_for_kind (inner_stmt)
2819		     == GF_OMP_FOR_KIND_SIMD);
2820      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2821				     OMP_CLAUSE__LOOPTEMP_);
2822      gcc_assert (innerc);
2823      startvar = OMP_CLAUSE_DECL (innerc);
2824      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2825				OMP_CLAUSE__LOOPTEMP_);
2826      gcc_assert (innerc);
2827      endvar = OMP_CLAUSE_DECL (innerc);
2828    }
2829
2830  gsi = gsi_start_bb (l0_bb);
2831  t = istart0;
2832  if (fd->ordered && fd->collapse == 1)
2833    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2834		     fold_convert (fd->iter_type, fd->loop.step));
2835  else if (bias)
2836    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2837  if (fd->ordered && fd->collapse == 1)
2838    {
2839      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2840	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2841			 fd->loop.n1, fold_convert (sizetype, t));
2842      else
2843	{
2844	  t = fold_convert (TREE_TYPE (startvar), t);
2845	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2846			   fd->loop.n1, t);
2847	}
2848    }
2849  else
2850    {
2851      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2852	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2853      t = fold_convert (TREE_TYPE (startvar), t);
2854    }
2855  t = force_gimple_operand_gsi (&gsi, t,
2856				DECL_P (startvar)
2857				&& TREE_ADDRESSABLE (startvar),
2858				NULL_TREE, false, GSI_CONTINUE_LINKING);
2859  assign_stmt = gimple_build_assign (startvar, t);
2860  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861
2862  t = iend0;
2863  if (fd->ordered && fd->collapse == 1)
2864    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2865		     fold_convert (fd->iter_type, fd->loop.step));
2866  else if (bias)
2867    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2868  if (fd->ordered && fd->collapse == 1)
2869    {
2870      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2871	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2872			 fd->loop.n1, fold_convert (sizetype, t));
2873      else
2874	{
2875	  t = fold_convert (TREE_TYPE (startvar), t);
2876	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2877			   fd->loop.n1, t);
2878	}
2879    }
2880  else
2881    {
2882      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2883	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2884      t = fold_convert (TREE_TYPE (startvar), t);
2885    }
2886  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2887				   false, GSI_CONTINUE_LINKING);
2888  if (endvar)
2889    {
2890      assign_stmt = gimple_build_assign (endvar, iend);
2891      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2892      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2893	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2894      else
2895	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2896      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2897    }
2898  /* Handle linear clause adjustments.  */
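  /* For example (illustrative), with

	#pragma omp for linear (x : 2)
	for (i = N1; i < N2; i += STEP)
	  ... use and advance x by 2 per iteration ...

     every time a thread starts a new chunk at iteration STARTVAR, the
     privatized x is re-seeded as roughly

	x = x_at_region_entry + ((STARTVAR - N1) / STEP) * 2;

     which is what the ITERCNT computation below produces.  */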
2899  tree itercnt = NULL_TREE;
2900  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2901    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2902	 c; c = OMP_CLAUSE_CHAIN (c))
2903      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2904	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2905	{
2906	  tree d = OMP_CLAUSE_DECL (c);
2907	  bool is_ref = omp_is_reference (d);
2908	  tree t = d, a, dest;
2909	  if (is_ref)
2910	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2911	  tree type = TREE_TYPE (t);
2912	  if (POINTER_TYPE_P (type))
2913	    type = sizetype;
2914	  dest = unshare_expr (t);
2915	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2916	  expand_omp_build_assign (&gsif, v, t);
2917	  if (itercnt == NULL_TREE)
2918	    {
2919	      itercnt = startvar;
2920	      tree n1 = fd->loop.n1;
2921	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2922		{
2923		  itercnt
2924		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2925				    itercnt);
2926		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2927		}
2928	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2929				     itercnt, n1);
2930	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2931				     itercnt, fd->loop.step);
2932	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2933						  NULL_TREE, false,
2934						  GSI_CONTINUE_LINKING);
2935	    }
2936	  a = fold_build2 (MULT_EXPR, type,
2937			   fold_convert (type, itercnt),
2938			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2939	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2940			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2941	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2942					false, GSI_CONTINUE_LINKING);
2943	  assign_stmt = gimple_build_assign (dest, t);
2944	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2945	}
2946  if (fd->collapse > 1)
2947    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2948
2949  if (fd->ordered)
2950    {
2951      /* Until now, the counts array contained the number of iterations
2952	 (or a variable holding it) for the ith loop.  From now on we need
2953	 those counts only for the collapsed loops, and only for the 2nd
2954	 through the last collapsed one.  Move them one element earlier;
2955	 we'll use counts[fd->collapse - 1] for the first source/sink
2956	 iteration counter and so on, and counts[fd->ordered]
2957	 as the array holding the current counter values for
2958	 depend(source).  */
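      /* Illustration: with collapse(2) and ordered(3), the old counts[1]
	 moves down to counts[0]; counts[1] becomes the source/sink counter
	 covering the two collapsed dimensions, counts[2] the counter of
	 the remaining ordered dimension, and counts[3] the array handed to
	 the GOMP_doacross_* calls.  */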
2959      if (fd->collapse > 1)
2960	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2961      if (broken_loop)
2962	{
2963	  int i;
2964	  for (i = fd->collapse; i < fd->ordered; i++)
2965	    {
2966	      tree type = TREE_TYPE (fd->loops[i].v);
2967	      tree this_cond
2968		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2969			       fold_convert (type, fd->loops[i].n1),
2970			       fold_convert (type, fd->loops[i].n2));
2971	      if (!integer_onep (this_cond))
2972		break;
2973	    }
2974	  if (i < fd->ordered)
2975	    {
2976	      cont_bb
2977		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2978	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2979	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2980	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2981	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2982	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2983	      make_edge (cont_bb, l1_bb, 0);
2984	      l2_bb = create_empty_bb (cont_bb);
2985	      broken_loop = false;
2986	    }
2987	}
2988      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2989      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2990					      ordered_lastprivate);
2991      if (counts[fd->collapse - 1])
2992	{
2993	  gcc_assert (fd->collapse == 1);
2994	  gsi = gsi_last_bb (l0_bb);
2995	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2996				   istart0, true);
2997	  gsi = gsi_last_bb (cont_bb);
2998	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2999			   build_int_cst (fd->iter_type, 1));
3000	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3001	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3002			      size_zero_node, NULL_TREE, NULL_TREE);
3003	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3004	  t = counts[fd->collapse - 1];
3005	}
3006      else if (fd->collapse > 1)
3007	t = fd->loop.v;
3008      else
3009	{
3010	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3011			   fd->loops[0].v, fd->loops[0].n1);
3012	  t = fold_convert (fd->iter_type, t);
3013	}
3014      gsi = gsi_last_bb (l0_bb);
3015      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3016			  size_zero_node, NULL_TREE, NULL_TREE);
3017      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3018				    false, GSI_CONTINUE_LINKING);
3019      expand_omp_build_assign (&gsi, aref, t, true);
3020    }
3021
3022  if (!broken_loop)
3023    {
3024      /* Code to control the increment and predicate for the sequential
3025	 loop goes in the CONT_BB.  */
3026      gsi = gsi_last_nondebug_bb (cont_bb);
3027      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3028      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3029      vmain = gimple_omp_continue_control_use (cont_stmt);
3030      vback = gimple_omp_continue_control_def (cont_stmt);
3031
3032      if (!gimple_omp_for_combined_p (fd->for_stmt))
3033	{
3034	  if (POINTER_TYPE_P (type))
3035	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3036	  else
3037	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3038	  t = force_gimple_operand_gsi (&gsi, t,
3039					DECL_P (vback)
3040					&& TREE_ADDRESSABLE (vback),
3041					NULL_TREE, true, GSI_SAME_STMT);
3042	  assign_stmt = gimple_build_assign (vback, t);
3043	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3044
3045	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3046	    {
3047	      tree tem;
3048	      if (fd->collapse > 1)
3049		tem = fd->loop.v;
3050	      else
3051		{
3052		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3053				     fd->loops[0].v, fd->loops[0].n1);
3054		  tem = fold_convert (fd->iter_type, tem);
3055		}
3056	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3057				  counts[fd->ordered], size_zero_node,
3058				  NULL_TREE, NULL_TREE);
3059	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3060					      true, GSI_SAME_STMT);
3061	      expand_omp_build_assign (&gsi, aref, tem);
3062	    }
3063
3064	  t = build2 (fd->loop.cond_code, boolean_type_node,
3065		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3066		      iend);
3067	  gcond *cond_stmt = gimple_build_cond_empty (t);
3068	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3069	}
3070
3071      /* Remove GIMPLE_OMP_CONTINUE.  */
3072      gsi_remove (&gsi, true);
3073
3074      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3075	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3076
3077      /* Emit code to get the next parallel iteration in L2_BB.  */
3078      gsi = gsi_start_bb (l2_bb);
3079
3080      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3081			   build_fold_addr_expr (istart0),
3082			   build_fold_addr_expr (iend0));
3083      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3084				    false, GSI_CONTINUE_LINKING);
3085      if (TREE_TYPE (t) != boolean_type_node)
3086	t = fold_build2 (NE_EXPR, boolean_type_node,
3087			 t, build_int_cst (TREE_TYPE (t), 0));
3088      gcond *cond_stmt = gimple_build_cond_empty (t);
3089      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3090    }
3091
3092  /* Add the loop cleanup function.  */
3093  gsi = gsi_last_nondebug_bb (exit_bb);
3094  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3095    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3096  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3097    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3098  else
3099    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3100  gcall *call_stmt = gimple_build_call (t, 0);
3101  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3102    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3103  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3104  if (fd->ordered)
3105    {
3106      tree arr = counts[fd->ordered];
3107      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3108      TREE_THIS_VOLATILE (clobber) = 1;
3109      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3110			GSI_SAME_STMT);
3111    }
3112  gsi_remove (&gsi, true);
3113
3114  /* Connect the new blocks.  */
3115  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3116  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3117
3118  if (!broken_loop)
3119    {
3120      gimple_seq phis;
3121
3122      e = find_edge (cont_bb, l3_bb);
3123      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3124
3125      phis = phi_nodes (l3_bb);
3126      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3127	{
3128	  gimple *phi = gsi_stmt (gsi);
3129	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3130		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3131	}
3132      remove_edge (e);
3133
3134      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3135      e = find_edge (cont_bb, l1_bb);
3136      if (e == NULL)
3137	{
3138	  e = BRANCH_EDGE (cont_bb);
3139	  gcc_assert (single_succ (e->dest) == l1_bb);
3140	}
3141      if (gimple_omp_for_combined_p (fd->for_stmt))
3142	{
3143	  remove_edge (e);
3144	  e = NULL;
3145	}
3146      else if (fd->collapse > 1)
3147	{
3148	  remove_edge (e);
3149	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3150	}
3151      else
3152	e->flags = EDGE_TRUE_VALUE;
3153      if (e)
3154	{
3155	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3156	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3157	}
3158      else
3159	{
3160	  e = find_edge (cont_bb, l2_bb);
3161	  e->flags = EDGE_FALLTHRU;
3162	}
3163      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3164
3165      if (gimple_in_ssa_p (cfun))
3166	{
3167	  /* Add phis to the outer loop that connect to the phis in the inner,
3168	     original loop, and move the loop entry value of the inner phi to
3169	     the loop entry value of the outer phi.  */
3170	  gphi_iterator psi;
3171	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3172	    {
3173	      source_location locus;
3174	      gphi *nphi;
3175	      gphi *exit_phi = psi.phi ();
3176
3177	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
3178		continue;
3179
3180	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3182
3183	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184	      edge latch_to_l1 = find_edge (latch, l1_bb);
3185	      gphi *inner_phi
3186		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3187
3188	      tree t = gimple_phi_result (exit_phi);
3189	      tree new_res = copy_ssa_name (t, NULL);
3190	      nphi = create_phi_node (new_res, l0_bb);
3191
3192	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196	      add_phi_arg (nphi, t, entry_to_l0, locus);
3197
3198	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3200
3201	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3202	    }
3203	}
3204
3205      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3207      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3209      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3211      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3213
3214      /* We enter expand_omp_for_generic with a loop.  This original loop may
3215	 have its own loop struct, or it may be part of an outer loop struct
3216	 (which may be the fake loop).  */
3217      struct loop *outer_loop = entry_bb->loop_father;
3218      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3219
3220      add_bb_to_loop (l2_bb, outer_loop);
3221
3222      /* We've added a new loop around the original loop.  Allocate the
3223	 corresponding loop struct.  */
3224      struct loop *new_loop = alloc_loop ();
3225      new_loop->header = l0_bb;
3226      new_loop->latch = l2_bb;
3227      add_loop (new_loop, outer_loop);
3228
3229      /* Allocate a loop structure for the original loop unless we already
3230	 had one.  */
3231      if (!orig_loop_has_loop_struct
3232	  && !gimple_omp_for_combined_p (fd->for_stmt))
3233	{
3234	  struct loop *orig_loop = alloc_loop ();
3235	  orig_loop->header = l1_bb;
3236	  /* The loop may have multiple latches.  */
3237	  add_loop (orig_loop, new_loop);
3238	}
3239    }
3240}
3241
3242/* A subroutine of expand_omp_for.  Generate code for a parallel
3243   loop with static schedule and no specified chunk size.  Given
3244   parameters:
3245
3246	for (V = N1; V cond N2; V += STEP) BODY;
3247
3248   where COND is "<" or ">", we generate pseudocode
3249
3250	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3251	if (cond is <)
3252	  adj = STEP - 1;
3253	else
3254	  adj = STEP + 1;
3255	if ((__typeof (V)) -1 > 0 && cond is >)
3256	  n = -(adj + N2 - N1) / -STEP;
3257	else
3258	  n = (adj + N2 - N1) / STEP;
3259	q = n / nthreads;
3260	tt = n % nthreads;
3261	if (threadid < tt) goto L3; else goto L4;
3262    L3:
3263	tt = 0;
3264	q = q + 1;
3265    L4:
3266	s0 = q * threadid + tt;
3267	e0 = s0 + q;
3268	V = s0 * STEP + N1;
3269	if (s0 >= e0) goto L2; else goto L0;
3270    L0:
3271	e = e0 * STEP + N1;
3272    L1:
3273	BODY;
3274	V += STEP;
3275	if (V cond e) goto L1;
3276    L2:
3277*/
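
/* Illustration only (hypothetical user code, not part of this file): a
   worksharing loop such as

	#pragma omp for schedule(static)
	for (i = 0; i < n; i++)
	  a[i] = b[i] + c[i];

   with no chunk size is expanded along the pseudocode above.  For example,
   with n = 10 iterations and nthreads = 4 we get q = 2 and tt = 2, so
   threads 0 and 1 each take three iterations ([0,3) and [3,6)) while
   threads 2 and 3 each take two ([6,8) and [8,10)), i.e. one contiguous
   block per thread.  */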
3278
3279static void
3280expand_omp_for_static_nochunk (struct omp_region *region,
3281			       struct omp_for_data *fd,
3282			       gimple *inner_stmt)
3283{
3284  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285  tree type, itype, vmain, vback;
3286  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287  basic_block body_bb, cont_bb, collapse_bb = NULL;
3288  basic_block fin_bb;
3289  gimple_stmt_iterator gsi;
3290  edge ep;
3291  bool broken_loop = region->cont == NULL;
3292  tree *counts = NULL;
3293  tree n1, n2, step;
3294
3295  itype = type = TREE_TYPE (fd->loop.v);
3296  if (POINTER_TYPE_P (type))
3297    itype = signed_type_for (type);
3298
3299  entry_bb = region->entry;
3300  cont_bb = region->cont;
3301  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302  fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303  gcc_assert (broken_loop
3304	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306  body_bb = single_succ (seq_start_bb);
3307  if (!broken_loop)
3308    {
3309      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3312    }
3313  exit_bb = region->exit;
3314
3315  /* Iteration space partitioning goes in ENTRY_BB.  */
3316  gsi = gsi_last_nondebug_bb (entry_bb);
3317  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3318
3319  if (fd->collapse > 1)
3320    {
3321      int first_zero_iter = -1, dummy = -1;
3322      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3323
3324      counts = XALLOCAVEC (tree, fd->collapse);
3325      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326				  fin_bb, first_zero_iter,
3327				  dummy_bb, dummy, l2_dom_bb);
3328      t = NULL_TREE;
3329    }
3330  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331    t = integer_one_node;
3332  else
3333    t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334		     fold_convert (type, fd->loop.n1),
3335		     fold_convert (type, fd->loop.n2));
3336  if (fd->collapse == 1
3337      && TYPE_UNSIGNED (type)
3338      && (t == NULL_TREE || !integer_onep (t)))
3339    {
3340      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342				     true, GSI_SAME_STMT);
3343      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345				     true, GSI_SAME_STMT);
3346      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347						 NULL_TREE, NULL_TREE);
3348      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350		     expand_omp_regimplify_p, NULL, NULL)
3351	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352			expand_omp_regimplify_p, NULL, NULL))
3353	{
3354	  gsi = gsi_for_stmt (cond_stmt);
3355	  gimple_regimplify_operands (cond_stmt, &gsi);
3356	}
3357      ep = split_block (entry_bb, cond_stmt);
3358      ep->flags = EDGE_TRUE_VALUE;
3359      entry_bb = ep->dest;
3360      ep->probability = profile_probability::very_likely ();
3361      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362      ep->probability = profile_probability::very_unlikely ();
3363      if (gimple_in_ssa_p (cfun))
3364	{
3365	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367	       !gsi_end_p (gpi); gsi_next (&gpi))
3368	    {
3369	      gphi *phi = gpi.phi ();
3370	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371			   ep, UNKNOWN_LOCATION);
3372	    }
3373	}
3374      gsi = gsi_last_bb (entry_bb);
3375    }
3376
3377  switch (gimple_omp_for_kind (fd->for_stmt))
3378    {
3379    case GF_OMP_FOR_KIND_FOR:
3380      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382      break;
3383    case GF_OMP_FOR_KIND_DISTRIBUTE:
3384      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386      break;
3387    default:
3388      gcc_unreachable ();
3389    }
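  /* Note (illustration): in the GF_OMP_FOR_KIND_DISTRIBUTE case the teams
     of the league take the place of threads in the partitioning below, so
     "nthreads" is really the number of teams and "threadid" the team
     number; the arithmetic is otherwise identical.  */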
3390  nthreads = build_call_expr (nthreads, 0);
3391  nthreads = fold_convert (itype, nthreads);
3392  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393				       true, GSI_SAME_STMT);
3394  threadid = build_call_expr (threadid, 0);
3395  threadid = fold_convert (itype, threadid);
3396  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397				       true, GSI_SAME_STMT);
3398
3399  n1 = fd->loop.n1;
3400  n2 = fd->loop.n2;
3401  step = fd->loop.step;
3402  if (gimple_omp_for_combined_into_p (fd->for_stmt))
3403    {
3404      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405				     OMP_CLAUSE__LOOPTEMP_);
3406      gcc_assert (innerc);
3407      n1 = OMP_CLAUSE_DECL (innerc);
3408      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409				OMP_CLAUSE__LOOPTEMP_);
3410      gcc_assert (innerc);
3411      n2 = OMP_CLAUSE_DECL (innerc);
3412    }
3413  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414				 true, NULL_TREE, true, GSI_SAME_STMT);
3415  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416				 true, NULL_TREE, true, GSI_SAME_STMT);
3417  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418				   true, NULL_TREE, true, GSI_SAME_STMT);
3419
3420  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421  t = fold_build2 (PLUS_EXPR, itype, step, t);
3422  t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425    t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426		     fold_build1 (NEGATE_EXPR, itype, t),
3427		     fold_build1 (NEGATE_EXPR, itype, step));
3428  else
3429    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430  t = fold_convert (itype, t);
3431  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3432
3433  q = create_tmp_reg (itype, "q");
3434  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3437
3438  tt = create_tmp_reg (itype, "tt");
3439  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3442
3443  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444  gcond *cond_stmt = gimple_build_cond_empty (t);
3445  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3446
3447  second_bb = split_block (entry_bb, cond_stmt)->dest;
3448  gsi = gsi_last_nondebug_bb (second_bb);
3449  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3450
3451  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452		     GSI_SAME_STMT);
3453  gassign *assign_stmt
3454    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3456
3457  third_bb = split_block (second_bb, assign_stmt)->dest;
3458  gsi = gsi_last_nondebug_bb (third_bb);
3459  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3460
3461  t = build2 (MULT_EXPR, itype, q, threadid);
3462  t = build2 (PLUS_EXPR, itype, t, tt);
3463  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3464
3465  t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3467
3468  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3470
3471  /* Remove the GIMPLE_OMP_FOR statement.  */
3472  gsi_remove (&gsi, true);
3473
3474  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3475  gsi = gsi_start_bb (seq_start_bb);
3476
3477  tree startvar = fd->loop.v;
3478  tree endvar = NULL_TREE;
3479
3480  if (gimple_omp_for_combined_p (fd->for_stmt))
3481    {
3482      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483		     ? gimple_omp_parallel_clauses (inner_stmt)
3484		     : gimple_omp_for_clauses (inner_stmt);
3485      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486      gcc_assert (innerc);
3487      startvar = OMP_CLAUSE_DECL (innerc);
3488      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489				OMP_CLAUSE__LOOPTEMP_);
3490      gcc_assert (innerc);
3491      endvar = OMP_CLAUSE_DECL (innerc);
3492      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3494	{
3495	  int i;
3496	  for (i = 1; i < fd->collapse; i++)
3497	    {
3498	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499					OMP_CLAUSE__LOOPTEMP_);
3500	      gcc_assert (innerc);
3501	    }
3502	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503				    OMP_CLAUSE__LOOPTEMP_);
3504	  if (innerc)
3505	    {
3506	      /* If needed (distribute parallel for with lastprivate),
3507		 propagate down the total number of iterations.  */
3508	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509				     fd->loop.n2);
3510	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511					    GSI_CONTINUE_LINKING);
3512	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514	    }
3515	}
3516    }
3517  t = fold_convert (itype, s0);
3518  t = fold_build2 (MULT_EXPR, itype, t, step);
3519  if (POINTER_TYPE_P (type))
3520    t = fold_build_pointer_plus (n1, t);
3521  else
3522    t = fold_build2 (PLUS_EXPR, type, t, n1);
3523  t = fold_convert (TREE_TYPE (startvar), t);
3524  t = force_gimple_operand_gsi (&gsi, t,
3525				DECL_P (startvar)
3526				&& TREE_ADDRESSABLE (startvar),
3527				NULL_TREE, false, GSI_CONTINUE_LINKING);
3528  assign_stmt = gimple_build_assign (startvar, t);
3529  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3530
3531  t = fold_convert (itype, e0);
3532  t = fold_build2 (MULT_EXPR, itype, t, step);
3533  if (POINTER_TYPE_P (type))
3534    t = fold_build_pointer_plus (n1, t);
3535  else
3536    t = fold_build2 (PLUS_EXPR, type, t, n1);
3537  t = fold_convert (TREE_TYPE (startvar), t);
3538  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539				false, GSI_CONTINUE_LINKING);
3540  if (endvar)
3541    {
3542      assign_stmt = gimple_build_assign (endvar, e);
3543      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545	assign_stmt = gimple_build_assign (fd->loop.v, e);
3546      else
3547	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3549    }
3550  /* Handle linear clause adjustments.  */
3551  tree itercnt = NULL_TREE;
3552  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554	 c; c = OMP_CLAUSE_CHAIN (c))
3555      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3557	{
3558	  tree d = OMP_CLAUSE_DECL (c);
3559	  bool is_ref = omp_is_reference (d);
3560	  tree t = d, a, dest;
3561	  if (is_ref)
3562	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563	  if (itercnt == NULL_TREE)
3564	    {
3565	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3566		{
3567		  itercnt = fold_build2 (MINUS_EXPR, itype,
3568					 fold_convert (itype, n1),
3569					 fold_convert (itype, fd->loop.n1));
3570		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573						      NULL_TREE, false,
3574						      GSI_CONTINUE_LINKING);
3575		}
3576	      else
3577		itercnt = s0;
3578	    }
3579	  tree type = TREE_TYPE (t);
3580	  if (POINTER_TYPE_P (type))
3581	    type = sizetype;
3582	  a = fold_build2 (MULT_EXPR, type,
3583			   fold_convert (type, itercnt),
3584			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585	  dest = unshare_expr (t);
3586	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589					false, GSI_CONTINUE_LINKING);
3590	  assign_stmt = gimple_build_assign (dest, t);
3591	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3592	}
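
  /* Illustration of the linear-clause handling above (hypothetical clause):
     for "linear(x:2)" on this loop, the code takes the value of x on entry
     to the thread's block, adds s0 * 2 (the thread's first logical
     iteration times the linear step) and stores the result back, so each
     thread starts its block with a correctly adjusted x.  */
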
3593  if (fd->collapse > 1)
3594    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3595
3596  if (!broken_loop)
3597    {
3598      /* The code controlling the sequential loop replaces the
3599	 GIMPLE_OMP_CONTINUE.  */
3600      gsi = gsi_last_nondebug_bb (cont_bb);
3601      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603      vmain = gimple_omp_continue_control_use (cont_stmt);
3604      vback = gimple_omp_continue_control_def (cont_stmt);
3605
3606      if (!gimple_omp_for_combined_p (fd->for_stmt))
3607	{
3608	  if (POINTER_TYPE_P (type))
3609	    t = fold_build_pointer_plus (vmain, step);
3610	  else
3611	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612	  t = force_gimple_operand_gsi (&gsi, t,
3613					DECL_P (vback)
3614					&& TREE_ADDRESSABLE (vback),
3615					NULL_TREE, true, GSI_SAME_STMT);
3616	  assign_stmt = gimple_build_assign (vback, t);
3617	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3618
3619	  t = build2 (fd->loop.cond_code, boolean_type_node,
3620		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621		      ? t : vback, e);
3622	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3623	}
3624
3625      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3626      gsi_remove (&gsi, true);
3627
3628      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3630    }
3631
3632  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3633  gsi = gsi_last_nondebug_bb (exit_bb);
3634  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3635    {
3636      t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3638    }
3639  gsi_remove (&gsi, true);
3640
3641  /* Connect all the blocks.  */
3642  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3644  ep = find_edge (entry_bb, second_bb);
3645  ep->flags = EDGE_TRUE_VALUE;
3646  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3647  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3649
3650  if (!broken_loop)
3651    {
3652      ep = find_edge (cont_bb, body_bb);
3653      if (ep == NULL)
3654	{
3655	  ep = BRANCH_EDGE (cont_bb);
3656	  gcc_assert (single_succ (ep->dest) == body_bb);
3657	}
3658      if (gimple_omp_for_combined_p (fd->for_stmt))
3659	{
3660	  remove_edge (ep);
3661	  ep = NULL;
3662	}
3663      else if (fd->collapse > 1)
3664	{
3665	  remove_edge (ep);
3666	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3667	}
3668      else
3669	ep->flags = EDGE_TRUE_VALUE;
3670      find_edge (cont_bb, fin_bb)->flags
3671	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3672    }
3673
3674  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3677
3678  set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679			   recompute_dominator (CDI_DOMINATORS, body_bb));
3680  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3682
3683  struct loop *loop = body_bb->loop_father;
3684  if (loop != entry_bb->loop_father)
3685    {
3686      gcc_assert (broken_loop || loop->header == body_bb);
3687      gcc_assert (broken_loop
3688		  || loop->latch == region->cont
3689		  || single_pred (loop->latch) == region->cont);
3690      return;
3691    }
3692
3693  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3694    {
3695      loop = alloc_loop ();
3696      loop->header = body_bb;
3697      if (collapse_bb == NULL)
3698	loop->latch = cont_bb;
3699      add_loop (loop, body_bb->loop_father);
3700    }
3701}
3702
3703/* Return phi in E->DEST with ARG on edge E.  */
3704
3705static gphi *
3706find_phi_with_arg_on_edge (tree arg, edge e)
3707{
3708  basic_block bb = e->dest;
3709
3710  for (gphi_iterator gpi = gsi_start_phis (bb);
3711       !gsi_end_p (gpi);
3712       gsi_next (&gpi))
3713    {
3714      gphi *phi = gpi.phi ();
3715      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716	return phi;
3717    }
3718
3719  return NULL;
3720}
3721
3722/* A subroutine of expand_omp_for.  Generate code for a parallel
3723   loop with static schedule and a specified chunk size.  Given
3724   parameters:
3725
3726	for (V = N1; V cond N2; V += STEP) BODY;
3727
3728   where COND is "<" or ">", we generate pseudocode
3729
3730	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3731	if (cond is <)
3732	  adj = STEP - 1;
3733	else
3734	  adj = STEP + 1;
3735	if ((__typeof (V)) -1 > 0 && cond is >)
3736	  n = -(adj + N2 - N1) / -STEP;
3737	else
3738	  n = (adj + N2 - N1) / STEP;
3739	trip = 0;
3740	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3741					      here so that V is defined
3742					      if the loop is not entered
3743    L0:
3744	s0 = (trip * nthreads + threadid) * CHUNK;
3745	e0 = min (s0 + CHUNK, n);
3746	if (s0 < n) goto L1; else goto L4;
3747    L1:
3748	V = s0 * STEP + N1;
3749	e = e0 * STEP + N1;
3750    L2:
3751	BODY;
3752	V += STEP;
3753	if (V cond e) goto L2; else goto L3;
3754    L3:
3755	trip += 1;
3756	goto L0;
3757    L4:
3758*/
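
/* Illustration only (hypothetical user code, not part of this file): a
   worksharing loop such as

	#pragma omp for schedule(static, 4)
	for (i = 0; i < n; i++)
	  a[i] = b[i] + c[i];

   takes this path.  For example, with n = 10, nthreads = 2 and CHUNK = 4,
   trip 0 hands thread 0 the range [0,4) and thread 1 the range [4,8); on
   trip 1 thread 0 gets [8,10) while thread 1 computes s0 = 12 >= n and
   leaves the loop, so chunks are dealt out round-robin until n is
   exhausted.  */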
3759
3760static void
3761expand_omp_for_static_chunk (struct omp_region *region,
3762			     struct omp_for_data *fd, gimple *inner_stmt)
3763{
3764  tree n, s0, e0, e, t;
3765  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766  tree type, itype, vmain, vback, vextra;
3767  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769  gimple_stmt_iterator gsi;
3770  edge se;
3771  bool broken_loop = region->cont == NULL;
3772  tree *counts = NULL;
3773  tree n1, n2, step;
3774
3775  itype = type = TREE_TYPE (fd->loop.v);
3776  if (POINTER_TYPE_P (type))
3777    itype = signed_type_for (type);
3778
3779  entry_bb = region->entry;
3780  se = split_block (entry_bb, last_stmt (entry_bb));
3781  entry_bb = se->src;
3782  iter_part_bb = se->dest;
3783  cont_bb = region->cont;
3784  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786  gcc_assert (broken_loop
3787	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789  body_bb = single_succ (seq_start_bb);
3790  if (!broken_loop)
3791    {
3792      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3796    }
3797  exit_bb = region->exit;
3798
3799  /* Trip and adjustment setup goes in ENTRY_BB.  */
3800  gsi = gsi_last_nondebug_bb (entry_bb);
3801  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3802
3803  if (fd->collapse > 1)
3804    {
3805      int first_zero_iter = -1, dummy = -1;
3806      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3807
3808      counts = XALLOCAVEC (tree, fd->collapse);
3809      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810				  fin_bb, first_zero_iter,
3811				  dummy_bb, dummy, l2_dom_bb);
3812      t = NULL_TREE;
3813    }
3814  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815    t = integer_one_node;
3816  else
3817    t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818		     fold_convert (type, fd->loop.n1),
3819		     fold_convert (type, fd->loop.n2));
3820  if (fd->collapse == 1
3821      && TYPE_UNSIGNED (type)
3822      && (t == NULL_TREE || !integer_onep (t)))
3823    {
3824      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826				     true, GSI_SAME_STMT);
3827      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829				     true, GSI_SAME_STMT);
3830      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831						 NULL_TREE, NULL_TREE);
3832      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834		     expand_omp_regimplify_p, NULL, NULL)
3835	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836			expand_omp_regimplify_p, NULL, NULL))
3837	{
3838	  gsi = gsi_for_stmt (cond_stmt);
3839	  gimple_regimplify_operands (cond_stmt, &gsi);
3840	}
3841      se = split_block (entry_bb, cond_stmt);
3842      se->flags = EDGE_TRUE_VALUE;
3843      entry_bb = se->dest;
3844      se->probability = profile_probability::very_likely ();
3845      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846      se->probability = profile_probability::very_unlikely ();
3847      if (gimple_in_ssa_p (cfun))
3848	{
3849	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851	       !gsi_end_p (gpi); gsi_next (&gpi))
3852	    {
3853	      gphi *phi = gpi.phi ();
3854	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855			   se, UNKNOWN_LOCATION);
3856	    }
3857	}
3858      gsi = gsi_last_bb (entry_bb);
3859    }
3860
3861  switch (gimple_omp_for_kind (fd->for_stmt))
3862    {
3863    case GF_OMP_FOR_KIND_FOR:
3864      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866      break;
3867    case GF_OMP_FOR_KIND_DISTRIBUTE:
3868      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870      break;
3871    default:
3872      gcc_unreachable ();
3873    }
3874  nthreads = build_call_expr (nthreads, 0);
3875  nthreads = fold_convert (itype, nthreads);
3876  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877				       true, GSI_SAME_STMT);
3878  threadid = build_call_expr (threadid, 0);
3879  threadid = fold_convert (itype, threadid);
3880  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881				       true, GSI_SAME_STMT);
3882
3883  n1 = fd->loop.n1;
3884  n2 = fd->loop.n2;
3885  step = fd->loop.step;
3886  if (gimple_omp_for_combined_into_p (fd->for_stmt))
3887    {
3888      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889				     OMP_CLAUSE__LOOPTEMP_);
3890      gcc_assert (innerc);
3891      n1 = OMP_CLAUSE_DECL (innerc);
3892      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893				OMP_CLAUSE__LOOPTEMP_);
3894      gcc_assert (innerc);
3895      n2 = OMP_CLAUSE_DECL (innerc);
3896    }
3897  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898				 true, NULL_TREE, true, GSI_SAME_STMT);
3899  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900				 true, NULL_TREE, true, GSI_SAME_STMT);
3901  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902				   true, NULL_TREE, true, GSI_SAME_STMT);
3903  tree chunk_size = fold_convert (itype, fd->chunk_size);
3904  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905  chunk_size
3906    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907				GSI_SAME_STMT);
3908
3909  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910  t = fold_build2 (PLUS_EXPR, itype, step, t);
3911  t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914    t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915		     fold_build1 (NEGATE_EXPR, itype, t),
3916		     fold_build1 (NEGATE_EXPR, itype, step));
3917  else
3918    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919  t = fold_convert (itype, t);
3920  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921				true, GSI_SAME_STMT);
3922
3923  trip_var = create_tmp_reg (itype, ".trip");
3924  if (gimple_in_ssa_p (cfun))
3925    {
3926      trip_init = make_ssa_name (trip_var);
3927      trip_main = make_ssa_name (trip_var);
3928      trip_back = make_ssa_name (trip_var);
3929    }
3930  else
3931    {
3932      trip_init = trip_var;
3933      trip_main = trip_var;
3934      trip_back = trip_var;
3935    }
3936
3937  gassign *assign_stmt
3938    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3940
3941  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942  t = fold_build2 (MULT_EXPR, itype, t, step);
3943  if (POINTER_TYPE_P (type))
3944    t = fold_build_pointer_plus (n1, t);
3945  else
3946    t = fold_build2 (PLUS_EXPR, type, t, n1);
3947  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948				     true, GSI_SAME_STMT);
3949
3950  /* Remove the GIMPLE_OMP_FOR.  */
3951  gsi_remove (&gsi, true);
3952
3953  gimple_stmt_iterator gsif = gsi;
3954
3955  /* Iteration space partitioning goes in ITER_PART_BB.  */
3956  gsi = gsi_last_bb (iter_part_bb);
3957
3958  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962				 false, GSI_CONTINUE_LINKING);
3963
3964  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965  t = fold_build2 (MIN_EXPR, itype, t, n);
3966  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967				 false, GSI_CONTINUE_LINKING);
3968
3969  t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3971
3972  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3973  gsi = gsi_start_bb (seq_start_bb);
3974
3975  tree startvar = fd->loop.v;
3976  tree endvar = NULL_TREE;
3977
3978  if (gimple_omp_for_combined_p (fd->for_stmt))
3979    {
3980      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981		     ? gimple_omp_parallel_clauses (inner_stmt)
3982		     : gimple_omp_for_clauses (inner_stmt);
3983      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984      gcc_assert (innerc);
3985      startvar = OMP_CLAUSE_DECL (innerc);
3986      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987				OMP_CLAUSE__LOOPTEMP_);
3988      gcc_assert (innerc);
3989      endvar = OMP_CLAUSE_DECL (innerc);
3990      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3992	{
3993	  int i;
3994	  for (i = 1; i < fd->collapse; i++)
3995	    {
3996	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997					OMP_CLAUSE__LOOPTEMP_);
3998	      gcc_assert (innerc);
3999	    }
4000	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001				    OMP_CLAUSE__LOOPTEMP_);
4002	  if (innerc)
4003	    {
4004	      /* If needed (distribute parallel for with lastprivate),
4005		 propagate down the total number of iterations.  */
4006	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007				     fd->loop.n2);
4008	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009					    GSI_CONTINUE_LINKING);
4010	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4012	    }
4013	}
4014    }
4015
4016  t = fold_convert (itype, s0);
4017  t = fold_build2 (MULT_EXPR, itype, t, step);
4018  if (POINTER_TYPE_P (type))
4019    t = fold_build_pointer_plus (n1, t);
4020  else
4021    t = fold_build2 (PLUS_EXPR, type, t, n1);
4022  t = fold_convert (TREE_TYPE (startvar), t);
4023  t = force_gimple_operand_gsi (&gsi, t,
4024				DECL_P (startvar)
4025				&& TREE_ADDRESSABLE (startvar),
4026				NULL_TREE, false, GSI_CONTINUE_LINKING);
4027  assign_stmt = gimple_build_assign (startvar, t);
4028  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4029
4030  t = fold_convert (itype, e0);
4031  t = fold_build2 (MULT_EXPR, itype, t, step);
4032  if (POINTER_TYPE_P (type))
4033    t = fold_build_pointer_plus (n1, t);
4034  else
4035    t = fold_build2 (PLUS_EXPR, type, t, n1);
4036  t = fold_convert (TREE_TYPE (startvar), t);
4037  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038				false, GSI_CONTINUE_LINKING);
4039  if (endvar)
4040    {
4041      assign_stmt = gimple_build_assign (endvar, e);
4042      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044	assign_stmt = gimple_build_assign (fd->loop.v, e);
4045      else
4046	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4048    }
4049  /* Handle linear clause adjustments.  */
4050  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053	 c; c = OMP_CLAUSE_CHAIN (c))
4054      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4056	{
4057	  tree d = OMP_CLAUSE_DECL (c);
4058	  bool is_ref = omp_is_reference (d);
4059	  tree t = d, a, dest;
4060	  if (is_ref)
4061	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062	  tree type = TREE_TYPE (t);
4063	  if (POINTER_TYPE_P (type))
4064	    type = sizetype;
4065	  dest = unshare_expr (t);
4066	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067	  expand_omp_build_assign (&gsif, v, t);
4068	  if (itercnt == NULL_TREE)
4069	    {
4070	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071		{
4072		  itercntbias
4073		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074				   fold_convert (itype, fd->loop.n1));
4075		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076					     itercntbias, step);
4077		  itercntbias
4078		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079						NULL_TREE, true,
4080						GSI_SAME_STMT);
4081		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083						      NULL_TREE, false,
4084						      GSI_CONTINUE_LINKING);
4085		}
4086	      else
4087		itercnt = s0;
4088	    }
4089	  a = fold_build2 (MULT_EXPR, type,
4090			   fold_convert (type, itercnt),
4091			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095					false, GSI_CONTINUE_LINKING);
4096	  assign_stmt = gimple_build_assign (dest, t);
4097	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4098	}
4099  if (fd->collapse > 1)
4100    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4101
4102  if (!broken_loop)
4103    {
4104      /* The code controlling the sequential loop goes in CONT_BB,
4105	 replacing the GIMPLE_OMP_CONTINUE.  */
4106      gsi = gsi_last_nondebug_bb (cont_bb);
4107      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108      vmain = gimple_omp_continue_control_use (cont_stmt);
4109      vback = gimple_omp_continue_control_def (cont_stmt);
4110
4111      if (!gimple_omp_for_combined_p (fd->for_stmt))
4112	{
4113	  if (POINTER_TYPE_P (type))
4114	    t = fold_build_pointer_plus (vmain, step);
4115	  else
4116	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4117	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119					  true, GSI_SAME_STMT);
4120	  assign_stmt = gimple_build_assign (vback, t);
4121	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4122
4123	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124	    t = build2 (EQ_EXPR, boolean_type_node,
4125			build_int_cst (itype, 0),
4126			build_int_cst (itype, 1));
4127	  else
4128	    t = build2 (fd->loop.cond_code, boolean_type_node,
4129			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130			? t : vback, e);
4131	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4132	}
4133
4134      /* Remove GIMPLE_OMP_CONTINUE.  */
4135      gsi_remove (&gsi, true);
4136
4137      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4139
4140      /* Trip update code goes into TRIP_UPDATE_BB.  */
4141      gsi = gsi_start_bb (trip_update_bb);
4142
4143      t = build_int_cst (itype, 1);
4144      t = build2 (PLUS_EXPR, itype, trip_main, t);
4145      assign_stmt = gimple_build_assign (trip_back, t);
4146      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147    }
4148
4149  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4150  gsi = gsi_last_nondebug_bb (exit_bb);
4151  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4152    {
4153      t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4155    }
4156  gsi_remove (&gsi, true);
4157
4158  /* Connect the new blocks.  */
4159  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4161
4162  if (!broken_loop)
4163    {
4164      se = find_edge (cont_bb, body_bb);
4165      if (se == NULL)
4166	{
4167	  se = BRANCH_EDGE (cont_bb);
4168	  gcc_assert (single_succ (se->dest) == body_bb);
4169	}
4170      if (gimple_omp_for_combined_p (fd->for_stmt))
4171	{
4172	  remove_edge (se);
4173	  se = NULL;
4174	}
4175      else if (fd->collapse > 1)
4176	{
4177	  remove_edge (se);
4178	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4179	}
4180      else
4181	se->flags = EDGE_TRUE_VALUE;
4182      find_edge (cont_bb, trip_update_bb)->flags
4183	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4184
4185      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186				iter_part_bb);
4187    }
4188
4189  if (gimple_in_ssa_p (cfun))
4190    {
4191      gphi_iterator psi;
4192      gphi *phi;
4193      edge re, ene;
4194      edge_var_map *vm;
4195      size_t i;
4196
4197      gcc_assert (fd->collapse == 1 && !broken_loop);
4198
4199      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200	 remove arguments of the phi nodes in fin_bb.  We need to create
4201	 appropriate phi nodes in iter_part_bb instead.  */
4202      se = find_edge (iter_part_bb, fin_bb);
4203      re = single_succ_edge (trip_update_bb);
4204      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205      ene = single_succ_edge (entry_bb);
4206
4207      psi = gsi_start_phis (fin_bb);
4208      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209	   gsi_next (&psi), ++i)
4210	{
4211	  gphi *nphi;
4212	  source_location locus;
4213
4214	  phi = psi.phi ();
4215	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4216			       redirect_edge_var_map_def (vm), 0))
4217	    continue;
4218
4219	  t = gimple_phi_result (phi);
4220	  gcc_assert (t == redirect_edge_var_map_result (vm));
4221
4222	  if (!single_pred_p (fin_bb))
4223	    t = copy_ssa_name (t, phi);
4224
4225	  nphi = create_phi_node (t, iter_part_bb);
4226
4227	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4228	  locus = gimple_phi_arg_location_from_edge (phi, se);
4229
4230	  /* A special case -- fd->loop.v is not yet computed in
4231	     iter_part_bb, so we need to use vextra instead.  */
4232	  if (t == fd->loop.v)
4233	    t = vextra;
4234	  add_phi_arg (nphi, t, ene, locus);
4235	  locus = redirect_edge_var_map_location (vm);
4236	  tree back_arg = redirect_edge_var_map_def (vm);
4237	  add_phi_arg (nphi, back_arg, re, locus);
4238	  edge ce = find_edge (cont_bb, body_bb);
4239	  if (ce == NULL)
4240	    {
4241	      ce = BRANCH_EDGE (cont_bb);
4242	      gcc_assert (single_succ (ce->dest) == body_bb);
4243	      ce = single_succ_edge (ce->dest);
4244	    }
4245	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4246	  gcc_assert (inner_loop_phi != NULL);
4247	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4248		       find_edge (seq_start_bb, body_bb), locus);
4249
4250	  if (!single_pred_p (fin_bb))
4251	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4252	}
4253      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4254      redirect_edge_var_map_clear (re);
4255      if (single_pred_p (fin_bb))
4256	while (1)
4257	  {
4258	    psi = gsi_start_phis (fin_bb);
4259	    if (gsi_end_p (psi))
4260	      break;
4261	    remove_phi_node (&psi, false);
4262	  }
4263
4264      /* Make phi node for trip.  */
4265      phi = create_phi_node (trip_main, iter_part_bb);
4266      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4267		   UNKNOWN_LOCATION);
4268      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4269		   UNKNOWN_LOCATION);
4270    }
4271
4272  if (!broken_loop)
4273    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4274  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4275			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4276  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4277			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4278  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4279			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4280  set_immediate_dominator (CDI_DOMINATORS, body_bb,
4281			   recompute_dominator (CDI_DOMINATORS, body_bb));
4282
4283  if (!broken_loop)
4284    {
4285      struct loop *loop = body_bb->loop_father;
4286      struct loop *trip_loop = alloc_loop ();
4287      trip_loop->header = iter_part_bb;
4288      trip_loop->latch = trip_update_bb;
4289      add_loop (trip_loop, iter_part_bb->loop_father);
4290
4291      if (loop != entry_bb->loop_father)
4292	{
4293	  gcc_assert (loop->header == body_bb);
4294	  gcc_assert (loop->latch == region->cont
4295		      || single_pred (loop->latch) == region->cont);
4296	  trip_loop->inner = loop;
4297	  return;
4298	}
4299
4300      if (!gimple_omp_for_combined_p (fd->for_stmt))
4301	{
4302	  loop = alloc_loop ();
4303	  loop->header = body_bb;
4304	  if (collapse_bb == NULL)
4305	    loop->latch = cont_bb;
4306	  add_loop (loop, trip_loop);
4307	}
4308    }
4309}
4310
4311/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4312   loop.  Given parameters:
4313
4314	for (V = N1; V cond N2; V += STEP) BODY;
4315
4316   where COND is "<" or ">", we generate pseudocode
4317
4318	V = N1;
4319	goto L1;
4320    L0:
4321	BODY;
4322	V += STEP;
4323    L1:
4324	if (V cond N2) goto L0; else goto L2;
4325    L2:
4326
4327    For collapsed loops, given parameters:
4328      collapse(3)
4329      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4330	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4331	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4332	    BODY;
4333
4334    we generate pseudocode
4335
4336	if (cond3 is <)
4337	  adj = STEP3 - 1;
4338	else
4339	  adj = STEP3 + 1;
4340	count3 = (adj + N32 - N31) / STEP3;
4341	if (cond2 is <)
4342	  adj = STEP2 - 1;
4343	else
4344	  adj = STEP2 + 1;
4345	count2 = (adj + N22 - N21) / STEP2;
4346	if (cond1 is <)
4347	  adj = STEP1 - 1;
4348	else
4349	  adj = STEP1 + 1;
4350	count1 = (adj + N12 - N11) / STEP1;
4351	count = count1 * count2 * count3;
4352	V = 0;
4353	V1 = N11;
4354	V2 = N21;
4355	V3 = N31;
4356	goto L1;
4357    L0:
4358	BODY;
4359	V += 1;
4360	V3 += STEP3;
4361	V2 += (V3 cond3 N32) ? 0 : STEP2;
4362	V3 = (V3 cond3 N32) ? V3 : N31;
4363	V1 += (V2 cond2 N22) ? 0 : STEP1;
4364	V2 = (V2 cond2 N22) ? V2 : N21;
4365    L1:
4366	if (V < count) goto L0; else goto L2;
4367    L2:
4368
4369      */
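
/* Illustration only (hypothetical user code, not part of this file): this
   path handles simd loops such as

	#pragma omp simd safelen(8)
	for (i = 0; i < n; i++)
	  a[i] += b[i];

   No runtime calls are emitted; the construct is turned into the plain
   sequential loop above and the safelen/simduid information is recorded on
   the loop structure so the vectorizer can exploit it later.  */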
4370
4371static void
4372expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4373{
4374  tree type, t;
4375  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4376  gimple_stmt_iterator gsi;
4377  gimple *stmt;
4378  gcond *cond_stmt;
4379  bool broken_loop = region->cont == NULL;
4380  edge e, ne;
4381  tree *counts = NULL;
4382  int i;
4383  int safelen_int = INT_MAX;
4384  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4385				  OMP_CLAUSE_SAFELEN);
4386  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4387				  OMP_CLAUSE__SIMDUID_);
4388  tree n1, n2;
4389
4390  if (safelen)
4391    {
4392      poly_uint64 val;
4393      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4394      if (!poly_int_tree_p (safelen, &val))
4395	safelen_int = 0;
4396      else
4397	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4398      if (safelen_int == 1)
4399	safelen_int = 0;
4400    }
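  /* Illustration: a clause such as "safelen(8)" gives safelen_int == 8,
     which is later copied to loop->safelen for the vectorizer's benefit;
     "safelen(1)" is canonicalized to 0 above since a single-lane simd loop
     offers no vectorization freedom worth advertising.  */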
4401  type = TREE_TYPE (fd->loop.v);
4402  entry_bb = region->entry;
4403  cont_bb = region->cont;
4404  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4405  gcc_assert (broken_loop
4406	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4407  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4408  if (!broken_loop)
4409    {
4410      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4411      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4412      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4413      l2_bb = BRANCH_EDGE (entry_bb)->dest;
4414    }
4415  else
4416    {
4417      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4418      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4419      l2_bb = single_succ (l1_bb);
4420    }
4421  exit_bb = region->exit;
4422  l2_dom_bb = NULL;
4423
4424  gsi = gsi_last_nondebug_bb (entry_bb);
4425
4426  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4427  /* Not needed in SSA form right now.  */
4428  gcc_assert (!gimple_in_ssa_p (cfun));
4429  if (fd->collapse > 1)
4430    {
4431      int first_zero_iter = -1, dummy = -1;
4432      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4433
4434      counts = XALLOCAVEC (tree, fd->collapse);
4435      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4436				  zero_iter_bb, first_zero_iter,
4437				  dummy_bb, dummy, l2_dom_bb);
4438    }
4439  if (l2_dom_bb == NULL)
4440    l2_dom_bb = l1_bb;
4441
4442  n1 = fd->loop.n1;
4443  n2 = fd->loop.n2;
4444  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4445    {
4446      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4447				     OMP_CLAUSE__LOOPTEMP_);
4448      gcc_assert (innerc);
4449      n1 = OMP_CLAUSE_DECL (innerc);
4450      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4451				OMP_CLAUSE__LOOPTEMP_);
4452      gcc_assert (innerc);
4453      n2 = OMP_CLAUSE_DECL (innerc);
4454    }
4455  tree step = fd->loop.step;
4456
4457  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4458				  OMP_CLAUSE__SIMT_);
4459  if (is_simt)
4460    {
4461      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4462      is_simt = safelen_int > 1;
4463    }
4464  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4465  if (is_simt)
4466    {
4467      simt_lane = create_tmp_var (unsigned_type_node);
4468      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4469      gimple_call_set_lhs (g, simt_lane);
4470      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4471      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4472				 fold_convert (TREE_TYPE (step), simt_lane));
4473      n1 = fold_convert (type, n1);
4474      if (POINTER_TYPE_P (type))
4475	n1 = fold_build_pointer_plus (n1, offset);
4476      else
4477	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4478
4479      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4480      if (fd->collapse > 1)
4481	simt_maxlane = build_one_cst (unsigned_type_node);
4482      else if (safelen_int < omp_max_simt_vf ())
4483	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4484      tree vf
4485	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4486					unsigned_type_node, 0);
4487      if (simt_maxlane)
4488	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4489      vf = fold_convert (TREE_TYPE (step), vf);
4490      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4491    }
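
  /* Illustration of the SIMT setup above: lane L of the SIMT vector starts
     at N1 + L * STEP and then advances by STEP * VF each iteration, so the
     lanes interleave the logical iterations instead of each taking a
     contiguous block; VF here is whatever IFN_GOMP_SIMT_VF returns at run
     time, capped by simt_maxlane when that is set.  */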
4492
4493  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4494  if (fd->collapse > 1)
4495    {
4496      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4497	{
4498	  gsi_prev (&gsi);
4499	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4500	  gsi_next (&gsi);
4501	}
4502      else
4503	for (i = 0; i < fd->collapse; i++)
4504	  {
4505	    tree itype = TREE_TYPE (fd->loops[i].v);
4506	    if (POINTER_TYPE_P (itype))
4507	      itype = signed_type_for (itype);
4508	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4509	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4510	  }
4511    }
4512
4513  /* Remove the GIMPLE_OMP_FOR statement.  */
4514  gsi_remove (&gsi, true);
4515
4516  if (!broken_loop)
4517    {
4518      /* Code to control the increment goes in the CONT_BB.  */
4519      gsi = gsi_last_nondebug_bb (cont_bb);
4520      stmt = gsi_stmt (gsi);
4521      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4522
4523      if (POINTER_TYPE_P (type))
4524	t = fold_build_pointer_plus (fd->loop.v, step);
4525      else
4526	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4527      expand_omp_build_assign (&gsi, fd->loop.v, t);
4528
4529      if (fd->collapse > 1)
4530	{
4531	  i = fd->collapse - 1;
4532	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4533	    {
4534	      t = fold_convert (sizetype, fd->loops[i].step);
4535	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4536	    }
4537	  else
4538	    {
4539	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4540				fd->loops[i].step);
4541	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4542			       fd->loops[i].v, t);
4543	    }
4544	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4545
4546	  for (i = fd->collapse - 1; i > 0; i--)
4547	    {
4548	      tree itype = TREE_TYPE (fd->loops[i].v);
4549	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4550	      if (POINTER_TYPE_P (itype2))
4551		itype2 = signed_type_for (itype2);
4552	      t = fold_convert (itype2, fd->loops[i - 1].step);
4553	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4554					    GSI_SAME_STMT);
4555	      t = build3 (COND_EXPR, itype2,
4556			  build2 (fd->loops[i].cond_code, boolean_type_node,
4557				  fd->loops[i].v,
4558				  fold_convert (itype, fd->loops[i].n2)),
4559			  build_int_cst (itype2, 0), t);
4560	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4561		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4562	      else
4563		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4564	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4565
4566	      t = fold_convert (itype, fd->loops[i].n1);
4567	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4568					    GSI_SAME_STMT);
4569	      t = build3 (COND_EXPR, itype,
4570			  build2 (fd->loops[i].cond_code, boolean_type_node,
4571				  fd->loops[i].v,
4572				  fold_convert (itype, fd->loops[i].n2)),
4573			  fd->loops[i].v, t);
4574	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4575	    }
4576	}
4577
4578      /* Remove GIMPLE_OMP_CONTINUE.  */
4579      gsi_remove (&gsi, true);
4580    }
4581
4582  /* Emit the condition in L1_BB.  */
4583  gsi = gsi_start_bb (l1_bb);
4584
4585  t = fold_convert (type, n2);
4586  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4587				false, GSI_CONTINUE_LINKING);
4588  tree v = fd->loop.v;
4589  if (DECL_P (v) && TREE_ADDRESSABLE (v))
4590    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4591				  false, GSI_CONTINUE_LINKING);
4592  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4593  cond_stmt = gimple_build_cond_empty (t);
4594  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4595  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4596		 NULL, NULL)
4597      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4598		    NULL, NULL))
4599    {
4600      gsi = gsi_for_stmt (cond_stmt);
4601      gimple_regimplify_operands (cond_stmt, &gsi);
4602    }
4603
4604  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4605  if (is_simt)
4606    {
4607      gsi = gsi_start_bb (l2_bb);
4608      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4609      if (POINTER_TYPE_P (type))
4610	t = fold_build_pointer_plus (fd->loop.v, step);
4611      else
4612	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4613      expand_omp_build_assign (&gsi, fd->loop.v, t);
4614    }
4615
4616  /* Remove GIMPLE_OMP_RETURN.  */
4617  gsi = gsi_last_nondebug_bb (exit_bb);
4618  gsi_remove (&gsi, true);
4619
4620  /* Connect the new blocks.  */
4621  remove_edge (FALLTHRU_EDGE (entry_bb));
4622
4623  if (!broken_loop)
4624    {
4625      remove_edge (BRANCH_EDGE (entry_bb));
4626      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4627
4628      e = BRANCH_EDGE (l1_bb);
4629      ne = FALLTHRU_EDGE (l1_bb);
4630      e->flags = EDGE_TRUE_VALUE;
4631    }
4632  else
4633    {
4634      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4635
4636      ne = single_succ_edge (l1_bb);
4637      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4638
4639    }
4640  ne->flags = EDGE_FALSE_VALUE;
4641  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4642  ne->probability = e->probability.invert ();
4643
4644  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4645  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4646
4647  if (simt_maxlane)
4648    {
4649      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4650				     NULL_TREE, NULL_TREE);
4651      gsi = gsi_last_bb (entry_bb);
4652      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4653      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4654      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4655      FALLTHRU_EDGE (entry_bb)->probability
4656	 = profile_probability::guessed_always ().apply_scale (7, 8);
4657      BRANCH_EDGE (entry_bb)->probability
4658	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4659      l2_dom_bb = entry_bb;
4660    }
4661  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4662
4663  if (!broken_loop)
4664    {
4665      struct loop *loop = alloc_loop ();
4666      loop->header = l1_bb;
4667      loop->latch = cont_bb;
4668      add_loop (loop, l1_bb->loop_father);
4669      loop->safelen = safelen_int;
4670      if (simduid)
4671	{
4672	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4673	  cfun->has_simduid_loops = true;
4674	}
4675      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4676	 the loop.  */
4677      if ((flag_tree_loop_vectorize
4678	   || !global_options_set.x_flag_tree_loop_vectorize)
4679	  && flag_tree_loop_optimize
4680	  && loop->safelen > 1)
4681	{
4682	  loop->force_vectorize = true;
4683	  cfun->has_force_vectorize_loops = true;
4684	}
4685    }
4686  else if (simduid)
4687    cfun->has_simduid_loops = true;
4688}
4689
4690/* Taskloop construct is represented after gimplification with
4691   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4692   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4693   which should just compute all the needed loop temporaries
4694   for GIMPLE_OMP_TASK.  */
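
/* Illustration only (hypothetical user code, not part of this file):

	#pragma omp taskloop
	for (i = 0; i < n; i++)
	  f (i);

   is gimplified into the GIMPLE_OMP_FOR / GIMPLE_OMP_TASK / GIMPLE_OMP_FOR
   sandwich described above; the routine below only materializes the loop
   bound temporaries the task needs, and the inner GIMPLE_OMP_FOR is
   expanded separately.  */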
4695
4696static void
4697expand_omp_taskloop_for_outer (struct omp_region *region,
4698			       struct omp_for_data *fd,
4699			       gimple *inner_stmt)
4700{
4701  tree type, bias = NULL_TREE;
4702  basic_block entry_bb, cont_bb, exit_bb;
4703  gimple_stmt_iterator gsi;
4704  gassign *assign_stmt;
4705  tree *counts = NULL;
4706  int i;
4707
4708  gcc_assert (inner_stmt);
4709  gcc_assert (region->cont);
4710  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4711	      && gimple_omp_task_taskloop_p (inner_stmt));
4712  type = TREE_TYPE (fd->loop.v);
4713
4714  /* See if we need to bias by LLONG_MIN.  */
4715  if (fd->iter_type == long_long_unsigned_type_node
4716      && TREE_CODE (type) == INTEGER_TYPE
4717      && !TYPE_UNSIGNED (type))
4718    {
4719      tree n1, n2;
4720
4721      if (fd->loop.cond_code == LT_EXPR)
4722	{
4723	  n1 = fd->loop.n1;
4724	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4725	}
4726      else
4727	{
4728	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4729	  n2 = fd->loop.n1;
4730	}
4731      if (TREE_CODE (n1) != INTEGER_CST
4732	  || TREE_CODE (n2) != INTEGER_CST
4733	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4734	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4735    }
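
  /* Worked example of why the bias is needed: with a signed 64-bit
     induction variable and bounds of, say, -10 and 10, the two bounds have
     different signs, so converting them directly to the unsigned ITER_TYPE
     would not preserve their ordering.  Adding TYPE_MIN_VALUE (2^63 for
     long long) maps signed order onto unsigned order; and because adding
     2^63 twice wraps back to the original value, applying the same bias a
     second time later undoes it.  */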
4736
4737  entry_bb = region->entry;
4738  cont_bb = region->cont;
4739  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4740  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4741  exit_bb = region->exit;
4742
4743  gsi = gsi_last_nondebug_bb (entry_bb);
4744  gimple *for_stmt = gsi_stmt (gsi);
4745  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4746  if (fd->collapse > 1)
4747    {
4748      int first_zero_iter = -1, dummy = -1;
4749      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4750
4751      counts = XALLOCAVEC (tree, fd->collapse);
4752      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4753				  zero_iter_bb, first_zero_iter,
4754				  dummy_bb, dummy, l2_dom_bb);
4755
4756      if (zero_iter_bb)
4757	{
4758	  /* Some counts[i] vars might be uninitialized if
4759	     some loop has zero iterations.  But the body shouldn't
4760	     be executed in that case, so just avoid uninit warnings.  */
4761	  for (i = first_zero_iter; i < fd->collapse; i++)
4762	    if (SSA_VAR_P (counts[i]))
4763	      TREE_NO_WARNING (counts[i]) = 1;
4764	  gsi_prev (&gsi);
4765	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4766	  entry_bb = e->dest;
4767	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4768	  gsi = gsi_last_bb (entry_bb);
4769	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4770				   get_immediate_dominator (CDI_DOMINATORS,
4771							    zero_iter_bb));
4772	}
4773    }
4774
4775  tree t0, t1;
4776  t1 = fd->loop.n2;
4777  t0 = fd->loop.n1;
4778  if (POINTER_TYPE_P (TREE_TYPE (t0))
4779      && TYPE_PRECISION (TREE_TYPE (t0))
4780	 != TYPE_PRECISION (fd->iter_type))
4781    {
4782      /* Avoid casting pointers to an integer type of a different size.  */
4783      tree itype = signed_type_for (type);
4784      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4785      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4786    }
4787  else
4788    {
4789      t1 = fold_convert (fd->iter_type, t1);
4790      t0 = fold_convert (fd->iter_type, t0);
4791    }
4792  if (bias)
4793    {
4794      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4795      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4796    }
4797
4798  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4799				 OMP_CLAUSE__LOOPTEMP_);
4800  gcc_assert (innerc);
4801  tree startvar = OMP_CLAUSE_DECL (innerc);
4802  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4803  gcc_assert (innerc);
4804  tree endvar = OMP_CLAUSE_DECL (innerc);
4805  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4806    {
4807      gcc_assert (innerc);
4808      for (i = 1; i < fd->collapse; i++)
4809	{
4810	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4811				    OMP_CLAUSE__LOOPTEMP_);
4812	  gcc_assert (innerc);
4813	}
4814      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4815				OMP_CLAUSE__LOOPTEMP_);
4816      if (innerc)
4817	{
4818	  /* If needed (inner taskloop has lastprivate clause), propagate
4819	     down the total number of iterations.  */
4820	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4821					     NULL_TREE, false,
4822					     GSI_CONTINUE_LINKING);
4823	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4824	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4825	}
4826    }
4827
4828  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4829				 GSI_CONTINUE_LINKING);
4830  assign_stmt = gimple_build_assign (startvar, t0);
4831  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4832
4833  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4834				 GSI_CONTINUE_LINKING);
4835  assign_stmt = gimple_build_assign (endvar, t1);
4836  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4837  if (fd->collapse > 1)
4838    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4839
4840  /* Remove the GIMPLE_OMP_FOR statement.  */
4841  gsi = gsi_for_stmt (for_stmt);
4842  gsi_remove (&gsi, true);
4843
4844  gsi = gsi_last_nondebug_bb (cont_bb);
4845  gsi_remove (&gsi, true);
4846
4847  gsi = gsi_last_nondebug_bb (exit_bb);
4848  gsi_remove (&gsi, true);
4849
4850  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
4851  remove_edge (BRANCH_EDGE (entry_bb));
4852  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
4853  remove_edge (BRANCH_EDGE (cont_bb));
4854  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4855  set_immediate_dominator (CDI_DOMINATORS, region->entry,
4856			   recompute_dominator (CDI_DOMINATORS, region->entry));
4857}
4858
4859/* The taskloop construct is represented after gimplification with
4860   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4861   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
4862   The GOMP_taskloop{,_ull} function arranges for each task to be given just
4863   a single range of iterations.  */
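
/* Illustrative sketch only: each task receives its subrange through the two
   _LOOPTEMP_ clause variables read below into N1 and N2, so its body is
   roughly

	for (V = N1; V COND N2; V += STEP)
	  BODY;

   where COND is FD->loop.cond_code and any LLONG_MIN bias has been undone
   before V is assigned.  */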
4864
4865static void
4866expand_omp_taskloop_for_inner (struct omp_region *region,
4867			       struct omp_for_data *fd,
4868			       gimple *inner_stmt)
4869{
4870  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4871  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4872  basic_block fin_bb;
4873  gimple_stmt_iterator gsi;
4874  edge ep;
4875  bool broken_loop = region->cont == NULL;
4876  tree *counts = NULL;
4877  tree n1, n2, step;
4878
4879  itype = type = TREE_TYPE (fd->loop.v);
4880  if (POINTER_TYPE_P (type))
4881    itype = signed_type_for (type);
4882
4883  /* See if we need to bias by LLONG_MIN.  */
4884  if (fd->iter_type == long_long_unsigned_type_node
4885      && TREE_CODE (type) == INTEGER_TYPE
4886      && !TYPE_UNSIGNED (type))
4887    {
4888      tree n1, n2;
4889
4890      if (fd->loop.cond_code == LT_EXPR)
4891	{
4892	  n1 = fd->loop.n1;
4893	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4894	}
4895      else
4896	{
4897	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4898	  n2 = fd->loop.n1;
4899	}
4900      if (TREE_CODE (n1) != INTEGER_CST
4901	  || TREE_CODE (n2) != INTEGER_CST
4902	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4903	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4904    }
4905
4906  entry_bb = region->entry;
4907  cont_bb = region->cont;
4908  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4909  fin_bb = BRANCH_EDGE (entry_bb)->dest;
4910  gcc_assert (broken_loop
4911	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4912  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
4913  if (!broken_loop)
4914    {
4915      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
4916      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4917    }
4918  exit_bb = region->exit;
4919
4920  /* Iteration space partitioning goes in ENTRY_BB.  */
4921  gsi = gsi_last_nondebug_bb (entry_bb);
4922  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4923
4924  if (fd->collapse > 1)
4925    {
4926      int first_zero_iter = -1, dummy = -1;
4927      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4928
4929      counts = XALLOCAVEC (tree, fd->collapse);
4930      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4931				  fin_bb, first_zero_iter,
4932				  dummy_bb, dummy, l2_dom_bb);
4933      t = NULL_TREE;
4934    }
4935  else
4936    t = integer_one_node;
4937
4938  step = fd->loop.step;
4939  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4940				 OMP_CLAUSE__LOOPTEMP_);
4941  gcc_assert (innerc);
4942  n1 = OMP_CLAUSE_DECL (innerc);
4943  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4944  gcc_assert (innerc);
4945  n2 = OMP_CLAUSE_DECL (innerc);
4946  if (bias)
4947    {
4948      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
4949      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
4950    }
4951  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4952				 true, NULL_TREE, true, GSI_SAME_STMT);
4953  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4954				 true, NULL_TREE, true, GSI_SAME_STMT);
4955  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4956				   true, NULL_TREE, true, GSI_SAME_STMT);
4957
4958  tree startvar = fd->loop.v;
4959  tree endvar = NULL_TREE;
4960
4961  if (gimple_omp_for_combined_p (fd->for_stmt))
4962    {
4963      tree clauses = gimple_omp_for_clauses (inner_stmt);
4964      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4965      gcc_assert (innerc);
4966      startvar = OMP_CLAUSE_DECL (innerc);
4967      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4968				OMP_CLAUSE__LOOPTEMP_);
4969      gcc_assert (innerc);
4970      endvar = OMP_CLAUSE_DECL (innerc);
4971    }
4972  t = fold_convert (TREE_TYPE (startvar), n1);
4973  t = force_gimple_operand_gsi (&gsi, t,
4974				DECL_P (startvar)
4975				&& TREE_ADDRESSABLE (startvar),
4976				NULL_TREE, false, GSI_CONTINUE_LINKING);
4977  gimple *assign_stmt = gimple_build_assign (startvar, t);
4978  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4979
4980  t = fold_convert (TREE_TYPE (startvar), n2);
4981  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4982				false, GSI_CONTINUE_LINKING);
4983  if (endvar)
4984    {
4985      assign_stmt = gimple_build_assign (endvar, e);
4986      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4987      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4988	assign_stmt = gimple_build_assign (fd->loop.v, e);
4989      else
4990	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4991      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4992    }
4993  if (fd->collapse > 1)
4994    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4995
4996  if (!broken_loop)
4997    {
4998      /* The code controlling the sequential loop replaces the
4999	 GIMPLE_OMP_CONTINUE.  */
5000      gsi = gsi_last_nondebug_bb (cont_bb);
5001      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5002      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5003      vmain = gimple_omp_continue_control_use (cont_stmt);
5004      vback = gimple_omp_continue_control_def (cont_stmt);
5005
5006      if (!gimple_omp_for_combined_p (fd->for_stmt))
5007	{
5008	  if (POINTER_TYPE_P (type))
5009	    t = fold_build_pointer_plus (vmain, step);
5010	  else
5011	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5012	  t = force_gimple_operand_gsi (&gsi, t,
5013					DECL_P (vback)
5014					&& TREE_ADDRESSABLE (vback),
5015					NULL_TREE, true, GSI_SAME_STMT);
5016	  assign_stmt = gimple_build_assign (vback, t);
5017	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5018
5019	  t = build2 (fd->loop.cond_code, boolean_type_node,
5020		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5021		      ? t : vback, e);
5022	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5023	}
5024
5025      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5026      gsi_remove (&gsi, true);
5027
5028      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5029	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5030    }
5031
5032  /* Remove the GIMPLE_OMP_FOR statement.  */
5033  gsi = gsi_for_stmt (fd->for_stmt);
5034  gsi_remove (&gsi, true);
5035
5036  /* Remove the GIMPLE_OMP_RETURN statement.  */
5037  gsi = gsi_last_nondebug_bb (exit_bb);
5038  gsi_remove (&gsi, true);
5039
5040  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5041  if (!broken_loop)
5042    remove_edge (BRANCH_EDGE (entry_bb));
5043  else
5044    {
5045      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5046      region->outer->cont = NULL;
5047    }
5048
5049  /* Connect all the blocks.  */
5050  if (!broken_loop)
5051    {
5052      ep = find_edge (cont_bb, body_bb);
5053      if (gimple_omp_for_combined_p (fd->for_stmt))
5054	{
5055	  remove_edge (ep);
5056	  ep = NULL;
5057	}
5058      else if (fd->collapse > 1)
5059	{
5060	  remove_edge (ep);
5061	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5062	}
5063      else
5064	ep->flags = EDGE_TRUE_VALUE;
5065      find_edge (cont_bb, fin_bb)->flags
5066	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5067    }
5068
5069  set_immediate_dominator (CDI_DOMINATORS, body_bb,
5070			   recompute_dominator (CDI_DOMINATORS, body_bb));
5071  if (!broken_loop)
5072    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5073			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5074
5075  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5076    {
5077      struct loop *loop = alloc_loop ();
5078      loop->header = body_bb;
5079      if (collapse_bb == NULL)
5080	loop->latch = cont_bb;
5081      add_loop (loop, body_bb->loop_father);
5082    }
5083}
5084
5085/* A subroutine of expand_omp_for.  Generate code for an OpenACC
5086   partitioned loop.  The lowering here is abstracted, in that the
5087   loop parameters are passed through internal functions, which are
5088   further lowered by oacc_device_lower, once we get to the target
5089   compiler.  The loop is of the form:
5090
5091   for (V = B; V LTGT E; V += S) {BODY}
5092
5093   where LTGT is < or >.  We may have a specified chunking size, CHUNK_SIZE
5094   (constant 0 for no chunking) and we will have a GWV partitioning
5095   mask, specifying dimensions over which the loop is to be
5096   partitioned (see note below).  We generate code that looks like
5097   (this ignores tiling):
5098
5099   <entry_bb> [incoming FALL->body, BRANCH->exit]
5100     typedef signedintify (typeof (V)) T;  // underlying signed integral type
5101     T range = E - B;
5102     T chunk_no = 0;
5103     T DIR = LTGT == '<' ? +1 : -1;
5104     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5105     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5106
5107   <head_bb> [created by splitting end of entry_bb]
5108     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5109     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5110     if (!(offset LTGT bound)) goto bottom_bb;
5111
5112   <body_bb> [incoming]
5113     V = B + offset;
5114     {BODY}
5115
5116   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5117     offset += step;
5118     if (offset LTGT bound) goto body_bb; [*]
5119
5120   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5121     chunk_no++;
5122     if (chunk_no < chunk_max) goto head_bb;
5123
5124   <exit_bb> [incoming]
5125     V = B + ((range -/+ 1) / S +/- 1) * S [*]
5126
5127   [*] Needed if V live at end of loop.  */
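
/* When tiling is in effect (the sketch above ignores it), an element loop
   is nested inside <body_bb>: its range is MIN (bound - offset,
   tile_size * element_step), and its own offset, bound and step are
   obtained from further IFN_GOACC_LOOP calls with a GWV mask of -1.  This
   is an informal summary of the code below, not a normative description.  */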
5128
5129static void
5130expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5131{
5132  tree v = fd->loop.v;
5133  enum tree_code cond_code = fd->loop.cond_code;
5134  enum tree_code plus_code = PLUS_EXPR;
5135
5136  tree chunk_size = integer_minus_one_node;
5137  tree gwv = integer_zero_node;
5138  tree iter_type = TREE_TYPE (v);
5139  tree diff_type = iter_type;
5140  tree plus_type = iter_type;
5141  struct oacc_collapse *counts = NULL;
5142
5143  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5144		       == GF_OMP_FOR_KIND_OACC_LOOP);
5145  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5146  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5147
5148  if (POINTER_TYPE_P (iter_type))
5149    {
5150      plus_code = POINTER_PLUS_EXPR;
5151      plus_type = sizetype;
5152    }
5153  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5154    diff_type = signed_type_for (diff_type);
5155  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5156    diff_type = integer_type_node;
5157
5158  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5159  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5160  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5161  basic_block bottom_bb = NULL;
5162
5163  /* entry_bb has two successors; the branch edge goes to the exit
5164     block, the fallthrough edge to the body.  */
5165  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5166	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5167
5168  /* If cont_bb is non-NULL, it has two successors.  The branch successor
5169     is either body_bb itself or a block whose only successor is body_bb.
5170     Its fallthrough successor is the final block (the same as the branch
5171     successor of entry_bb).  */
5172  if (cont_bb)
5173    {
5174      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5175      basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5176
5177      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5178      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5179    }
5180  else
5181    gcc_assert (!gimple_in_ssa_p (cfun));
5182
5183  /* The exit block only has entry_bb and cont_bb as predecessors.  */
5184  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5185
5186  tree chunk_no;
5187  tree chunk_max = NULL_TREE;
5188  tree bound, offset;
5189  tree step = create_tmp_var (diff_type, ".step");
5190  bool up = cond_code == LT_EXPR;
5191  tree dir = build_int_cst (diff_type, up ? +1 : -1);
5192  bool chunking = !gimple_in_ssa_p (cfun);
5193  bool negating;
5194
5195  /* Tiling vars.  */
5196  tree tile_size = NULL_TREE;
5197  tree element_s = NULL_TREE;
5198  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5199  basic_block elem_body_bb = NULL;
5200  basic_block elem_cont_bb = NULL;
5201
5202  /* SSA instances.  */
5203  tree offset_incr = NULL_TREE;
5204  tree offset_init = NULL_TREE;
5205
5206  gimple_stmt_iterator gsi;
5207  gassign *ass;
5208  gcall *call;
5209  gimple *stmt;
5210  tree expr;
5211  location_t loc;
5212  edge split, be, fte;
5213
5214  /* Split the end of entry_bb to create head_bb.  */
5215  split = split_block (entry_bb, last_stmt (entry_bb));
5216  basic_block head_bb = split->dest;
5217  entry_bb = split->src;
5218
5219  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5220  gsi = gsi_last_nondebug_bb (entry_bb);
5221  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5222  loc = gimple_location (for_stmt);
5223
5224  if (gimple_in_ssa_p (cfun))
5225    {
5226      offset_init = gimple_omp_for_index (for_stmt, 0);
5227      gcc_assert (integer_zerop (fd->loop.n1));
5228      /* The SSA parallelizer does gang parallelism.  */
5229      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5230    }
5231
5232  if (fd->collapse > 1 || fd->tiling)
5233    {
5234      gcc_assert (!gimple_in_ssa_p (cfun) && up);
5235      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5236      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5237					      TREE_TYPE (fd->loop.n2), loc);
5238
5239      if (SSA_VAR_P (fd->loop.n2))
5240	{
5241	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5242					    true, GSI_SAME_STMT);
5243	  ass = gimple_build_assign (fd->loop.n2, total);
5244	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5245	}
5246    }
5247
5248  tree b = fd->loop.n1;
5249  tree e = fd->loop.n2;
5250  tree s = fd->loop.step;
5251
5252  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5253  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5254
5255  /* Convert the step, avoiding possible unsigned->signed overflow.  */
5256  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5257  if (negating)
5258    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5259  s = fold_convert (diff_type, s);
5260  if (negating)
5261    s = fold_build1 (NEGATE_EXPR, diff_type, s);
5262  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5263
5264  if (!chunking)
5265    chunk_size = integer_zero_node;
5266  expr = fold_convert (diff_type, chunk_size);
5267  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5268					 NULL_TREE, true, GSI_SAME_STMT);
5269
5270  if (fd->tiling)
5271    {
5272      /* Determine the tile size and element step,
5273	 modify the outer loop step size.  */
5274      tile_size = create_tmp_var (diff_type, ".tile_size");
5275      expr = build_int_cst (diff_type, 1);
5276      for (int ix = 0; ix < fd->collapse; ix++)
5277	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5278      expr = force_gimple_operand_gsi (&gsi, expr, true,
5279				       NULL_TREE, true, GSI_SAME_STMT);
5280      ass = gimple_build_assign (tile_size, expr);
5281      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5282
5283      element_s = create_tmp_var (diff_type, ".element_s");
5284      ass = gimple_build_assign (element_s, s);
5285      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5286
5287      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5288      s = force_gimple_operand_gsi (&gsi, expr, true,
5289				    NULL_TREE, true, GSI_SAME_STMT);
5290    }
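
  /* Illustrative numbers only: for two collapsed loops tiled as tile (2, 4),
     tile_size becomes 8, element_s keeps the original step S, and the outer
     step is scaled to 8 * S so that each outer iteration advances past one
     whole tile.  */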
5291
5292  /* Determine the range, avoiding possible unsigned->signed overflow.  */
5293  negating = !up && TYPE_UNSIGNED (iter_type);
5294  expr = fold_build2 (MINUS_EXPR, plus_type,
5295		      fold_convert (plus_type, negating ? b : e),
5296		      fold_convert (plus_type, negating ? e : b));
5297  expr = fold_convert (diff_type, expr);
5298  if (negating)
5299    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5300  tree range = force_gimple_operand_gsi (&gsi, expr, true,
5301					 NULL_TREE, true, GSI_SAME_STMT);
5302
5303  chunk_no = build_int_cst (diff_type, 0);
5304  if (chunking)
5305    {
5306      gcc_assert (!gimple_in_ssa_p (cfun));
5307
5308      expr = chunk_no;
5309      chunk_max = create_tmp_var (diff_type, ".chunk_max");
5310      chunk_no = create_tmp_var (diff_type, ".chunk_no");
5311
5312      ass = gimple_build_assign (chunk_no, expr);
5313      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5314
5315      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5316					 build_int_cst (integer_type_node,
5317							IFN_GOACC_LOOP_CHUNKS),
5318					 dir, range, s, chunk_size, gwv);
5319      gimple_call_set_lhs (call, chunk_max);
5320      gimple_set_location (call, loc);
5321      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5322    }
5323  else
5324    chunk_size = chunk_no;
5325
5326  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5327				     build_int_cst (integer_type_node,
5328						    IFN_GOACC_LOOP_STEP),
5329				     dir, range, s, chunk_size, gwv);
5330  gimple_call_set_lhs (call, step);
5331  gimple_set_location (call, loc);
5332  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5333
5334  /* Remove the GIMPLE_OMP_FOR.  */
5335  gsi_remove (&gsi, true);
5336
5337  /* Fixup edges from head_bb.  */
5338  be = BRANCH_EDGE (head_bb);
5339  fte = FALLTHRU_EDGE (head_bb);
5340  be->flags |= EDGE_FALSE_VALUE;
5341  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5342
5343  basic_block body_bb = fte->dest;
5344
5345  if (gimple_in_ssa_p (cfun))
5346    {
5347      gsi = gsi_last_nondebug_bb (cont_bb);
5348      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5349
5350      offset = gimple_omp_continue_control_use (cont_stmt);
5351      offset_incr = gimple_omp_continue_control_def (cont_stmt);
5352    }
5353  else
5354    {
5355      offset = create_tmp_var (diff_type, ".offset");
5356      offset_init = offset_incr = offset;
5357    }
5358  bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5359
5360  /* Loop offset & bound go into head_bb.  */
5361  gsi = gsi_start_bb (head_bb);
5362
5363  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5364				     build_int_cst (integer_type_node,
5365						    IFN_GOACC_LOOP_OFFSET),
5366				     dir, range, s,
5367				     chunk_size, gwv, chunk_no);
5368  gimple_call_set_lhs (call, offset_init);
5369  gimple_set_location (call, loc);
5370  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5371
5372  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5373				     build_int_cst (integer_type_node,
5374						    IFN_GOACC_LOOP_BOUND),
5375				     dir, range, s,
5376				     chunk_size, gwv, offset_init);
5377  gimple_call_set_lhs (call, bound);
5378  gimple_set_location (call, loc);
5379  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5380
5381  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5382  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5383		    GSI_CONTINUE_LINKING);
5384
5385  /* V assignment goes into body_bb.  */
5386  if (!gimple_in_ssa_p (cfun))
5387    {
5388      gsi = gsi_start_bb (body_bb);
5389
5390      expr = build2 (plus_code, iter_type, b,
5391		     fold_convert (plus_type, offset));
5392      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5393				       true, GSI_SAME_STMT);
5394      ass = gimple_build_assign (v, expr);
5395      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5396
5397      if (fd->collapse > 1 || fd->tiling)
5398	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5399
5400      if (fd->tiling)
5401	{
5402	  /* Determine the range of the element loop -- usually simply
5403	     the tile_size, but could be smaller if the final
5404	     iteration of the outer loop is a partial tile.  */
5405	  tree e_range = create_tmp_var (diff_type, ".e_range");
5406
5407	  expr = build2 (MIN_EXPR, diff_type,
5408			 build2 (MINUS_EXPR, diff_type, bound, offset),
5409			 build2 (MULT_EXPR, diff_type, tile_size,
5410				 element_s));
5411	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5412					   true, GSI_SAME_STMT);
5413	  ass = gimple_build_assign (e_range, expr);
5414	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5415
5416	  /* Determine bound, offset & step of inner loop. */
5417	  e_bound = create_tmp_var (diff_type, ".e_bound");
5418	  e_offset = create_tmp_var (diff_type, ".e_offset");
5419	  e_step = create_tmp_var (diff_type, ".e_step");
5420
5421	  /* Mark these as element loops.  */
5422	  tree t, e_gwv = integer_minus_one_node;
5423	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5424
5425	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5426	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5427					     element_s, chunk, e_gwv, chunk);
5428	  gimple_call_set_lhs (call, e_offset);
5429	  gimple_set_location (call, loc);
5430	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5431
5432	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5433	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5434					     element_s, chunk, e_gwv, e_offset);
5435	  gimple_call_set_lhs (call, e_bound);
5436	  gimple_set_location (call, loc);
5437	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5438
5439	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5440	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5441					     element_s, chunk, e_gwv);
5442	  gimple_call_set_lhs (call, e_step);
5443	  gimple_set_location (call, loc);
5444	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5445
5446	  /* Add test and split block.  */
5447	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5448	  stmt = gimple_build_cond_empty (expr);
5449	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5450	  split = split_block (body_bb, stmt);
5451	  elem_body_bb = split->dest;
5452	  if (cont_bb == body_bb)
5453	    cont_bb = elem_body_bb;
5454	  body_bb = split->src;
5455
5456	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5457
5458	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5459	  if (cont_bb == NULL)
5460	    {
5461	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5462	      e->probability = profile_probability::even ();
5463	      split->probability = profile_probability::even ();
5464	    }
5465
5466	  /* Initialize the user's loop vars.  */
5467	  gsi = gsi_start_bb (elem_body_bb);
5468	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5469	}
5470    }
5471
5472  /* Loop increment goes into cont_bb.  If this is not a loop, we
5473     will have spawned threads as if it were, and each one will
5474     execute one iteration.  The specification is not explicit about
5475     whether such constructs are ill-formed or not, and they can
5476     occur, especially when noreturn routines are involved.  */
5477  if (cont_bb)
5478    {
5479      gsi = gsi_last_nondebug_bb (cont_bb);
5480      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5481      loc = gimple_location (cont_stmt);
5482
5483      if (fd->tiling)
5484	{
5485	  /* Insert element loop increment and test.  */
5486	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5487	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5488					   true, GSI_SAME_STMT);
5489	  ass = gimple_build_assign (e_offset, expr);
5490	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5491	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5492
5493	  stmt = gimple_build_cond_empty (expr);
5494	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5495	  split = split_block (cont_bb, stmt);
5496	  elem_cont_bb = split->src;
5497	  cont_bb = split->dest;
5498
5499	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5500	  split->probability = profile_probability::unlikely ().guessed ();
5501	  edge latch_edge
5502	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5503	  latch_edge->probability = profile_probability::likely ().guessed ();
5504
5505	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5506	  skip_edge->probability = profile_probability::unlikely ().guessed ();
5507	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5508	  loop_entry_edge->probability
5509	    = profile_probability::likely ().guessed ();
5510
5511	  gsi = gsi_for_stmt (cont_stmt);
5512	}
5513
5514      /* Increment offset.  */
5515      if (gimple_in_ssa_p (cfun))
5516	expr = build2 (plus_code, iter_type, offset,
5517		       fold_convert (plus_type, step));
5518      else
5519	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5520      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5521				       true, GSI_SAME_STMT);
5522      ass = gimple_build_assign (offset_incr, expr);
5523      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5524      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5525      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5526
5527      /*  Remove the GIMPLE_OMP_CONTINUE.  */
5528      gsi_remove (&gsi, true);
5529
5530      /* Fixup edges from cont_bb.  */
5531      be = BRANCH_EDGE (cont_bb);
5532      fte = FALLTHRU_EDGE (cont_bb);
5533      be->flags |= EDGE_TRUE_VALUE;
5534      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5535
5536      if (chunking)
5537	{
5538	  /* Split the beginning of exit_bb to make bottom_bb.  We
5539	     need to insert a nop at the start, because splitting is
5540	     after a stmt, not before.  */
5541	  gsi = gsi_start_bb (exit_bb);
5542	  stmt = gimple_build_nop ();
5543	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5544	  split = split_block (exit_bb, stmt);
5545	  bottom_bb = split->src;
5546	  exit_bb = split->dest;
5547	  gsi = gsi_last_bb (bottom_bb);
5548
5549	  /* Chunk increment and test goes into bottom_bb.  */
5550	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5551			 build_int_cst (diff_type, 1));
5552	  ass = gimple_build_assign (chunk_no, expr);
5553	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5554
5555	  /* Chunk test at end of bottom_bb.  */
5556	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5557	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5558			    GSI_CONTINUE_LINKING);
5559
5560	  /* Fixup edges from bottom_bb.  */
5561	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5562	  split->probability = profile_probability::unlikely ().guessed ();
5563	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5564	  latch_edge->probability = profile_probability::likely ().guessed ();
5565	}
5566    }
5567
5568  gsi = gsi_last_nondebug_bb (exit_bb);
5569  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5570  loc = gimple_location (gsi_stmt (gsi));
5571
5572  if (!gimple_in_ssa_p (cfun))
5573    {
5574      /* Insert the final value of V, in case it is live.  This is the
5575	 value for the only thread that survives past the join.  */
5576      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5577      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5578      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5579      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5580      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5581      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5582				       true, GSI_SAME_STMT);
5583      ass = gimple_build_assign (v, expr);
5584      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5585    }
5586
5587  /* Remove the OMP_RETURN.  */
5588  gsi_remove (&gsi, true);
5589
5590  if (cont_bb)
5591    {
5592      /* We now have one, two or three nested loops.  Update the loop
5593	 structures.  */
5594      struct loop *parent = entry_bb->loop_father;
5595      struct loop *body = body_bb->loop_father;
5596
5597      if (chunking)
5598	{
5599	  struct loop *chunk_loop = alloc_loop ();
5600	  chunk_loop->header = head_bb;
5601	  chunk_loop->latch = bottom_bb;
5602	  add_loop (chunk_loop, parent);
5603	  parent = chunk_loop;
5604	}
5605      else if (parent != body)
5606	{
5607	  gcc_assert (body->header == body_bb);
5608	  gcc_assert (body->latch == cont_bb
5609		      || single_pred (body->latch) == cont_bb);
5610	  parent = NULL;
5611	}
5612
5613      if (parent)
5614	{
5615	  struct loop *body_loop = alloc_loop ();
5616	  body_loop->header = body_bb;
5617	  body_loop->latch = cont_bb;
5618	  add_loop (body_loop, parent);
5619
5620	  if (fd->tiling)
5621	    {
5622	      /* Insert tiling's element loop.  */
5623	      struct loop *inner_loop = alloc_loop ();
5624	      inner_loop->header = elem_body_bb;
5625	      inner_loop->latch = elem_cont_bb;
5626	      add_loop (inner_loop, body_loop);
5627	    }
5628	}
5629    }
5630}
5631
5632/* Expand the OMP loop defined by REGION.  */
5633
5634static void
5635expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5636{
5637  struct omp_for_data fd;
5638  struct omp_for_data_loop *loops;
5639
5640  loops
5641    = (struct omp_for_data_loop *)
5642      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5643	      * sizeof (struct omp_for_data_loop));
5644  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5645			&fd, loops);
5646  region->sched_kind = fd.sched_kind;
5647  region->sched_modifiers = fd.sched_modifiers;
5648
5649  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5650  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5651  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5652  if (region->cont)
5653    {
5654      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5655      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5656      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5657    }
5658  else
5659    /* If there isn't a continue then this is a degenerate case where
5660       the introduction of abnormal edges during lowering will prevent
5661       original loops from being detected.  Fix that up.  */
5662    loops_state_set (LOOPS_NEED_FIXUP);
5663
5664  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5665    expand_omp_simd (region, &fd);
5666  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5667    {
5668      gcc_assert (!inner_stmt);
5669      expand_oacc_for (region, &fd);
5670    }
5671  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5672    {
5673      if (gimple_omp_for_combined_into_p (fd.for_stmt))
5674	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5675      else
5676	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5677    }
5678  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5679	   && !fd.have_ordered)
5680    {
5681      if (fd.chunk_size == NULL)
5682	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5683      else
5684	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5685    }
5686  else
5687    {
5688      int fn_index, start_ix, next_ix;
5689
5690      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5691		  == GF_OMP_FOR_KIND_FOR);
5692      if (fd.chunk_size == NULL
5693	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5694	fd.chunk_size = integer_zero_node;
5695      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5696      switch (fd.sched_kind)
5697	{
5698	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5699	  fn_index = 3;
5700	  break;
5701	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5702	case OMP_CLAUSE_SCHEDULE_GUIDED:
5703	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5704	      && !fd.ordered
5705	      && !fd.have_ordered)
5706	    {
5707	      fn_index = 3 + fd.sched_kind;
5708	      break;
5709	    }
5710	  /* FALLTHRU */
5711	default:
5712	  fn_index = fd.sched_kind;
5713	  break;
5714	}
5715      if (!fd.ordered)
5716	fn_index += fd.have_ordered * 6;
5717      if (fd.ordered)
5718	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5719      else
5720	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5721      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5722      if (fd.iter_type == long_long_unsigned_type_node)
5723	{
5724	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5725			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5726	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5727		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5728	}
5729      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5730			      (enum built_in_function) next_ix, inner_stmt);
5731    }
5732
5733  if (gimple_in_ssa_p (cfun))
5734    update_ssa (TODO_update_ssa_only_virtuals);
5735}
5736
5737/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5738
5739	v = GOMP_sections_start (n);
5740    L0:
5741	switch (v)
5742	  {
5743	  case 0:
5744	    goto L2;
5745	  case 1:
5746	    section 1;
5747	    goto L1;
5748	  case 2:
5749	    ...
5750	  case n:
5751	    ...
5752	  default:
5753	    abort ();
5754	  }
5755    L1:
5756	v = GOMP_sections_next ();
5757	goto L0;
5758    L2:
5759	reduction;
5760
5761    If this is a combined parallel sections, replace the call to
5762    GOMP_sections_start with call to GOMP_sections_next.  */
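
/* For illustration (not taken from any testcase), a directive such as

	#pragma omp sections
	{
	  #pragma omp section
	    foo ();
	  #pragma omp section
	    bar ();
	}

   yields n == 2, with foo () reached through "case 1", bar () through
   "case 2", and "case 0" jumping straight to L2 when no work remains.  */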
5763
5764static void
5765expand_omp_sections (struct omp_region *region)
5766{
5767  tree t, u, vin = NULL, vmain, vnext, l2;
5768  unsigned len;
5769  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5770  gimple_stmt_iterator si, switch_si;
5771  gomp_sections *sections_stmt;
5772  gimple *stmt;
5773  gomp_continue *cont;
5774  edge_iterator ei;
5775  edge e;
5776  struct omp_region *inner;
5777  unsigned i, casei;
5778  bool exit_reachable = region->cont != NULL;
5779
5780  gcc_assert (region->exit != NULL);
5781  entry_bb = region->entry;
5782  l0_bb = single_succ (entry_bb);
5783  l1_bb = region->cont;
5784  l2_bb = region->exit;
5785  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5786    l2 = gimple_block_label (l2_bb);
5787  else
5788    {
5789      /* This can happen if there are reductions.  */
5790      len = EDGE_COUNT (l0_bb->succs);
5791      gcc_assert (len > 0);
5792      e = EDGE_SUCC (l0_bb, len - 1);
5793      si = gsi_last_nondebug_bb (e->dest);
5794      l2 = NULL_TREE;
5795      if (gsi_end_p (si)
5796	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5797	l2 = gimple_block_label (e->dest);
5798      else
5799	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5800	  {
5801	    si = gsi_last_nondebug_bb (e->dest);
5802	    if (gsi_end_p (si)
5803		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5804	      {
5805		l2 = gimple_block_label (e->dest);
5806		break;
5807	      }
5808	  }
5809    }
5810  if (exit_reachable)
5811    default_bb = create_empty_bb (l1_bb->prev_bb);
5812  else
5813    default_bb = create_empty_bb (l0_bb);
5814
5815  /* We will build a switch() with enough cases for all the
5816     GIMPLE_OMP_SECTION regions, a '0' case to signal that there is no more work,
5817     and a default case to abort if something goes wrong.  */
5818  len = EDGE_COUNT (l0_bb->succs);
5819
5820  /* Use vec::quick_push on label_vec throughout, since we know the size
5821     in advance.  */
5822  auto_vec<tree> label_vec (len);
5823
5824  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5825     GIMPLE_OMP_SECTIONS statement.  */
5826  si = gsi_last_nondebug_bb (entry_bb);
5827  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5828  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5829  vin = gimple_omp_sections_control (sections_stmt);
5830  if (!is_combined_parallel (region))
5831    {
5832      /* If we are not inside a combined parallel+sections region,
5833	 call GOMP_sections_start.  */
5834      t = build_int_cst (unsigned_type_node, len - 1);
5835      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5836      stmt = gimple_build_call (u, 1, t);
5837    }
5838  else
5839    {
5840      /* Otherwise, call GOMP_sections_next.  */
5841      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5842      stmt = gimple_build_call (u, 0);
5843    }
5844  gimple_call_set_lhs (stmt, vin);
5845  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5846  gsi_remove (&si, true);
5847
5848  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5849     L0_BB.  */
5850  switch_si = gsi_last_nondebug_bb (l0_bb);
5851  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5852  if (exit_reachable)
5853    {
5854      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5855      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5856      vmain = gimple_omp_continue_control_use (cont);
5857      vnext = gimple_omp_continue_control_def (cont);
5858    }
5859  else
5860    {
5861      vmain = vin;
5862      vnext = NULL_TREE;
5863    }
5864
5865  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5866  label_vec.quick_push (t);
5867  i = 1;
5868
5869  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
5870  for (inner = region->inner, casei = 1;
5871       inner;
5872       inner = inner->next, i++, casei++)
5873    {
5874      basic_block s_entry_bb, s_exit_bb;
5875
5876      /* Skip optional reduction region.  */
5877      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5878	{
5879	  --i;
5880	  --casei;
5881	  continue;
5882	}
5883
5884      s_entry_bb = inner->entry;
5885      s_exit_bb = inner->exit;
5886
5887      t = gimple_block_label (s_entry_bb);
5888      u = build_int_cst (unsigned_type_node, casei);
5889      u = build_case_label (u, NULL, t);
5890      label_vec.quick_push (u);
5891
5892      si = gsi_last_nondebug_bb (s_entry_bb);
5893      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5894      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5895      gsi_remove (&si, true);
5896      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5897
5898      if (s_exit_bb == NULL)
5899	continue;
5900
5901      si = gsi_last_nondebug_bb (s_exit_bb);
5902      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5903      gsi_remove (&si, true);
5904
5905      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5906    }
5907
5908  /* Error handling code goes in DEFAULT_BB.  */
5909  t = gimple_block_label (default_bb);
5910  u = build_case_label (NULL, NULL, t);
5911  make_edge (l0_bb, default_bb, 0);
5912  add_bb_to_loop (default_bb, current_loops->tree_root);
5913
5914  stmt = gimple_build_switch (vmain, u, label_vec);
5915  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5916  gsi_remove (&switch_si, true);
5917
5918  si = gsi_start_bb (default_bb);
5919  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5920  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5921
5922  if (exit_reachable)
5923    {
5924      tree bfn_decl;
5925
5926      /* Code to get the next section goes in L1_BB.  */
5927      si = gsi_last_nondebug_bb (l1_bb);
5928      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5929
5930      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5931      stmt = gimple_build_call (bfn_decl, 0);
5932      gimple_call_set_lhs (stmt, vnext);
5933      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5934      gsi_remove (&si, true);
5935
5936      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5937    }
5938
5939  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
5940  si = gsi_last_nondebug_bb (l2_bb);
5941  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5942    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5943  else if (gimple_omp_return_lhs (gsi_stmt (si)))
5944    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5945  else
5946    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5947  stmt = gimple_build_call (t, 0);
5948  if (gimple_omp_return_lhs (gsi_stmt (si)))
5949    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5950  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5951  gsi_remove (&si, true);
5952
5953  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5954}
5955
5956/* Expand code for an OpenMP single directive.  We've already expanded
5957   much of the code; here we simply place the GOMP_barrier call.  */
5958
5959static void
5960expand_omp_single (struct omp_region *region)
5961{
5962  basic_block entry_bb, exit_bb;
5963  gimple_stmt_iterator si;
5964
5965  entry_bb = region->entry;
5966  exit_bb = region->exit;
5967
5968  si = gsi_last_nondebug_bb (entry_bb);
5969  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5970  gsi_remove (&si, true);
5971  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5972
5973  si = gsi_last_nondebug_bb (exit_bb);
5974  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5975    {
5976      tree t = gimple_omp_return_lhs (gsi_stmt (si));
5977      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5978    }
5979  gsi_remove (&si, true);
5980  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5981}
5982
5983/* Generic expansion for OpenMP synchronization directives: master,
5984   ordered and critical.  All we need to do here is remove the entry
5985   and exit markers for REGION.  */
5986
5987static void
5988expand_omp_synch (struct omp_region *region)
5989{
5990  basic_block entry_bb, exit_bb;
5991  gimple_stmt_iterator si;
5992
5993  entry_bb = region->entry;
5994  exit_bb = region->exit;
5995
5996  si = gsi_last_nondebug_bb (entry_bb);
5997  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5998	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5999	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6000	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6001	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6002	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6003  gsi_remove (&si, true);
6004  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6005
6006  if (exit_bb)
6007    {
6008      si = gsi_last_nondebug_bb (exit_bb);
6009      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6010      gsi_remove (&si, true);
6011      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6012    }
6013}
6014
6015/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6016   operation as a normal volatile load.  */
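
/* For example (schematic only): for a "#pragma omp atomic read" of a 4-byte
   int, INDEX is 2 and the load/store statement pair is replaced by roughly

	loaded_val = __atomic_load_4 (addr, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST used instead when the seq_cst clause was given.  */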
6017
6018static bool
6019expand_omp_atomic_load (basic_block load_bb, tree addr,
6020			tree loaded_val, int index)
6021{
6022  enum built_in_function tmpbase;
6023  gimple_stmt_iterator gsi;
6024  basic_block store_bb;
6025  location_t loc;
6026  gimple *stmt;
6027  tree decl, call, type, itype;
6028
6029  gsi = gsi_last_nondebug_bb (load_bb);
6030  stmt = gsi_stmt (gsi);
6031  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6032  loc = gimple_location (stmt);
6033
6034  /* ??? If the target does not implement atomic_load_optab[mode], and mode
6035     is smaller than word size, then expand_atomic_load assumes that the load
6036     is atomic.  We could avoid the builtin entirely in this case.  */
6037
6038  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6039  decl = builtin_decl_explicit (tmpbase);
6040  if (decl == NULL_TREE)
6041    return false;
6042
6043  type = TREE_TYPE (loaded_val);
6044  itype = TREE_TYPE (TREE_TYPE (decl));
6045
6046  call = build_call_expr_loc (loc, decl, 2, addr,
6047			      build_int_cst (NULL,
6048					     gimple_omp_atomic_seq_cst_p (stmt)
6049					     ? MEMMODEL_SEQ_CST
6050					     : MEMMODEL_RELAXED));
6051  if (!useless_type_conversion_p (type, itype))
6052    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6053  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6054
6055  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6056  gsi_remove (&gsi, true);
6057
6058  store_bb = single_succ (load_bb);
6059  gsi = gsi_last_nondebug_bb (store_bb);
6060  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6061  gsi_remove (&gsi, true);
6062
6063  if (gimple_in_ssa_p (cfun))
6064    update_ssa (TODO_update_ssa_no_phi);
6065
6066  return true;
6067}
6068
6069/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6070   operation as a normal volatile store.  */
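
/* Schematic examples only: a "#pragma omp atomic write" of a 4-byte int
   becomes roughly

	__atomic_store_4 (addr, stored_val, MEMMODEL_RELAXED);

   while a capture form that still needs the previous value uses

	loaded_val = __atomic_exchange_4 (addr, stored_val, MEMMODEL_RELAXED);

   again with MEMMODEL_SEQ_CST when seq_cst was requested.  */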
6071
6072static bool
6073expand_omp_atomic_store (basic_block load_bb, tree addr,
6074			 tree loaded_val, tree stored_val, int index)
6075{
6076  enum built_in_function tmpbase;
6077  gimple_stmt_iterator gsi;
6078  basic_block store_bb = single_succ (load_bb);
6079  location_t loc;
6080  gimple *stmt;
6081  tree decl, call, type, itype;
6082  machine_mode imode;
6083  bool exchange;
6084
6085  gsi = gsi_last_nondebug_bb (load_bb);
6086  stmt = gsi_stmt (gsi);
6087  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6088
6089  /* If the load value is needed, then this isn't a store but an exchange.  */
6090  exchange = gimple_omp_atomic_need_value_p (stmt);
6091
6092  gsi = gsi_last_nondebug_bb (store_bb);
6093  stmt = gsi_stmt (gsi);
6094  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6095  loc = gimple_location (stmt);
6096
6097  /* ??? If the target does not implement atomic_store_optab[mode], and mode
6098     is smaller than word size, then expand_atomic_store assumes that the store
6099     is atomic.  We could avoid the builtin entirely in this case.  */
6100
6101  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6102  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6103  decl = builtin_decl_explicit (tmpbase);
6104  if (decl == NULL_TREE)
6105    return false;
6106
6107  type = TREE_TYPE (stored_val);
6108
6109  /* Dig out the type of the function's second argument.  */
6110  itype = TREE_TYPE (decl);
6111  itype = TYPE_ARG_TYPES (itype);
6112  itype = TREE_CHAIN (itype);
6113  itype = TREE_VALUE (itype);
6114  imode = TYPE_MODE (itype);
6115
6116  if (exchange && !can_atomic_exchange_p (imode, true))
6117    return false;
6118
6119  if (!useless_type_conversion_p (itype, type))
6120    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6121  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6122			      build_int_cst (NULL,
6123					     gimple_omp_atomic_seq_cst_p (stmt)
6124					     ? MEMMODEL_SEQ_CST
6125					     : MEMMODEL_RELAXED));
6126  if (exchange)
6127    {
6128      if (!useless_type_conversion_p (type, itype))
6129	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6130      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6131    }
6132
6133  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6134  gsi_remove (&gsi, true);
6135
6136  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6137  gsi = gsi_last_nondebug_bb (load_bb);
6138  gsi_remove (&gsi, true);
6139
6140  if (gimple_in_ssa_p (cfun))
6141    update_ssa (TODO_update_ssa_no_phi);
6142
6143  return true;
6144}
6145
6146/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6147   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6148   size of the data type, and thus usable to find the index of the builtin
6149   decl.  Returns false if the expression is not of the proper form.  */
6150
6151static bool
6152expand_omp_atomic_fetch_op (basic_block load_bb,
6153			    tree addr, tree loaded_val,
6154			    tree stored_val, int index)
6155{
6156  enum built_in_function oldbase, newbase, tmpbase;
6157  tree decl, itype, call;
6158  tree lhs, rhs;
6159  basic_block store_bb = single_succ (load_bb);
6160  gimple_stmt_iterator gsi;
6161  gimple *stmt;
6162  location_t loc;
6163  enum tree_code code;
6164  bool need_old, need_new;
6165  machine_mode imode;
6166  bool seq_cst;
6167
6168  /* We expect to find the following sequences:
6169
6170   load_bb:
6171       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6172
6173   store_bb:
6174       val = tmp OP something; (or: something OP tmp)
6175       GIMPLE_OMP_STORE (val)
6176
6177  ???FIXME: Allow a more flexible sequence.
6178  Perhaps use data flow to pick the statements.
6179
6180  */
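
  /* Concretely (illustrative only): "#pragma omp atomic" applied to
     "x = x + 1" on a 4-byte int is expected as

	load_bb:   GIMPLE_OMP_ATOMIC_LOAD (tmp, &x)
	store_bb:  val = tmp + 1;
		   GIMPLE_OMP_ATOMIC_STORE (val)

     and, INDEX being 2, collapses into a call to __atomic_fetch_add_4
     (or __atomic_add_fetch_4 when the new value is needed).  */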
6181
6182  gsi = gsi_after_labels (store_bb);
6183  stmt = gsi_stmt (gsi);
6184  if (is_gimple_debug (stmt))
6185    {
6186      gsi_next_nondebug (&gsi);
6187      if (gsi_end_p (gsi))
6188	return false;
6189      stmt = gsi_stmt (gsi);
6190    }
6191  loc = gimple_location (stmt);
6192  if (!is_gimple_assign (stmt))
6193    return false;
6194  gsi_next_nondebug (&gsi);
6195  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6196    return false;
6197  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6198  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6199  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6200  gcc_checking_assert (!need_old || !need_new);
6201
6202  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6203    return false;
6204
6205  /* Check for one of the supported fetch-op operations.  */
6206  code = gimple_assign_rhs_code (stmt);
6207  switch (code)
6208    {
6209    case PLUS_EXPR:
6210    case POINTER_PLUS_EXPR:
6211      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6212      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6213      break;
6214    case MINUS_EXPR:
6215      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6216      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6217      break;
6218    case BIT_AND_EXPR:
6219      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6220      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6221      break;
6222    case BIT_IOR_EXPR:
6223      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6224      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6225      break;
6226    case BIT_XOR_EXPR:
6227      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6228      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6229      break;
6230    default:
6231      return false;
6232    }
6233
6234  /* Make sure the expression is of the proper form.  */
6235  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6236    rhs = gimple_assign_rhs2 (stmt);
6237  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6238	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6239    rhs = gimple_assign_rhs1 (stmt);
6240  else
6241    return false;
6242
6243  tmpbase = ((enum built_in_function)
6244	     ((need_new ? newbase : oldbase) + index + 1));
6245  decl = builtin_decl_explicit (tmpbase);
6246  if (decl == NULL_TREE)
6247    return false;
6248  itype = TREE_TYPE (TREE_TYPE (decl));
6249  imode = TYPE_MODE (itype);
6250
6251  /* We could test all of the various optabs involved, but the fact of the
6252     matter is that (with the exception of i486 vs i586 and xadd) all targets
6253     that support any atomic operation optab also implement compare-and-swap.
6254     Let optabs.c take care of expanding any compare-and-swap loop.  */
6255  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6256    return false;
6257
6258  gsi = gsi_last_nondebug_bb (load_bb);
6259  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6260
6261  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6262     It only requires that the operation happen atomically.  Thus we can
6263     use the RELAXED memory model.  */
6264  call = build_call_expr_loc (loc, decl, 3, addr,
6265			      fold_convert_loc (loc, itype, rhs),
6266			      build_int_cst (NULL,
6267					     seq_cst ? MEMMODEL_SEQ_CST
6268						     : MEMMODEL_RELAXED));
6269
6270  if (need_old || need_new)
6271    {
6272      lhs = need_old ? loaded_val : stored_val;
6273      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6274      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6275    }
6276  else
6277    call = fold_convert_loc (loc, void_type_node, call);
6278  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6279  gsi_remove (&gsi, true);
6280
6281  gsi = gsi_last_nondebug_bb (store_bb);
6282  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6283  gsi_remove (&gsi, true);
6284  gsi = gsi_last_nondebug_bb (store_bb);
6285  stmt = gsi_stmt (gsi);
6286  gsi_remove (&gsi, true);
6287
6288  if (gimple_in_ssa_p (cfun))
6289    {
6290      release_defs (stmt);
6291      update_ssa (TODO_update_ssa_no_phi);
6292    }
6293
6294  return true;
6295}
6296
6297/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6298
6299      oldval = *addr;
6300      repeat:
6301	newval = rhs;	 // with oldval replacing *addr in rhs
6302	prev = __sync_val_compare_and_swap (addr, oldval, newval);
6303	if (prev != oldval)
6304	  { oldval = prev; goto repeat; }
6305
6306   INDEX is log2 of the size of the data type, and thus usable to find the
6307   index of the builtin decl.  */
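
/* For example (purely illustrative), an update like

       #pragma omp atomic
       f = f * 2.0f;

   on a 4-byte float cannot be handled by expand_omp_atomic_fetch_op, so it
   ends up here: the location is accessed through a pointer to a 4-byte
   integer, the float value is VIEW_CONVERT_EXPRed to and from that integer
   type, and the loop keeps retrying __sync_val_compare_and_swap_4 until the
   exchange succeeds.  */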
6308
6309static bool
6310expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6311			    tree addr, tree loaded_val, tree stored_val,
6312			    int index)
6313{
6314  tree loadedi, storedi, initial, new_storedi, old_vali;
6315  tree type, itype, cmpxchg, iaddr, atype;
6316  gimple_stmt_iterator si;
6317  basic_block loop_header = single_succ (load_bb);
6318  gimple *phi, *stmt;
6319  edge e;
6320  enum built_in_function fncode;
6321
6322  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6323     order to use the RELAXED memory model effectively.  */
6324  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6325				    + index + 1);
6326  cmpxchg = builtin_decl_explicit (fncode);
6327  if (cmpxchg == NULL_TREE)
6328    return false;
6329  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6330  atype = type;
6331  itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6332
6333  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6334      || !can_atomic_load_p (TYPE_MODE (itype)))
6335    return false;
6336
6337  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6338  si = gsi_last_nondebug_bb (load_bb);
6339  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6340
6341  /* For floating-point values, we'll need to view-convert them to integers
6342     so that we can perform the atomic compare and swap.  Simplify the
6343     following code by always setting up the "i"ntegral variables.  */
6344  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6345    {
6346      tree iaddr_val;
6347
6348      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6349							   true));
6350      atype = itype;
6351      iaddr_val
6352	= force_gimple_operand_gsi (&si,
6353				    fold_convert (TREE_TYPE (iaddr), addr),
6354				    false, NULL_TREE, true, GSI_SAME_STMT);
6355      stmt = gimple_build_assign (iaddr, iaddr_val);
6356      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6357      loadedi = create_tmp_var (itype);
6358      if (gimple_in_ssa_p (cfun))
6359	loadedi = make_ssa_name (loadedi);
6360    }
6361  else
6362    {
6363      iaddr = addr;
6364      loadedi = loaded_val;
6365    }
6366
6367  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6368  tree loaddecl = builtin_decl_explicit (fncode);
6369  if (loaddecl)
6370    initial
6371      = fold_convert (atype,
6372		      build_call_expr (loaddecl, 2, iaddr,
6373				       build_int_cst (NULL_TREE,
6374						      MEMMODEL_RELAXED)));
6375  else
6376    {
6377      tree off
6378	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6379						      true), 0);
6380      initial = build2 (MEM_REF, atype, iaddr, off);
6381    }
6382
6383  initial
6384    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6385				GSI_SAME_STMT);
6386
6387  /* Move the value to the LOADEDI temporary.  */
6388  if (gimple_in_ssa_p (cfun))
6389    {
6390      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6391      phi = create_phi_node (loadedi, loop_header);
6392      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6393	       initial);
6394    }
6395  else
6396    gsi_insert_before (&si,
6397		       gimple_build_assign (loadedi, initial),
6398		       GSI_SAME_STMT);
6399  if (loadedi != loaded_val)
6400    {
6401      gimple_stmt_iterator gsi2;
6402      tree x;
6403
6404      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6405      gsi2 = gsi_start_bb (loop_header);
6406      if (gimple_in_ssa_p (cfun))
6407	{
6408	  gassign *stmt;
6409	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6410					true, GSI_SAME_STMT);
6411	  stmt = gimple_build_assign (loaded_val, x);
6412	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6413	}
6414      else
6415	{
6416	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6417	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6418				    true, GSI_SAME_STMT);
6419	}
6420    }
6421  gsi_remove (&si, true);
6422
6423  si = gsi_last_nondebug_bb (store_bb);
6424  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6425
6426  if (iaddr == addr)
6427    storedi = stored_val;
6428  else
6429    storedi
6430      = force_gimple_operand_gsi (&si,
6431				  build1 (VIEW_CONVERT_EXPR, itype,
6432					  stored_val), true, NULL_TREE, true,
6433				  GSI_SAME_STMT);
6434
6435  /* Build the compare&swap statement.  */
6436  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6437  new_storedi = force_gimple_operand_gsi (&si,
6438					  fold_convert (TREE_TYPE (loadedi),
6439							new_storedi),
6440					  true, NULL_TREE,
6441					  true, GSI_SAME_STMT);
6442
6443  if (gimple_in_ssa_p (cfun))
6444    old_vali = loadedi;
6445  else
6446    {
6447      old_vali = create_tmp_var (TREE_TYPE (loadedi));
6448      stmt = gimple_build_assign (old_vali, loadedi);
6449      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6450
6451      stmt = gimple_build_assign (loadedi, new_storedi);
6452      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6453    }
6454
6455  /* Note that we always perform the comparison as an integer, even for
6456     floating point.  This allows the atomic operation to properly
6457     succeed even with NaNs and -0.0.  */
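  /* Concretely: with a floating-point comparison a NaN in memory would
     compare unequal to itself, so the loop would never terminate even after
     a successful exchange, while -0.0 would compare equal to +0.0 and the
     loop could exit although the compare-and-swap (which compares bit
     patterns) had actually failed.  Comparing the integer images avoids
     both problems.  */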
6458  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6459  stmt = gimple_build_cond_empty (ne);
6460  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6461
6462  /* Update cfg.  */
6463  e = single_succ_edge (store_bb);
6464  e->flags &= ~EDGE_FALLTHRU;
6465  e->flags |= EDGE_FALSE_VALUE;
6466  /* Expect no looping.  */
6467  e->probability = profile_probability::guessed_always ();
6468
6469  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6470  e->probability = profile_probability::guessed_never ();
6471
6472  /* Copy the new value to loadedi (we already did that before the condition
6473     if we are not in SSA).  */
6474  if (gimple_in_ssa_p (cfun))
6475    {
6476      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6477      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6478    }
6479
6480  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6481  gsi_remove (&si, true);
6482
6483  struct loop *loop = alloc_loop ();
6484  loop->header = loop_header;
6485  loop->latch = store_bb;
6486  add_loop (loop, loop_header->loop_father);
6487
6488  if (gimple_in_ssa_p (cfun))
6489    update_ssa (TODO_update_ssa_no_phi);
6490
6491  return true;
6492}
6493
6494/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6495
6496				  GOMP_atomic_start ();
6497				  *addr = rhs;
6498				  GOMP_atomic_end ();
6499
6500   The result is not globally atomic, but works so long as all parallel
6501   references are within #pragma omp atomic directives.  According to
6502   responses received from omp@openmp.org, this appears to be within
6503   spec, which makes sense, since that's how several other compilers
6504   handle this situation as well.
6505   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6506   expanding.  STORED_VAL is the operand of the matching
6507   GIMPLE_OMP_ATOMIC_STORE.
6508
6509   We replace
6510   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6511   loaded_val = *addr;
6512
6513   and replace
6514   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6515   *addr = stored_val;
6516*/
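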
6517
6518static bool
6519expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6520			 tree addr, tree loaded_val, tree stored_val)
6521{
6522  gimple_stmt_iterator si;
6523  gassign *stmt;
6524  tree t;
6525
6526  si = gsi_last_nondebug_bb (load_bb);
6527  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6528
6529  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6530  t = build_call_expr (t, 0);
6531  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6532
6533  tree mem = build_simple_mem_ref (addr);
6534  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6535  TREE_OPERAND (mem, 1)
6536    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6537						 true),
6538		    TREE_OPERAND (mem, 1));
6539  stmt = gimple_build_assign (loaded_val, mem);
6540  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6541  gsi_remove (&si, true);
6542
6543  si = gsi_last_nondebug_bb (store_bb);
6544  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6545
6546  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
6547  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6548
6549  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6550  t = build_call_expr (t, 0);
6551  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6552  gsi_remove (&si, true);
6553
6554  if (gimple_in_ssa_p (cfun))
6555    update_ssa (TODO_update_ssa_no_phi);
6556  return true;
6557}
6558
6559/* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
6560   using expand_omp_atomic_fetch_op.  If that fails, we try
6561   expand_omp_atomic_pipeline, and if that fails too, the
6562   ultimate fallback is wrapping the operation in a mutex
6563   (expand_omp_atomic_mutex).  REGION is the atomic region built
6564   by build_omp_regions_1().  */
6565
6566static void
6567expand_omp_atomic (struct omp_region *region)
6568{
6569  basic_block load_bb = region->entry, store_bb = region->exit;
6570  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6571  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6572  tree loaded_val = gimple_omp_atomic_load_lhs (load);
6573  tree addr = gimple_omp_atomic_load_rhs (load);
6574  tree stored_val = gimple_omp_atomic_store_val (store);
6575  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6576  HOST_WIDE_INT index;
6577
6578  /* Make sure the type is one of the supported sizes.  */
6579  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6580  index = exact_log2 (index);
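  /* For example, a naturally aligned 4-byte int has TYPE_SIZE_UNIT 4, so
     INDEX becomes 2 and the builtins selected below are the *_4 variants;
     a type whose alignment is smaller than its size (say a double with only
     4-byte alignment) fails the alignment check below and is handled by the
     mutex fallback instead.  */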
6581  if (index >= 0 && index <= 4)
6582    {
6583      unsigned int align = TYPE_ALIGN_UNIT (type);
6584
6585      /* __sync builtins require strict data alignment.  */
6586      if (exact_log2 (align) >= index)
6587	{
6588	  /* Atomic load.  */
6589	  scalar_mode smode;
6590	  if (loaded_val == stored_val
6591	      && (is_int_mode (TYPE_MODE (type), &smode)
6592		  || is_float_mode (TYPE_MODE (type), &smode))
6593	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6594	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6595	    return;
6596
6597	  /* Atomic store.  */
6598	  if ((is_int_mode (TYPE_MODE (type), &smode)
6599	       || is_float_mode (TYPE_MODE (type), &smode))
6600	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6601	      && store_bb == single_succ (load_bb)
6602	      && first_stmt (store_bb) == store
6603	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6604					  stored_val, index))
6605	    return;
6606
6607	  /* When possible, use specialized atomic update functions.  */
6608	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6609	      && store_bb == single_succ (load_bb)
6610	      && expand_omp_atomic_fetch_op (load_bb, addr,
6611					     loaded_val, stored_val, index))
6612	    return;
6613
6614	  /* If we don't have specialized __sync builtins, try to implement
6615	     it as a compare-and-swap loop.  */
6616	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6617					  loaded_val, stored_val, index))
6618	    return;
6619	}
6620    }
6621
6622  /* The ultimate fallback is wrapping the operation in a mutex.  */
6623  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6624}
6625
6626/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6627   at REGION_EXIT.  */
6628
6629static void
6630mark_loops_in_oacc_kernels_region (basic_block region_entry,
6631				   basic_block region_exit)
6632{
6633  struct loop *outer = region_entry->loop_father;
6634  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6635
6636  /* Don't parallelize the kernels region if it contains more than one outer
6637     loop.  */
6638  unsigned int nr_outer_loops = 0;
6639  struct loop *single_outer = NULL;
6640  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6641    {
6642      gcc_assert (loop_outer (loop) == outer);
6643
6644      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6645	continue;
6646
6647      if (region_exit != NULL
6648	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6649	continue;
6650
6651      nr_outer_loops++;
6652      single_outer = loop;
6653    }
6654  if (nr_outer_loops != 1)
6655    return;
6656
6657  for (struct loop *loop = single_outer->inner;
6658       loop != NULL;
6659       loop = loop->inner)
6660    if (loop->next)
6661      return;
6662
6663  /* Mark the loops in the region.  */
6664  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6665    loop->in_oacc_kernels_region = true;
6666}
6667
6668/* Types used to pass grid and work-group sizes to a kernel invocation.  */
6669
6670struct GTY(()) grid_launch_attributes_trees
6671{
6672  tree kernel_dim_array_type;
6673  tree kernel_lattrs_dimnum_decl;
6674  tree kernel_lattrs_grid_decl;
6675  tree kernel_lattrs_group_decl;
6676  tree kernel_launch_attributes_type;
6677};
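
/* In C terms, the launch attributes record built by
   grid_create_kernel_launch_attr_types below corresponds roughly to
   (a sketch only; the real type is constructed directly as trees):

       struct __gomp_kernel_launch_attributes
       {
         uint32_t ndim;
         uint32_t grid_size[3];
         uint32_t group_size[3];
       };  */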
6678
6679static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6680
6681/* Create types used to pass kernel launch attributes to target.  */
6682
6683static void
6684grid_create_kernel_launch_attr_types (void)
6685{
6686  if (grid_attr_trees)
6687    return;
6688  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6689
6690  tree dim_arr_index_type
6691    = build_index_type (build_int_cst (integer_type_node, 2));
6692  grid_attr_trees->kernel_dim_array_type
6693    = build_array_type (uint32_type_node, dim_arr_index_type);
6694
6695  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6696  grid_attr_trees->kernel_lattrs_dimnum_decl
6697    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6698		  uint32_type_node);
6699  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6700
6701  grid_attr_trees->kernel_lattrs_grid_decl
6702    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6703		  grid_attr_trees->kernel_dim_array_type);
6704  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6705    = grid_attr_trees->kernel_lattrs_dimnum_decl;
6706  grid_attr_trees->kernel_lattrs_group_decl
6707    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6708		  grid_attr_trees->kernel_dim_array_type);
6709  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6710    = grid_attr_trees->kernel_lattrs_grid_decl;
6711  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6712			 "__gomp_kernel_launch_attributes",
6713			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6714}
6715
6716/* Insert before the current statement in GSI a store of VALUE to INDEX of
6717   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
6718   of type uint32_type_node.  */
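/* In effect this emits, before the statement at *GSI, the equivalent of the
   C statement RANGE_VAR.FLD_DECL[INDEX] = VALUE (illustrative notation
   only).  */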
6719
6720static void
6721grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6722			     tree fld_decl, int index, tree value)
6723{
6724  tree ref = build4 (ARRAY_REF, uint32_type_node,
6725		     build3 (COMPONENT_REF,
6726			     grid_attr_trees->kernel_dim_array_type,
6727			     range_var, fld_decl, NULL_TREE),
6728		     build_int_cst (integer_type_node, index),
6729		     NULL_TREE, NULL_TREE);
6730  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6731}
6732
6733/* Return a tree representation of a pointer to a structure with grid and
6734   work-group size information.  Statements filling in that information are
6735   inserted before GSI; TGT_STMT is the target statement which carries the
6736   necessary information.  */
6737
6738static tree
6739grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6740				       gomp_target *tgt_stmt)
6741{
6742  grid_create_kernel_launch_attr_types ();
6743  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6744				"__kernel_launch_attrs");
6745
6746  unsigned max_dim = 0;
6747  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6748       clause;
6749       clause = OMP_CLAUSE_CHAIN (clause))
6750    {
6751      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6752	continue;
6753
6754      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6755      max_dim = MAX (dim, max_dim);
6756
6757      grid_insert_store_range_dim (gsi, lattrs,
6758				   grid_attr_trees->kernel_lattrs_grid_decl,
6759				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6760      grid_insert_store_range_dim (gsi, lattrs,
6761				   grid_attr_trees->kernel_lattrs_group_decl,
6762				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6763    }
6764
6765  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6766			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6767  gcc_checking_assert (max_dim <= 2);
6768  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6769  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6770		     GSI_SAME_STMT);
6771  TREE_ADDRESSABLE (lattrs) = 1;
6772  return build_fold_addr_expr (lattrs);
6773}
6774
6775/* Build a target argument identifier from the DEVICE identifier, value
6776   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6777
6778static tree
6779get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6780{
6781  tree t = build_int_cst (integer_type_node, device);
6782  if (subsequent_param)
6783    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6784		     build_int_cst (integer_type_node,
6785				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6786  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6787		   build_int_cst (integer_type_node, id));
6788  return t;
6789}
6790
6791/* Like above, but return it in a type that can be directly stored as an
6792   element of the argument array.  */
6793
6794static tree
6795get_target_argument_identifier (int device, bool subsequent_param, int id)
6796{
6797  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6798  return fold_convert (ptr_type_node, t);
6799}
6800
6801/* Return a target argument consisting of DEVICE identifier, value identifier
6802   ID, and the actual VALUE.  */
6803
6804static tree
6805get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6806			   tree value)
6807{
6808  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6809			fold_convert (integer_type_node, value),
6810			build_int_cst (unsigned_type_node,
6811				       GOMP_TARGET_ARG_VALUE_SHIFT));
6812  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6813		   get_target_argument_identifier_1 (device, false, id));
6814  t = fold_convert (ptr_type_node, t);
6815  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6816}
6817
6818/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6819   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6820   otherwise push an identifier (with DEVICE and ID) and the VALUE as two
6821   separate arguments.  */
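
/* As an illustration of the encoding (a sketch; the precise bit layout is
   defined by the GOMP_TARGET_ARG_* constants in gomp-constants.h): a small
   constant VALUE is packed into a single pointer-sized element as

       (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID

   whereas a large or non-constant VALUE takes two elements, the first being

       DEVICE | GOMP_TARGET_ARG_SUBSEQUENT_PARAM | ID

   and the second the VALUE itself converted to a pointer.  */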
6822
6823static void
6824push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6825					 int id, tree value, vec <tree> *args)
6826{
6827  if (tree_fits_shwi_p (value)
6828      && tree_to_shwi (value) > -(1 << 15)
6829      && tree_to_shwi (value) < (1 << 15))
6830    args->quick_push (get_target_argument_value (gsi, device, id, value));
6831  else
6832    {
6833      args->quick_push (get_target_argument_identifier (device, true, id));
6834      value = fold_convert (ptr_type_node, value);
6835      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6836					GSI_SAME_STMT);
6837      args->quick_push (value);
6838    }
6839}
6840
6841/* Create an array of arguments that is then passed to GOMP_target.  */
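
/* For a target region with neither a num_teams nor a thread_limit clause,
   the array built below looks roughly like this (illustrative only):

       .omp_target_args[0] = <DEVICE_ALL, NUM_TEAMS, value -1 all in one word>
       .omp_target_args[1] = <DEVICE_ALL, THREAD_LIMIT, value -1, likewise>
       .omp_target_args[2] = NULL   <- terminating entry

   plus, for HSA gridified kernels, an identifier/pointer pair describing
   the kernel launch attributes.  */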
6842
6843static tree
6844get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6845{
6846  auto_vec <tree, 6> args;
6847  tree clauses = gimple_omp_target_clauses (tgt_stmt);
6848  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6849  if (c)
6850    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6851  else
6852    t = integer_minus_one_node;
6853  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6854					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6855
6856  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6857  if (c)
6858    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6859  else
6860    t = integer_minus_one_node;
6861  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6862					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
6863					   &args);
6864
6865  /* Add HSA-specific grid sizes, if available.  */
6866  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6867		       OMP_CLAUSE__GRIDDIM_))
6868    {
6869      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6870      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6871      args.quick_push (t);
6872      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6873    }
6874
6875  /* Produce more, perhaps device specific, arguments here.  */
6876
6877  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6878							  args.length () + 1),
6879				  ".omp_target_args");
6880  for (unsigned i = 0; i < args.length (); i++)
6881    {
6882      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6883			 build_int_cst (integer_type_node, i),
6884			 NULL_TREE, NULL_TREE);
6885      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6886			 GSI_SAME_STMT);
6887    }
6888  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6889		     build_int_cst (integer_type_node, args.length ()),
6890		     NULL_TREE, NULL_TREE);
6891  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6892		     GSI_SAME_STMT);
6893  TREE_ADDRESSABLE (argarray) = 1;
6894  return build_fold_addr_expr (argarray);
6895}
6896
6897/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
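
/* For an offloaded "#pragma omp target" region this ultimately emits a call
   roughly of the form (a sketch; the .omp_data_* names are the sender
   variables created by OMP lowering)

       GOMP_target_ext (device, child_fn, map_count, &.omp_data_arr,
			&.omp_data_sizes, &.omp_data_kinds, flags, depend,
			&.omp_target_args);

   while the other kinds (target data, update, enter/exit data and the
   OpenACC flavors) call the corresponding GOMP_/GOACC_ entry points
   selected below.  */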
6898
6899static void
6900expand_omp_target (struct omp_region *region)
6901{
6902  basic_block entry_bb, exit_bb, new_bb;
6903  struct function *child_cfun;
6904  tree child_fn, block, t;
6905  gimple_stmt_iterator gsi;
6906  gomp_target *entry_stmt;
6907  gimple *stmt;
6908  edge e;
6909  bool offloaded, data_region;
6910
6911  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6912  new_bb = region->entry;
6913
6914  offloaded = is_gimple_omp_offloaded (entry_stmt);
6915  switch (gimple_omp_target_kind (entry_stmt))
6916    {
6917    case GF_OMP_TARGET_KIND_REGION:
6918    case GF_OMP_TARGET_KIND_UPDATE:
6919    case GF_OMP_TARGET_KIND_ENTER_DATA:
6920    case GF_OMP_TARGET_KIND_EXIT_DATA:
6921    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6922    case GF_OMP_TARGET_KIND_OACC_KERNELS:
6923    case GF_OMP_TARGET_KIND_OACC_UPDATE:
6924    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6925    case GF_OMP_TARGET_KIND_OACC_DECLARE:
6926      data_region = false;
6927      break;
6928    case GF_OMP_TARGET_KIND_DATA:
6929    case GF_OMP_TARGET_KIND_OACC_DATA:
6930    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6931      data_region = true;
6932      break;
6933    default:
6934      gcc_unreachable ();
6935    }
6936
6937  child_fn = NULL_TREE;
6938  child_cfun = NULL;
6939  if (offloaded)
6940    {
6941      child_fn = gimple_omp_target_child_fn (entry_stmt);
6942      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6943    }
6944
6945  /* Supported by expand_omp_taskreg, but not here.  */
6946  if (child_cfun != NULL)
6947    gcc_checking_assert (!child_cfun->cfg);
6948  gcc_checking_assert (!gimple_in_ssa_p (cfun));
6949
6950  entry_bb = region->entry;
6951  exit_bb = region->exit;
6952
6953  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6954    {
6955      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6956
6957      /* Further down, both OpenACC kernels and OpenACC parallel constructs
6958	 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
6959	 two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
6960      DECL_ATTRIBUTES (child_fn)
6961	= tree_cons (get_identifier ("oacc kernels"),
6962		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
6963    }
6964
6965  if (offloaded)
6966    {
6967      unsigned srcidx, dstidx, num;
6968
6969      /* If the offloading region needs data sent from the parent
6970	 function, then the very first statement (except possible
6971	 tree profile counter updates) of the offloading body
6972	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
6973	 &.OMP_DATA_O is passed as an argument to the child function,
6974	 we need to replace it with the argument as seen by the child
6975	 function.
6976
6977	 In most cases, this will end up being the identity assignment
6978	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
6979	 a function call that has been inlined, the original PARM_DECL
6980	 .OMP_DATA_I may have been converted into a different local
6981	 variable.  In which case, we need to keep the assignment.  */
6982      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6983      if (data_arg)
6984	{
6985	  basic_block entry_succ_bb = single_succ (entry_bb);
6986	  gimple_stmt_iterator gsi;
6987	  tree arg;
6988	  gimple *tgtcopy_stmt = NULL;
6989	  tree sender = TREE_VEC_ELT (data_arg, 0);
6990
6991	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6992	    {
6993	      gcc_assert (!gsi_end_p (gsi));
6994	      stmt = gsi_stmt (gsi);
6995	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
6996		continue;
6997
6998	      if (gimple_num_ops (stmt) == 2)
6999		{
7000		  tree arg = gimple_assign_rhs1 (stmt);
7001
7002		  /* We're ignoring the subcode because we're
7003		     effectively doing a STRIP_NOPS.  */
7004
7005		  if (TREE_CODE (arg) == ADDR_EXPR
7006		      && TREE_OPERAND (arg, 0) == sender)
7007		    {
7008		      tgtcopy_stmt = stmt;
7009		      break;
7010		    }
7011		}
7012	    }
7013
7014	  gcc_assert (tgtcopy_stmt != NULL);
7015	  arg = DECL_ARGUMENTS (child_fn);
7016
7017	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7018	  gsi_remove (&gsi, true);
7019	}
7020
7021      /* Declare local variables needed in CHILD_CFUN.  */
7022      block = DECL_INITIAL (child_fn);
7023      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7024      /* The gimplifier could record temporaries in the offloading block
7025	 rather than in the containing function's local_decls chain,
7026	 which would mean cgraph missed finalizing them.  Do it now.  */
7027      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7028	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7029	  varpool_node::finalize_decl (t);
7030      DECL_SAVED_TREE (child_fn) = NULL;
7031      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7032      gimple_set_body (child_fn, NULL);
7033      TREE_USED (block) = 1;
7034
7035      /* Reset DECL_CONTEXT on function arguments.  */
7036      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7037	DECL_CONTEXT (t) = child_fn;
7038
7039      /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
7040	 so that it can be moved to the child function.  */
7041      gsi = gsi_last_nondebug_bb (entry_bb);
7042      stmt = gsi_stmt (gsi);
7043      gcc_assert (stmt
7044		  && gimple_code (stmt) == gimple_code (entry_stmt));
7045      e = split_block (entry_bb, stmt);
7046      gsi_remove (&gsi, true);
7047      entry_bb = e->dest;
7048      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7049
7050      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7051      if (exit_bb)
7052	{
7053	  gsi = gsi_last_nondebug_bb (exit_bb);
7054	  gcc_assert (!gsi_end_p (gsi)
7055		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7056	  stmt = gimple_build_return (NULL);
7057	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7058	  gsi_remove (&gsi, true);
7059	}
7060
7061      /* Move the offloading region into CHILD_CFUN.  */
7062
7063      block = gimple_block (entry_stmt);
7064
7065      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7066      if (exit_bb)
7067	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7068      /* When the OMP expansion process cannot guarantee an up-to-date
7069	 loop tree, arrange for the child function to fix up its loops.  */
7070      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7071	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7072
7073      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7074      num = vec_safe_length (child_cfun->local_decls);
7075      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7076	{
7077	  t = (*child_cfun->local_decls)[srcidx];
7078	  if (DECL_CONTEXT (t) == cfun->decl)
7079	    continue;
7080	  if (srcidx != dstidx)
7081	    (*child_cfun->local_decls)[dstidx] = t;
7082	  dstidx++;
7083	}
7084      if (dstidx != num)
7085	vec_safe_truncate (child_cfun->local_decls, dstidx);
7086
7087      /* Inform the callgraph about the new function.  */
7088      child_cfun->curr_properties = cfun->curr_properties;
7089      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7090      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7091      cgraph_node *node = cgraph_node::get_create (child_fn);
7092      node->parallelized_function = 1;
7093      cgraph_node::add_new_function (child_fn, true);
7094
7095      /* Add the new function to the offload table.  */
7096      if (ENABLE_OFFLOADING)
7097	{
7098	  if (in_lto_p)
7099	    DECL_PRESERVE_P (child_fn) = 1;
7100	  vec_safe_push (offload_funcs, child_fn);
7101	}
7102
7103      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7104		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7105
7106      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7107	 fixed in a following pass.  */
7108      push_cfun (child_cfun);
7109      if (need_asm)
7110	assign_assembler_name_if_needed (child_fn);
7111      cgraph_edge::rebuild_edges ();
7112
7113      /* Some EH regions might become dead, see PR34608.  If
7114	 pass_cleanup_cfg isn't the first pass to happen with the
7115	 new child, these dead EH edges might cause problems.
7116	 Clean them up now.  */
7117      if (flag_exceptions)
7118	{
7119	  basic_block bb;
7120	  bool changed = false;
7121
7122	  FOR_EACH_BB_FN (bb, cfun)
7123	    changed |= gimple_purge_dead_eh_edges (bb);
7124	  if (changed)
7125	    cleanup_tree_cfg ();
7126	}
7127      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7128	verify_loop_structure ();
7129      pop_cfun ();
7130
7131      if (dump_file && !gimple_in_ssa_p (cfun))
7132	{
7133	  omp_any_child_fn_dumped = true;
7134	  dump_function_header (dump_file, child_fn, dump_flags);
7135	  dump_function_to_file (child_fn, dump_file, dump_flags);
7136	}
7137
7138      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
7139    }
7140
7141  /* Emit a library call to launch the offloading region, or do data
7142     transfers.  */
7143  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7144  enum built_in_function start_ix;
7145  location_t clause_loc;
7146  unsigned int flags_i = 0;
7147
7148  switch (gimple_omp_target_kind (entry_stmt))
7149    {
7150    case GF_OMP_TARGET_KIND_REGION:
7151      start_ix = BUILT_IN_GOMP_TARGET;
7152      break;
7153    case GF_OMP_TARGET_KIND_DATA:
7154      start_ix = BUILT_IN_GOMP_TARGET_DATA;
7155      break;
7156    case GF_OMP_TARGET_KIND_UPDATE:
7157      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7158      break;
7159    case GF_OMP_TARGET_KIND_ENTER_DATA:
7160      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7161      break;
7162    case GF_OMP_TARGET_KIND_EXIT_DATA:
7163      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7164      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7165      break;
7166    case GF_OMP_TARGET_KIND_OACC_KERNELS:
7167    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7168      start_ix = BUILT_IN_GOACC_PARALLEL;
7169      break;
7170    case GF_OMP_TARGET_KIND_OACC_DATA:
7171    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7172      start_ix = BUILT_IN_GOACC_DATA_START;
7173      break;
7174    case GF_OMP_TARGET_KIND_OACC_UPDATE:
7175      start_ix = BUILT_IN_GOACC_UPDATE;
7176      break;
7177    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7178      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7179      break;
7180    case GF_OMP_TARGET_KIND_OACC_DECLARE:
7181      start_ix = BUILT_IN_GOACC_DECLARE;
7182      break;
7183    default:
7184      gcc_unreachable ();
7185    }
7186
7187  clauses = gimple_omp_target_clauses (entry_stmt);
7188
7189  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7190     library choose) and there is no conditional.  */
7191  cond = NULL_TREE;
7192  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7193
7194  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7195  if (c)
7196    cond = OMP_CLAUSE_IF_EXPR (c);
7197
7198  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7199  if (c)
7200    {
7201      /* Even if we pass it to all library function calls, it is currently only
7202	 defined/used for the OpenMP target ones.  */
7203      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7204			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7205			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7206			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7207
7208      device = OMP_CLAUSE_DEVICE_ID (c);
7209      clause_loc = OMP_CLAUSE_LOCATION (c);
7210    }
7211  else
7212    clause_loc = gimple_location (entry_stmt);
7213
7214  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7215  if (c)
7216    flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7217
7218  /* Ensure 'device' is of the correct type.  */
7219  device = fold_convert_loc (clause_loc, integer_type_node, device);
7220
7221  /* If we found the clause 'if (cond)', build
7222     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7223  if (cond)
7224    {
7225      cond = gimple_boolify (cond);
7226
7227      basic_block cond_bb, then_bb, else_bb;
7228      edge e;
7229      tree tmp_var;
7230
7231      tmp_var = create_tmp_var (TREE_TYPE (device));
7232      if (offloaded)
7233	e = split_block_after_labels (new_bb);
7234      else
7235	{
7236	  gsi = gsi_last_nondebug_bb (new_bb);
7237	  gsi_prev (&gsi);
7238	  e = split_block (new_bb, gsi_stmt (gsi));
7239	}
7240      cond_bb = e->src;
7241      new_bb = e->dest;
7242      remove_edge (e);
7243
7244      then_bb = create_empty_bb (cond_bb);
7245      else_bb = create_empty_bb (then_bb);
7246      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7247      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7248
7249      stmt = gimple_build_cond_empty (cond);
7250      gsi = gsi_last_bb (cond_bb);
7251      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7252
7253      gsi = gsi_start_bb (then_bb);
7254      stmt = gimple_build_assign (tmp_var, device);
7255      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7256
7257      gsi = gsi_start_bb (else_bb);
7258      stmt = gimple_build_assign (tmp_var,
7259				  build_int_cst (integer_type_node,
7260						 GOMP_DEVICE_HOST_FALLBACK));
7261      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7262
7263      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7264      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7265      add_bb_to_loop (then_bb, cond_bb->loop_father);
7266      add_bb_to_loop (else_bb, cond_bb->loop_father);
7267      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7268      make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7269
7270      device = tmp_var;
7271      gsi = gsi_last_nondebug_bb (new_bb);
7272    }
7273  else
7274    {
7275      gsi = gsi_last_nondebug_bb (new_bb);
7276      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7277					 true, GSI_SAME_STMT);
7278    }
7279
7280  t = gimple_omp_target_data_arg (entry_stmt);
7281  if (t == NULL)
7282    {
7283      t1 = size_zero_node;
7284      t2 = build_zero_cst (ptr_type_node);
7285      t3 = t2;
7286      t4 = t2;
7287    }
7288  else
7289    {
7290      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7291      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7292      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7293      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7294      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7295    }
7296
7297  gimple *g;
7298  bool tagging = false;
7299  /* The maximum number used by any start_ix, without varargs.  */
7300  auto_vec<tree, 11> args;
7301  args.quick_push (device);
7302  if (offloaded)
7303    args.quick_push (build_fold_addr_expr (child_fn));
7304  args.quick_push (t1);
7305  args.quick_push (t2);
7306  args.quick_push (t3);
7307  args.quick_push (t4);
7308  switch (start_ix)
7309    {
7310    case BUILT_IN_GOACC_DATA_START:
7311    case BUILT_IN_GOACC_DECLARE:
7312    case BUILT_IN_GOMP_TARGET_DATA:
7313      break;
7314    case BUILT_IN_GOMP_TARGET:
7315    case BUILT_IN_GOMP_TARGET_UPDATE:
7316    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7317      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7318      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7319      if (c)
7320	depend = OMP_CLAUSE_DECL (c);
7321      else
7322	depend = build_int_cst (ptr_type_node, 0);
7323      args.quick_push (depend);
7324      if (start_ix == BUILT_IN_GOMP_TARGET)
7325	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7326      break;
7327    case BUILT_IN_GOACC_PARALLEL:
7328      oacc_set_fn_attrib (child_fn, clauses, &args);
7329      tagging = true;
7330      /* FALLTHRU */
7331    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7332    case BUILT_IN_GOACC_UPDATE:
7333      {
7334	tree t_async = NULL_TREE;
7335
7336	/* If present, use the value specified by the respective
7337	   clause, making sure it is of the correct type.  */
7338	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7339	if (c)
7340	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7341				      integer_type_node,
7342				      OMP_CLAUSE_ASYNC_EXPR (c));
7343	else if (!tagging)
7344	  /* Default values for t_async.  */
7345	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7346				      integer_type_node,
7347				      build_int_cst (integer_type_node,
7348						     GOMP_ASYNC_SYNC));
7349	if (tagging && t_async)
7350	  {
7351	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7352
7353	    if (TREE_CODE (t_async) == INTEGER_CST)
7354	      {
7355		/* See if we can pack the async arg into the tag's
7356		   operand.  */
7357		i_async = TREE_INT_CST_LOW (t_async);
7358		if (i_async < GOMP_LAUNCH_OP_MAX)
7359		  t_async = NULL_TREE;
7360		else
7361		  i_async = GOMP_LAUNCH_OP_MAX;
7362	      }
7363	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7364					      i_async));
7365	  }
7366	if (t_async)
7367	  args.safe_push (t_async);
7368
7369	/* Save the argument index, and ... */
7370	unsigned t_wait_idx = args.length ();
7371	unsigned num_waits = 0;
7372	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7373	if (!tagging || c)
7374	  /* ... push a placeholder.  */
7375	  args.safe_push (integer_zero_node);
7376
7377	for (; c; c = OMP_CLAUSE_CHAIN (c))
7378	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7379	    {
7380	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7381						integer_type_node,
7382						OMP_CLAUSE_WAIT_EXPR (c)));
7383	      num_waits++;
7384	    }
7385
7386	if (!tagging || num_waits)
7387	  {
7388	    tree len;
7389
7390	    /* Now that we know the number, update the placeholder.  */
7391	    if (tagging)
7392	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7393	    else
7394	      len = build_int_cst (integer_type_node, num_waits);
7395	    len = fold_convert_loc (gimple_location (entry_stmt),
7396				    unsigned_type_node, len);
7397	    args[t_wait_idx] = len;
7398	  }
7399      }
7400      break;
7401    default:
7402      gcc_unreachable ();
7403    }
7404  if (tagging)
7405    /*  Push terminal marker - zero.  */
7406    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7407
7408  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7409  gimple_set_location (g, gimple_location (entry_stmt));
7410  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7411  if (!offloaded)
7412    {
7413      g = gsi_stmt (gsi);
7414      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7415      gsi_remove (&gsi, true);
7416    }
7417  if (data_region && region->exit)
7418    {
7419      gsi = gsi_last_nondebug_bb (region->exit);
7420      g = gsi_stmt (gsi);
7421      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7422      gsi_remove (&gsi, true);
7423    }
7424}
7425
7426/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only,
7427   with the iteration variable derived from the thread number.  INTRA_GROUP
7428   means this is an expansion of a loop iterating over work-items within a
7429   separate iteration over groups.  */
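
/* For instance, a gridified

       for (i = n1; i < n2; i += step)
	 body;

   loses its loop structure entirely: BODY is executed once per work-item,
   with the iteration variable set up roughly as

       i = n1 + step * <id of this work-item in dimension dim>

   where the id comes from one of the BUILT_IN_HSA_WORKITEMID,
   BUILT_IN_HSA_WORKITEMABSID or BUILT_IN_HSA_WORKGROUPID builtins chosen
   below (a sketch; the real construction is done on trees).  */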
7430
7431static void
7432grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7433{
7434  gimple_stmt_iterator gsi;
7435  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7436  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7437		       == GF_OMP_FOR_KIND_GRID_LOOP);
7438  size_t collapse = gimple_omp_for_collapse (for_stmt);
7439  struct omp_for_data_loop *loops
7440    = XALLOCAVEC (struct omp_for_data_loop,
7441		  gimple_omp_for_collapse (for_stmt));
7442  struct omp_for_data fd;
7443
7444  remove_edge (BRANCH_EDGE (kfor->entry));
7445  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7446
7447  gcc_assert (kfor->cont);
7448  omp_extract_for_data (for_stmt, &fd, loops);
7449
7450  gsi = gsi_start_bb (body_bb);
7451
7452  for (size_t dim = 0; dim < collapse; dim++)
7453    {
7454      tree type, itype;
7455      itype = type = TREE_TYPE (fd.loops[dim].v);
7456      if (POINTER_TYPE_P (type))
7457	itype = signed_type_for (type);
7458
7459      tree n1 = fd.loops[dim].n1;
7460      tree step = fd.loops[dim].step;
7461      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7462				     true, NULL_TREE, true, GSI_SAME_STMT);
7463      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7464				       true, NULL_TREE, true, GSI_SAME_STMT);
7465      tree threadid;
7466      if (gimple_omp_for_grid_group_iter (for_stmt))
7467	{
7468	  gcc_checking_assert (!intra_group);
7469	  threadid = build_call_expr (builtin_decl_explicit
7470				      (BUILT_IN_HSA_WORKGROUPID), 1,
7471				      build_int_cstu (unsigned_type_node, dim));
7472	}
7473      else if (intra_group)
7474	threadid = build_call_expr (builtin_decl_explicit
7475				    (BUILT_IN_HSA_WORKITEMID), 1,
7476				    build_int_cstu (unsigned_type_node, dim));
7477      else
7478	threadid = build_call_expr (builtin_decl_explicit
7479				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7480				    build_int_cstu (unsigned_type_node, dim));
7481      threadid = fold_convert (itype, threadid);
7482      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7483					   true, GSI_SAME_STMT);
7484
7485      tree startvar = fd.loops[dim].v;
7486      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7487      if (POINTER_TYPE_P (type))
7488	t = fold_build_pointer_plus (n1, t);
7489      else
7490	t = fold_build2 (PLUS_EXPR, type, t, n1);
7491      t = fold_convert (type, t);
7492      t = force_gimple_operand_gsi (&gsi, t,
7493				    DECL_P (startvar)
7494				    && TREE_ADDRESSABLE (startvar),
7495				    NULL_TREE, true, GSI_SAME_STMT);
7496      gassign *assign_stmt = gimple_build_assign (startvar, t);
7497      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7498    }
7499  /* Remove the omp for statement.  */
7500  gsi = gsi_last_nondebug_bb (kfor->entry);
7501  gsi_remove (&gsi, true);
7502
7503  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7504  gsi = gsi_last_nondebug_bb (kfor->cont);
7505  gcc_assert (!gsi_end_p (gsi)
7506	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7507  gsi_remove (&gsi, true);
7508
7509  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7510  gsi = gsi_last_nondebug_bb (kfor->exit);
7511  gcc_assert (!gsi_end_p (gsi)
7512	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7513  if (intra_group)
7514    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7515  gsi_remove (&gsi, true);
7516
7517  /* Fix up the now much simpler CFG.  */
7518  remove_edge (find_edge (kfor->cont, body_bb));
7519
7520  if (kfor->cont != body_bb)
7521    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7522  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7523}
7524
7525/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7526   argument_decls.  */
7527
7528struct grid_arg_decl_map
7529{
7530  tree old_arg;
7531  tree new_arg;
7532};
7533
7534/* Invoked through walk_gimple_op; remaps all PARM_DECLs to the ones
7535   pertaining to the kernel function.  */
7536
7537static tree
7538grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7539{
7540  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7541  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7542  tree t = *tp;
7543
7544  if (t == adm->old_arg)
7545    *tp = adm->new_arg;
7546  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7547  return NULL_TREE;
7548}
7549
7550/* If the TARGET region contains a kernel body (GIMPLE_OMP_GRID_BODY) region,
7551   remove it from TARGET and expand it in HSA gridified kernel fashion.  */
7552
7553static void
7554grid_expand_target_grid_body (struct omp_region *target)
7555{
7556  if (!hsa_gen_requested_p ())
7557    return;
7558
7559  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7560  struct omp_region **pp;
7561
7562  for (pp = &target->inner; *pp; pp = &(*pp)->next)
7563    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7564      break;
7565
7566  struct omp_region *gpukernel = *pp;
7567
7568  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7569  if (!gpukernel)
7570    {
7571      /* HSA cannot handle OACC stuff.  */
7572      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7573	return;
7574      gcc_checking_assert (orig_child_fndecl);
7575      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7576				    OMP_CLAUSE__GRIDDIM_));
7577      cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7578
7579      hsa_register_kernel (n);
7580      return;
7581    }
7582
7583  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7584			       OMP_CLAUSE__GRIDDIM_));
7585  tree inside_block
7586    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7587  *pp = gpukernel->next;
7588  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7589    if ((*pp)->type == GIMPLE_OMP_FOR)
7590      break;
7591
7592  struct omp_region *kfor = *pp;
7593  gcc_assert (kfor);
7594  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7595  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7596  *pp = kfor->next;
7597  if (kfor->inner)
7598    {
7599      if (gimple_omp_for_grid_group_iter (for_stmt))
7600	{
7601	  struct omp_region **next_pp;
7602	  for (pp = &kfor->inner; *pp; pp = next_pp)
7603	    {
7604	      next_pp = &(*pp)->next;
7605	      if ((*pp)->type != GIMPLE_OMP_FOR)
7606		continue;
7607	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7608	      gcc_assert (gimple_omp_for_kind (inner)
7609			  == GF_OMP_FOR_KIND_GRID_LOOP);
7610	      grid_expand_omp_for_loop (*pp, true);
7611	      *pp = (*pp)->next;
7612	      next_pp = pp;
7613	    }
7614	}
7615      expand_omp (kfor->inner);
7616    }
7617  if (gpukernel->inner)
7618    expand_omp (gpukernel->inner);
7619
7620  tree kern_fndecl = copy_node (orig_child_fndecl);
7621  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7622  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7623  tree tgtblock = gimple_block (tgt_stmt);
7624  tree fniniblock = make_node (BLOCK);
7625  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7626  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7627  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7628  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7629  DECL_INITIAL (kern_fndecl) = fniniblock;
7630  push_struct_function (kern_fndecl);
7631  cfun->function_end_locus = gimple_location (tgt_stmt);
7632  init_tree_ssa (cfun);
7633  pop_cfun ();
7634
7635  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7636  gcc_assert (!DECL_CHAIN (old_parm_decl));
7637  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7638  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7639  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7640  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7641  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7642  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7643  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7644  kern_cfun->curr_properties = cfun->curr_properties;
7645
7646  grid_expand_omp_for_loop (kfor, false);
7647
7648  /* Remove the omp for statement.  */
7649  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
7650  gsi_remove (&gsi, true);
7651  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7652     return.  */
7653  gsi = gsi_last_nondebug_bb (gpukernel->exit);
7654  gcc_assert (!gsi_end_p (gsi)
7655	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7656  gimple *ret_stmt = gimple_build_return (NULL);
7657  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7658  gsi_remove (&gsi, true);
7659
7660  /* Statements in the first BB in the target construct have been produced by
7661     target lowering and must be copied inside the GPUKERNEL, with the two
7662     exceptions of the first OMP statement and the OMP_DATA assignment
7663     statement.  */
7664  gsi = gsi_start_bb (single_succ (gpukernel->entry));
7665  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7666  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7667  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7668       !gsi_end_p (tsi); gsi_next (&tsi))
7669    {
7670      gimple *stmt = gsi_stmt (tsi);
7671      if (is_gimple_omp (stmt))
7672	break;
7673      if (sender
7674	  && is_gimple_assign (stmt)
7675	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7676	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7677	continue;
7678      gimple *copy = gimple_copy (stmt);
7679      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7680      gimple_set_block (copy, fniniblock);
7681    }
7682
7683  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7684			  gpukernel->exit, inside_block);
7685
7686  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7687  kcn->mark_force_output ();
7688  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7689
7690  hsa_register_kernel (kcn, orig_child);
7691
7692  cgraph_node::add_new_function (kern_fndecl, true);
7693  push_cfun (kern_cfun);
7694  cgraph_edge::rebuild_edges ();
7695
7696  /* Re-map any mention of the PARM_DECL of the original function to the
7697     PARM_DECL of the new one.
7698
7699     TODO: It would be great if lowering produced references into the GPU
7700     kernel decl straight away and we did not have to do this.  */
7701  struct grid_arg_decl_map adm;
7702  adm.old_arg = old_parm_decl;
7703  adm.new_arg = new_parm_decl;
7704  basic_block bb;
7705  FOR_EACH_BB_FN (bb, kern_cfun)
7706    {
7707      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7708	{
7709	  gimple *stmt = gsi_stmt (gsi);
7710	  struct walk_stmt_info wi;
7711	  memset (&wi, 0, sizeof (wi));
7712	  wi.info = &adm;
7713	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7714	}
7715    }
7716  pop_cfun ();
7717
7718  return;
7719}
7720
7721/* Expand the parallel region tree rooted at REGION.  Expansion
7722   proceeds in depth-first order.  Innermost regions are expanded
7723   first.  This way, parallel regions that require a new function to
7724   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7725   internal dependencies in their body.  */
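
/* For example (illustrative), given

       #pragma omp parallel
       {
	 #pragma omp single
	   ...;
       }

   the GIMPLE_OMP_SINGLE region nested inside the GIMPLE_OMP_PARALLEL region
   is expanded first (expand_omp_single), so that by the time the parallel
   region is outlined into a child function by expand_omp_taskreg its body
   no longer contains any OMP directives.  */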
7726
7727static void
7728expand_omp (struct omp_region *region)
7729{
7730  omp_any_child_fn_dumped = false;
7731  while (region)
7732    {
7733      location_t saved_location;
7734      gimple *inner_stmt = NULL;
7735
7736      /* First, determine whether this is a combined parallel+workshare
7737	 region.  */
7738      if (region->type == GIMPLE_OMP_PARALLEL)
7739	determine_parallel_type (region);
7740      else if (region->type == GIMPLE_OMP_TARGET)
7741	grid_expand_target_grid_body (region);
7742
7743      if (region->type == GIMPLE_OMP_FOR
7744	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7745	inner_stmt = last_stmt (region->inner->entry);
7746
7747      if (region->inner)
7748	expand_omp (region->inner);
7749
7750      saved_location = input_location;
7751      if (gimple_has_location (last_stmt (region->entry)))
7752	input_location = gimple_location (last_stmt (region->entry));
7753
7754      switch (region->type)
7755	{
7756	case GIMPLE_OMP_PARALLEL:
7757	case GIMPLE_OMP_TASK:
7758	  expand_omp_taskreg (region);
7759	  break;
7760
7761	case GIMPLE_OMP_FOR:
7762	  expand_omp_for (region, inner_stmt);
7763	  break;
7764
7765	case GIMPLE_OMP_SECTIONS:
7766	  expand_omp_sections (region);
7767	  break;
7768
7769	case GIMPLE_OMP_SECTION:
7770	  /* Individual omp sections are handled together with their
7771	     parent GIMPLE_OMP_SECTIONS region.  */
7772	  break;
7773
7774	case GIMPLE_OMP_SINGLE:
7775	  expand_omp_single (region);
7776	  break;
7777
7778	case GIMPLE_OMP_ORDERED:
7779	  {
7780	    gomp_ordered *ord_stmt
7781	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7782	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7783				 OMP_CLAUSE_DEPEND))
7784	      {
7785		/* We'll expand these when expanding the corresponding
7786		   worksharing region with an ordered(n) clause.  */
7787		gcc_assert (region->outer
7788			    && region->outer->type == GIMPLE_OMP_FOR);
7789		region->ord_stmt = ord_stmt;
7790		break;
7791	      }
7792	  }
7793	  /* FALLTHRU */
7794	case GIMPLE_OMP_MASTER:
7795	case GIMPLE_OMP_TASKGROUP:
7796	case GIMPLE_OMP_CRITICAL:
7797	case GIMPLE_OMP_TEAMS:
7798	  expand_omp_synch (region);
7799	  break;
7800
7801	case GIMPLE_OMP_ATOMIC_LOAD:
7802	  expand_omp_atomic (region);
7803	  break;
7804
7805	case GIMPLE_OMP_TARGET:
7806	  expand_omp_target (region);
7807	  break;
7808
7809	default:
7810	  gcc_unreachable ();
7811	}
7812
7813      input_location = saved_location;
7814      region = region->next;
7815    }
7816  if (omp_any_child_fn_dumped)
7817    {
7818      if (dump_file)
7819	dump_function_header (dump_file, current_function_decl, dump_flags);
7820      omp_any_child_fn_dumped = false;
7821    }
7822}
7823
7824/* Helper for build_omp_regions.  Scan the dominator tree starting at
7825   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7826   true, the function ends once a single tree is built (otherwise, the
7827   whole forest of OMP constructs may be built).  */
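/* As a rough example of the resulting structure (informal, names and layout
   only for illustration): for

     #pragma omp parallel
     {
       #pragma omp single
	 x = 1;
     }

   the block ending in GIMPLE_OMP_PARALLEL opens a region, the block ending in
   GIMPLE_OMP_SINGLE opens a child region nested inside it, and the matching
   GIMPLE_OMP_RETURN statements mark the respective exit blocks.  */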
7828
7829static void
7830build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7831		     bool single_tree)
7832{
7833  gimple_stmt_iterator gsi;
7834  gimple *stmt;
7835  basic_block son;
7836
7837  gsi = gsi_last_nondebug_bb (bb);
7838  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7839    {
7840      struct omp_region *region;
7841      enum gimple_code code;
7842
7843      stmt = gsi_stmt (gsi);
7844      code = gimple_code (stmt);
7845      if (code == GIMPLE_OMP_RETURN)
7846	{
7847	  /* STMT is the return point out of region PARENT.  Mark it
7848	     as the exit point and make PARENT the immediately
7849	     enclosing region.  */
7850	  gcc_assert (parent);
7851	  region = parent;
7852	  region->exit = bb;
7853	  parent = parent->outer;
7854	}
7855      else if (code == GIMPLE_OMP_ATOMIC_STORE)
7856	{
7857	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7858	     GIMPLE_OMP_RETURN, but matches with
7859	     GIMPLE_OMP_ATOMIC_LOAD.  */
7860	  gcc_assert (parent);
7861	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7862	  region = parent;
7863	  region->exit = bb;
7864	  parent = parent->outer;
7865	}
7866      else if (code == GIMPLE_OMP_CONTINUE)
7867	{
7868	  gcc_assert (parent);
7869	  parent->cont = bb;
7870	}
7871      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7872	{
7873	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7874	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
7875	}
7876      else
7877	{
7878	  region = new_omp_region (bb, code, parent);
7879	  /* Otherwise...  */
7880	  if (code == GIMPLE_OMP_TARGET)
7881	    {
7882	      switch (gimple_omp_target_kind (stmt))
7883		{
7884		case GF_OMP_TARGET_KIND_REGION:
7885		case GF_OMP_TARGET_KIND_DATA:
7886		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7887		case GF_OMP_TARGET_KIND_OACC_KERNELS:
7888		case GF_OMP_TARGET_KIND_OACC_DATA:
7889		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7890		  break;
7891		case GF_OMP_TARGET_KIND_UPDATE:
7892		case GF_OMP_TARGET_KIND_ENTER_DATA:
7893		case GF_OMP_TARGET_KIND_EXIT_DATA:
7894		case GF_OMP_TARGET_KIND_OACC_UPDATE:
7895		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7896		case GF_OMP_TARGET_KIND_OACC_DECLARE:
7897		  /* ..., other than for those stand-alone directives...  */
7898		  region = NULL;
7899		  break;
7900		default:
7901		  gcc_unreachable ();
7902		}
7903	    }
7904	  else if (code == GIMPLE_OMP_ORDERED
7905		   && omp_find_clause (gimple_omp_ordered_clauses
7906					 (as_a <gomp_ordered *> (stmt)),
7907				       OMP_CLAUSE_DEPEND))
7908	    /* #pragma omp ordered depend is also just a stand-alone
7909	       directive.  */
7910	    region = NULL;
7911	  /* ..., this directive becomes the parent for a new region.  */
7912	  if (region)
7913	    parent = region;
7914	}
7915    }
7916
7917  if (single_tree && !parent)
7918    return;
7919
7920  for (son = first_dom_son (CDI_DOMINATORS, bb);
7921       son;
7922       son = next_dom_son (CDI_DOMINATORS, son))
7923    build_omp_regions_1 (son, parent, single_tree);
7924}
7925
7926/* Builds the tree of OMP regions rooted at ROOT, storing it in
7927   root_omp_region.  */
7928
7929static void
7930build_omp_regions_root (basic_block root)
7931{
7932  gcc_assert (root_omp_region == NULL);
7933  build_omp_regions_1 (root, NULL, true);
7934  gcc_assert (root_omp_region != NULL);
7935}
7936
7937/* Expands the omp construct (and its subconstructs) starting in HEAD.  */
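/* (This entry point is used when OMP regions are introduced late, for
   instance by the loop parallelization pass in tree-parloops.c.)  */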
7938
7939void
7940omp_expand_local (basic_block head)
7941{
7942  build_omp_regions_root (head);
7943  if (dump_file && (dump_flags & TDF_DETAILS))
7944    {
7945      fprintf (dump_file, "\nOMP region tree\n\n");
7946      dump_omp_region (dump_file, root_omp_region, 0);
7947      fprintf (dump_file, "\n");
7948    }
7949
7950  remove_exit_barriers (root_omp_region);
7951  expand_omp (root_omp_region);
7952
7953  omp_free_regions ();
7954}
7955
7956/* Scan the CFG and build a tree of OMP regions, storing the root of
7957   the tree in root_omp_region.  */
7958
7959static void
7960build_omp_regions (void)
7961{
7962  gcc_assert (root_omp_region == NULL);
7963  calculate_dominance_info (CDI_DOMINATORS);
7964  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7965}
7966
7967/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
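/* Very roughly, and with illustrative names only (the actual child decl and
   data record are created during OMP lowering): after this pass,

     #pragma omp parallel
       x = foo ();

   has its body outlined into a child function such as bar._omp_fn.0 and is
   replaced in the parent by a runtime call along the lines of

     GOMP_parallel (bar._omp_fn.0, &.omp_data_o, 0, 0);

   with shared data communicated through the .omp_data_o record.  */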
7968
7969static unsigned int
7970execute_expand_omp (void)
7971{
7972  build_omp_regions ();
7973
7974  if (!root_omp_region)
7975    return 0;
7976
7977  if (dump_file)
7978    {
7979      fprintf (dump_file, "\nOMP region tree\n\n");
7980      dump_omp_region (dump_file, root_omp_region, 0);
7981      fprintf (dump_file, "\n");
7982    }
7983
7984  remove_exit_barriers (root_omp_region);
7985
7986  expand_omp (root_omp_region);
7987
7988  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7989    verify_loop_structure ();
7990  cleanup_tree_cfg ();
7991
7992  omp_free_regions ();
7993
7994  return 0;
7995}
7996
7997/* OMP expansion -- the default pass, run before creation of SSA form.  */
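/* (A second variant, pass_expand_omp_ssa below, handles functions in which
   OMP regions only appear after SSA form has been built.)  */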
7998
7999namespace {
8000
8001const pass_data pass_data_expand_omp =
8002{
8003  GIMPLE_PASS, /* type */
8004  "ompexp", /* name */
8005  OPTGROUP_OMP, /* optinfo_flags */
8006  TV_NONE, /* tv_id */
8007  PROP_gimple_any, /* properties_required */
8008  PROP_gimple_eomp, /* properties_provided */
8009  0, /* properties_destroyed */
8010  0, /* todo_flags_start */
8011  0, /* todo_flags_finish */
8012};
8013
8014class pass_expand_omp : public gimple_opt_pass
8015{
8016public:
8017  pass_expand_omp (gcc::context *ctxt)
8018    : gimple_opt_pass (pass_data_expand_omp, ctxt)
8019  {}
8020
8021  /* opt_pass methods: */
8022  virtual unsigned int execute (function *)
8023    {
8024      bool gate = ((flag_openacc != 0 || flag_openmp != 0
8025		    || flag_openmp_simd != 0)
8026		   && !seen_error ());
8027
8028      /* This pass always runs, to provide PROP_gimple_eomp.
8029	 But often, there is nothing to do.  */
8030      if (!gate)
8031	return 0;
8032
8033      return execute_expand_omp ();
8034    }
8035
8036}; // class pass_expand_omp
8037
8038} // anon namespace
8039
8040gimple_opt_pass *
8041make_pass_expand_omp (gcc::context *ctxt)
8042{
8043  return new pass_expand_omp (ctxt);
8044}
8045
8046namespace {
8047
8048const pass_data pass_data_expand_omp_ssa =
8049{
8050  GIMPLE_PASS, /* type */
8051  "ompexpssa", /* name */
8052  OPTGROUP_OMP, /* optinfo_flags */
8053  TV_NONE, /* tv_id */
8054  PROP_cfg | PROP_ssa, /* properties_required */
8055  PROP_gimple_eomp, /* properties_provided */
8056  0, /* properties_destroyed */
8057  0, /* todo_flags_start */
8058  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8059};
8060
8061class pass_expand_omp_ssa : public gimple_opt_pass
8062{
8063public:
8064  pass_expand_omp_ssa (gcc::context *ctxt)
8065    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8066  {}
8067
8068  /* opt_pass methods: */
8069  virtual bool gate (function *fun)
8070    {
8071      return !(fun->curr_properties & PROP_gimple_eomp);
8072    }
8073  virtual unsigned int execute (function *) { return execute_expand_omp (); }
8074  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8075
8076}; // class pass_expand_omp_ssa
8077
8078} // anon namespace
8079
8080gimple_opt_pass *
8081make_pass_expand_omp_ssa (gcc::context *ctxt)
8082{
8083  return new pass_expand_omp_ssa (ctxt);
8084}
8085
8086/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8087   GIMPLE_* codes.  */
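/* The boolean return value tells the caller whether a fallthru edge from BB
   to the following block still needs to be created.  */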
8088
8089bool
8090omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8091		       int *region_idx)
8092{
8093  gimple *last = last_stmt (bb);
8094  enum gimple_code code = gimple_code (last);
8095  struct omp_region *cur_region = *region;
8096  bool fallthru = false;
8097
8098  switch (code)
8099    {
8100    case GIMPLE_OMP_PARALLEL:
8101    case GIMPLE_OMP_TASK:
8102    case GIMPLE_OMP_FOR:
8103    case GIMPLE_OMP_SINGLE:
8104    case GIMPLE_OMP_TEAMS:
8105    case GIMPLE_OMP_MASTER:
8106    case GIMPLE_OMP_TASKGROUP:
8107    case GIMPLE_OMP_CRITICAL:
8108    case GIMPLE_OMP_SECTION:
8109    case GIMPLE_OMP_GRID_BODY:
8110      cur_region = new_omp_region (bb, code, cur_region);
8111      fallthru = true;
8112      break;
8113
8114    case GIMPLE_OMP_ORDERED:
8115      cur_region = new_omp_region (bb, code, cur_region);
8116      fallthru = true;
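      /* An `ordered' construct with a `depend' clause is a stand-alone
	 directive (cf. build_omp_regions_1), so the region opened above is
	 closed again immediately.  */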
8117      if (omp_find_clause (gimple_omp_ordered_clauses
8118			     (as_a <gomp_ordered *> (last)),
8119			   OMP_CLAUSE_DEPEND))
8120	cur_region = cur_region->outer;
8121      break;
8122
8123    case GIMPLE_OMP_TARGET:
8124      cur_region = new_omp_region (bb, code, cur_region);
8125      fallthru = true;
8126      switch (gimple_omp_target_kind (last))
8127	{
8128	case GF_OMP_TARGET_KIND_REGION:
8129	case GF_OMP_TARGET_KIND_DATA:
8130	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8131	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8132	case GF_OMP_TARGET_KIND_OACC_DATA:
8133	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8134	  break;
8135	case GF_OMP_TARGET_KIND_UPDATE:
8136	case GF_OMP_TARGET_KIND_ENTER_DATA:
8137	case GF_OMP_TARGET_KIND_EXIT_DATA:
8138	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8139	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8140	case GF_OMP_TARGET_KIND_OACC_DECLARE:
8141	  cur_region = cur_region->outer;
8142	  break;
8143	default:
8144	  gcc_unreachable ();
8145	}
8146      break;
8147
8148    case GIMPLE_OMP_SECTIONS:
8149      cur_region = new_omp_region (bb, code, cur_region);
8150      fallthru = true;
8151      break;
8152
8153    case GIMPLE_OMP_SECTIONS_SWITCH:
8154      fallthru = false;
8155      break;
8156
8157    case GIMPLE_OMP_ATOMIC_LOAD:
8158    case GIMPLE_OMP_ATOMIC_STORE:
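      /* No extra edges or region bookkeeping are needed here for atomic
	 load/store pairs; build_omp_regions_1 matches them up later.  */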
8159      fallthru = true;
8160      break;
8161
8162    case GIMPLE_OMP_RETURN:
8163      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8164	 somewhere other than the next block.  That edge will be
8165	 created later.  */
8166      cur_region->exit = bb;
8167      if (cur_region->type == GIMPLE_OMP_TASK)
8168	/* Add an edge corresponding to not scheduling the task
8169	   immediately.  */
8170	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8171      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8172      cur_region = cur_region->outer;
8173      break;
8174
8175    case GIMPLE_OMP_CONTINUE:
8176      cur_region->cont = bb;
8177      switch (cur_region->type)
8178	{
8179	case GIMPLE_OMP_FOR:
8180	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8181	     successor edges as abnormal to prevent splitting
8182	     them.  */
8183	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8184	  /* Make the loopback edge.  */
8185	  make_edge (bb, single_succ (cur_region->entry),
8186		     EDGE_ABNORMAL);
8187
8188	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8189	     corresponds to the case that the body of the loop
8190	     is not executed at all.  */
8191	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8192	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8193	  fallthru = false;
8194	  break;
8195
8196	case GIMPLE_OMP_SECTIONS:
8197	  /* Wire up the edges into and out of the nested sections.  */
8198	  {
8199	    basic_block switch_bb = single_succ (cur_region->entry);
8200
8201	    struct omp_region *i;
8202	    for (i = cur_region->inner; i ; i = i->next)
8203	      {
8204		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8205		make_edge (switch_bb, i->entry, 0);
8206		make_edge (i->exit, bb, EDGE_FALLTHRU);
8207	      }
8208
8209	    /* Make the loopback edge to the block with
8210	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8211	    make_edge (bb, switch_bb, 0);
8212
8213	    /* Make the edge from the switch to exit.  */
8214	    make_edge (switch_bb, bb->next_bb, 0);
8215	    fallthru = false;
8216	  }
8217	  break;
8218
8219	case GIMPLE_OMP_TASK:
8220	  fallthru = true;
8221	  break;
8222
8223	default:
8224	  gcc_unreachable ();
8225	}
8226      break;
8227
8228    default:
8229      gcc_unreachable ();
8230    }
8231
8232  if (*region != cur_region)
8233    {
8234      *region = cur_region;
8235      if (cur_region)
8236	*region_idx = cur_region->entry->index;
8237      else
8238	*region_idx = 0;
8239    }
8240
8241  return fallthru;
8242}
8243
8244#include "gt-omp-expand.h"
8245