omp-expand.c revision 1.2
1/* Expansion pass for OMP directives.  Outlines regions of certain OMP
2   directives to separate functions, converts others into explicit calls to the
3   runtime library (libgomp) and so forth.
4
5Copyright (C) 2005-2017 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "cilk.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
59#include "hsa-common.h"
60#include "debug.h"
61
62
63/* OMP region information.  Every parallel and workshare
64   directive is enclosed between two markers, the OMP_* directive
65   and a corresponding GIMPLE_OMP_RETURN statement.  */
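/* For instance (illustrative only), a parallel directive immediately
   containing a for directive yields a two-level tree: the
   GIMPLE_OMP_PARALLEL region with the GIMPLE_OMP_FOR region as its
   single inner child, linked through the OUTER/INNER/NEXT fields
   below.  */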
66
67struct omp_region
68{
69  /* The enclosing region.  */
70  struct omp_region *outer;
71
72  /* First child region.  */
73  struct omp_region *inner;
74
75  /* Next peer region.  */
76  struct omp_region *next;
77
78  /* Block containing the omp directive as its last stmt.  */
79  basic_block entry;
80
81  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
82  basic_block exit;
83
84  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
85  basic_block cont;
86
87  /* If this is a combined parallel+workshare region, this is a list
88     of additional arguments needed by the combined parallel+workshare
89     library call.  */
90  vec<tree, va_gc> *ws_args;
91
92  /* The code for the omp directive of this region.  */
93  enum gimple_code type;
94
95  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
96  enum omp_clause_schedule_kind sched_kind;
97
98  /* Schedule modifiers.  */
99  unsigned char sched_modifiers;
100
101  /* True if this is a combined parallel+workshare region.  */
102  bool is_combined_parallel;
103
104  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105     a depend clause.  */
106  gomp_ordered *ord_stmt;
107};
108
109static struct omp_region *root_omp_region;
110static bool omp_any_child_fn_dumped;
111
112static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113				     bool = false);
114static gphi *find_phi_with_arg_on_edge (tree, edge);
115static void expand_omp (struct omp_region *region);
116
117/* Return true if REGION is a combined parallel+workshare region.  */
118
119static inline bool
120is_combined_parallel (struct omp_region *region)
121{
122  return region->is_combined_parallel;
123}
124
125/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126   is the immediate dominator of PAR_ENTRY_BB, return true if there
127   are no data dependencies that would prevent expanding the parallel
128   directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129
130   When expanding a combined parallel+workshare region, the call to
131   the child function may need additional arguments in the case of
132   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
133   computed out of variables passed in from the parent to the child
134   via 'struct .omp_data_s'.  For instance:
135
136	#pragma omp parallel for schedule (guided, i * 4)
137	for (j ...)
138
139   Is lowered into:
140
141	# BLOCK 2 (PAR_ENTRY_BB)
142	.omp_data_o.i = i;
143	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144
145	# BLOCK 3 (WS_ENTRY_BB)
146	.omp_data_i = &.omp_data_o;
147	D.1667 = .omp_data_i->i;
148	D.1598 = D.1667 * 4;
149	#pragma omp for schedule (guided, D.1598)
150
151   When we outline the parallel region, the call to the child function
152   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153   that value is computed *after* the call site.  So, in principle we
154   cannot do the transformation.
155
156   To see whether the code in WS_ENTRY_BB blocks the combined
157   parallel+workshare call, we collect all the variables used in the
158   GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
160   call.
161
162   FIXME.  If we had the SSA form built at this point, we could merely
163   hoist the code in block 3 into block 2 and be done with it.  But at
164   this point we don't have dataflow information and though we could
165   hack something up here, it is really not worth the aggravation.  */
166
167static bool
168workshare_safe_to_combine_p (basic_block ws_entry_bb)
169{
170  struct omp_for_data fd;
171  gimple *ws_stmt = last_stmt (ws_entry_bb);
172
173  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174    return true;
175
176  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177
178  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179
180  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181    return false;
182  if (fd.iter_type != long_integer_type_node)
183    return false;
184
185  /* FIXME.  We give up too easily here.  If any of these arguments
186     are not constants, they will likely involve variables that have
187     been mapped into fields of .omp_data_s for sharing with the child
188     function.  With appropriate data flow, it would be possible to
189     see through this.  */
190  if (!is_gimple_min_invariant (fd.loop.n1)
191      || !is_gimple_min_invariant (fd.loop.n2)
192      || !is_gimple_min_invariant (fd.loop.step)
193      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194    return false;
195
196  return true;
197}
198
199/* Adjust CHUNK_SIZE from the SCHEDULE clause, depending on the presence
200   of the simd modifier (SIMD_SCHEDULE).  */
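/* A minimal illustration: with the simd modifier present and
   omp_max_vf () returning 8, a chunk size of 5 is rounded up by the
   computation below to the next multiple of the vectorization factor,
   (5 + 7) & -8 == 8, so each chunk covers whole SIMD lanes.  */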
201
202static tree
203omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204{
205  if (!simd_schedule)
206    return chunk_size;
207
208  int vf = omp_max_vf ();
209  if (vf == 1)
210    return chunk_size;
211
212  tree type = TREE_TYPE (chunk_size);
213  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214			    build_int_cst (type, vf - 1));
215  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216		      build_int_cst (type, -vf));
217}
218
219/* Collect additional arguments needed to emit a combined
220   parallel+workshare call.  PAR_STMT is the enclosing parallel directive
221   and WS_STMT is the workshare directive being expanded.  */
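/* As an illustrative example, for

	#pragma omp parallel for schedule (dynamic, 4)
	for (i = 0; i < n; i++) ...

   the collected vector contains the loop bounds, step and chunk size
   converted to long: { 0, n, 1, 4 }.  */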
222
223static vec<tree, va_gc> *
224get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225{
226  tree t;
227  location_t loc = gimple_location (ws_stmt);
228  vec<tree, va_gc> *ws_args;
229
230  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231    {
232      struct omp_for_data fd;
233      tree n1, n2;
234
235      omp_extract_for_data (for_stmt, &fd, NULL);
236      n1 = fd.loop.n1;
237      n2 = fd.loop.n2;
238
239      if (gimple_omp_for_combined_into_p (for_stmt))
240	{
241	  tree innerc
242	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243			       OMP_CLAUSE__LOOPTEMP_);
244	  gcc_assert (innerc);
245	  n1 = OMP_CLAUSE_DECL (innerc);
246	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247				    OMP_CLAUSE__LOOPTEMP_);
248	  gcc_assert (innerc);
249	  n2 = OMP_CLAUSE_DECL (innerc);
250	}
251
252      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253
254      t = fold_convert_loc (loc, long_integer_type_node, n1);
255      ws_args->quick_push (t);
256
257      t = fold_convert_loc (loc, long_integer_type_node, n2);
258      ws_args->quick_push (t);
259
260      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261      ws_args->quick_push (t);
262
263      if (fd.chunk_size)
264	{
265	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
267	  ws_args->quick_push (t);
268	}
269
270      return ws_args;
271    }
272  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273    {
274      /* Number of sections is equal to the number of edges from the
275	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276	 the exit of the sections region.  */
277      basic_block bb = single_succ (gimple_bb (ws_stmt));
278      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279      vec_alloc (ws_args, 1);
280      ws_args->quick_push (t);
281      return ws_args;
282    }
283
284  gcc_unreachable ();
285}
286
287/* Discover whether REGION is a combined parallel+workshare region.  */
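/* As an illustrative example,

	#pragma omp parallel
	#pragma omp for schedule (dynamic)
	for (i = 0; i < n; i++) ...

   is marked as a combined region, whereas the same loop with
   schedule (static) or an ordered clause is not, for the reasons
   given in the comments below.  */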
288
289static void
290determine_parallel_type (struct omp_region *region)
291{
292  basic_block par_entry_bb, par_exit_bb;
293  basic_block ws_entry_bb, ws_exit_bb;
294
295  if (region == NULL || region->inner == NULL
296      || region->exit == NULL || region->inner->exit == NULL
297      || region->inner->cont == NULL)
298    return;
299
300  /* We only support parallel+for and parallel+sections.  */
301  if (region->type != GIMPLE_OMP_PARALLEL
302      || (region->inner->type != GIMPLE_OMP_FOR
303	  && region->inner->type != GIMPLE_OMP_SECTIONS))
304    return;
305
306  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307     WS_EXIT_BB -> PAR_EXIT_BB.  */
308  par_entry_bb = region->entry;
309  par_exit_bb = region->exit;
310  ws_entry_bb = region->inner->entry;
311  ws_exit_bb = region->inner->exit;
312
313  if (single_succ (par_entry_bb) == ws_entry_bb
314      && single_succ (ws_exit_bb) == par_exit_bb
315      && workshare_safe_to_combine_p (ws_entry_bb)
316      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317	  || (last_and_only_stmt (ws_entry_bb)
318	      && last_and_only_stmt (par_exit_bb))))
319    {
320      gimple *par_stmt = last_stmt (par_entry_bb);
321      gimple *ws_stmt = last_stmt (ws_entry_bb);
322
323      if (region->inner->type == GIMPLE_OMP_FOR)
324	{
325	  /* If this is a combined parallel loop, we need to determine
326	     whether or not to use the combined library calls.  There
327	     are two cases where we do not apply the transformation:
328	     static loops and any kind of ordered loop.  In the first
329	     case, we already open code the loop so there is no need
330	     to do anything else.  In the latter case, the combined
331	     parallel loop call would still need extra synchronization
332	     to implement ordered semantics, so there would not be any
333	     gain in using the combined call.  */
334	  tree clauses = gimple_omp_for_clauses (ws_stmt);
335	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336	  if (c == NULL
337	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338		  == OMP_CLAUSE_SCHEDULE_STATIC)
339	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340	    {
341	      region->is_combined_parallel = false;
342	      region->inner->is_combined_parallel = false;
343	      return;
344	    }
345	}
346
347      region->is_combined_parallel = true;
348      region->inner->is_combined_parallel = true;
349      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350    }
351}
352
353/* Debugging dumps for parallel regions.  */
354void dump_omp_region (FILE *, struct omp_region *, int);
355void debug_omp_region (struct omp_region *);
356void debug_all_omp_regions (void);
357
358/* Dump the parallel region tree rooted at REGION.  */
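/* The dump output looks roughly like this (block numbers are of
   course example values; indentation reflects nesting):

	bb 2: gimple_omp_parallel
	    bb 4: gimple_omp_for
	    bb 6: GIMPLE_OMP_CONTINUE
	    bb 7: GIMPLE_OMP_RETURN
	bb 8: GIMPLE_OMP_RETURN
*/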
359
360void
361dump_omp_region (FILE *file, struct omp_region *region, int indent)
362{
363  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364	   gimple_code_name[region->type]);
365
366  if (region->inner)
367    dump_omp_region (file, region->inner, indent + 4);
368
369  if (region->cont)
370    {
371      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372	       region->cont->index);
373    }
374
375  if (region->exit)
376    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377	     region->exit->index);
378  else
379    fprintf (file, "%*s[no exit marker]\n", indent, "");
380
381  if (region->next)
382    dump_omp_region (file, region->next, indent);
383}
384
385DEBUG_FUNCTION void
386debug_omp_region (struct omp_region *region)
387{
388  dump_omp_region (stderr, region, 0);
389}
390
391DEBUG_FUNCTION void
392debug_all_omp_regions (void)
393{
394  dump_omp_region (stderr, root_omp_region, 0);
395}
396
397/* Create a new omp region of type TYPE entered at block BB inside region PARENT.  */
398
399static struct omp_region *
400new_omp_region (basic_block bb, enum gimple_code type,
401		struct omp_region *parent)
402{
403  struct omp_region *region = XCNEW (struct omp_region);
404
405  region->outer = parent;
406  region->entry = bb;
407  region->type = type;
408
409  if (parent)
410    {
411      /* This is a nested region.  Add it to the list of inner
412	 regions in PARENT.  */
413      region->next = parent->inner;
414      parent->inner = region;
415    }
416  else
417    {
418      /* This is a toplevel region.  Add it to the list of toplevel
419	 regions in ROOT_OMP_REGION.  */
420      region->next = root_omp_region;
421      root_omp_region = region;
422    }
423
424  return region;
425}
426
427/* Release the memory associated with the region tree rooted at REGION.  */
428
429static void
430free_omp_region_1 (struct omp_region *region)
431{
432  struct omp_region *i, *n;
433
434  for (i = region->inner; i ; i = n)
435    {
436      n = i->next;
437      free_omp_region_1 (i);
438    }
439
440  free (region);
441}
442
443/* Release the memory for the entire omp region tree.  */
444
445void
446omp_free_regions (void)
447{
448  struct omp_region *r, *n;
449  for (r = root_omp_region; r ; r = n)
450    {
451      n = r->next;
452      free_omp_region_1 (r);
453    }
454  root_omp_region = NULL;
455}
456
457/* A convenience function to build an empty GIMPLE_COND with just the
458   condition.  */
459
460static gcond *
461gimple_build_cond_empty (tree cond)
462{
463  enum tree_code pred_code;
464  tree lhs, rhs;
465
466  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468}
469
470/* Return true if a parallel REGION is within a declare target function or
471   within a target region and is not a part of a gridified target.  */
472
473static bool
474parallel_needs_hsa_kernel_p (struct omp_region *region)
475{
476  bool indirect = false;
477  for (region = region->outer; region; region = region->outer)
478    {
479      if (region->type == GIMPLE_OMP_PARALLEL)
480	indirect = true;
481      else if (region->type == GIMPLE_OMP_TARGET)
482	{
483	  gomp_target *tgt_stmt
484	    = as_a <gomp_target *> (last_stmt (region->entry));
485
486	  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487			       OMP_CLAUSE__GRIDDIM_))
488	    return indirect;
489	  else
490	    return true;
491	}
492    }
493
494  if (lookup_attribute ("omp declare target",
495			DECL_ATTRIBUTES (current_function_decl)))
496    return true;
497
498  return false;
499}
500
501/* Build the function call to GOMP_parallel (or one of its combined
502   variants) to actually generate the parallel operation.  REGION is the
503   parallel region being expanded.  BB is the block in which to insert the
504   code.  WS_ARGS is set if this is a call to a combined parallel+workshare
505   construct; it contains the list of additional arguments needed by
506   the workshare construct.  */
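/* For a plain parallel region the call emitted below is roughly
   (illustrative)

	GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 0, 0);

   i.e. child function, address of the shared data block, num_threads
   (0 meaning "decide at run time") and flags.  Combined regions use
   the GOMP_parallel_loop_* or GOMP_parallel_sections entry points
   instead, with WS_ARGS spliced in before the flags argument.  */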
507
508static void
509expand_parallel_call (struct omp_region *region, basic_block bb,
510		      gomp_parallel *entry_stmt,
511		      vec<tree, va_gc> *ws_args)
512{
513  tree t, t1, t2, val, cond, c, clauses, flags;
514  gimple_stmt_iterator gsi;
515  gimple *stmt;
516  enum built_in_function start_ix;
517  int start_ix2;
518  location_t clause_loc;
519  vec<tree, va_gc> *args;
520
521  clauses = gimple_omp_parallel_clauses (entry_stmt);
522
523  /* Determine what flavor of GOMP_parallel we will be
524     emitting.  */
525  start_ix = BUILT_IN_GOMP_PARALLEL;
526  if (is_combined_parallel (region))
527    {
528      switch (region->inner->type)
529	{
530	case GIMPLE_OMP_FOR:
531	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
532	  switch (region->inner->sched_kind)
533	    {
534	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
535	      start_ix2 = 3;
536	      break;
537	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
538	    case OMP_CLAUSE_SCHEDULE_GUIDED:
539	      if (region->inner->sched_modifiers
540		  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541		{
542		  start_ix2 = 3 + region->inner->sched_kind;
543		  break;
544		}
545	      /* FALLTHRU */
546	    default:
547	      start_ix2 = region->inner->sched_kind;
548	      break;
549	    }
550	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
551	  start_ix = (enum built_in_function) start_ix2;
552	  break;
553	case GIMPLE_OMP_SECTIONS:
554	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
555	  break;
556	default:
557	  gcc_unreachable ();
558	}
559    }
560
561  /* By default, the value of NUM_THREADS is zero (selected at run time)
562     and there is no conditional.  */
563  cond = NULL_TREE;
564  val = build_int_cst (unsigned_type_node, 0);
565  flags = build_int_cst (unsigned_type_node, 0);
566
567  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
568  if (c)
569    cond = OMP_CLAUSE_IF_EXPR (c);
570
571  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
572  if (c)
573    {
574      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
575      clause_loc = OMP_CLAUSE_LOCATION (c);
576    }
577  else
578    clause_loc = gimple_location (entry_stmt);
579
580  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
581  if (c)
582    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583
584  /* Ensure 'val' is of the correct type.  */
585  val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586
587  /* If we found the clause 'if (cond)', build either
588     (cond != 0) or (cond ? val : 1u).  */
589  if (cond)
590    {
591      cond = gimple_boolify (cond);
592
593      if (integer_zerop (val))
594	val = fold_build2_loc (clause_loc,
595			   EQ_EXPR, unsigned_type_node, cond,
596			   build_int_cst (TREE_TYPE (cond), 0));
597      else
598	{
599	  basic_block cond_bb, then_bb, else_bb;
600	  edge e, e_then, e_else;
601	  tree tmp_then, tmp_else, tmp_join, tmp_var;
602
603	  tmp_var = create_tmp_var (TREE_TYPE (val));
604	  if (gimple_in_ssa_p (cfun))
605	    {
606	      tmp_then = make_ssa_name (tmp_var);
607	      tmp_else = make_ssa_name (tmp_var);
608	      tmp_join = make_ssa_name (tmp_var);
609	    }
610	  else
611	    {
612	      tmp_then = tmp_var;
613	      tmp_else = tmp_var;
614	      tmp_join = tmp_var;
615	    }
616
617	  e = split_block_after_labels (bb);
618	  cond_bb = e->src;
619	  bb = e->dest;
620	  remove_edge (e);
621
622	  then_bb = create_empty_bb (cond_bb);
623	  else_bb = create_empty_bb (then_bb);
624	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
625	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626
627	  stmt = gimple_build_cond_empty (cond);
628	  gsi = gsi_start_bb (cond_bb);
629	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630
631	  gsi = gsi_start_bb (then_bb);
632	  expand_omp_build_assign (&gsi, tmp_then, val, true);
633
634	  gsi = gsi_start_bb (else_bb);
635	  expand_omp_build_assign (&gsi, tmp_else,
636				   build_int_cst (unsigned_type_node, 1),
637				   true);
638
639	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
640	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
641	  add_bb_to_loop (then_bb, cond_bb->loop_father);
642	  add_bb_to_loop (else_bb, cond_bb->loop_father);
643	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
644	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645
646	  if (gimple_in_ssa_p (cfun))
647	    {
648	      gphi *phi = create_phi_node (tmp_join, bb);
649	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
650	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
651	    }
652
653	  val = tmp_join;
654	}
655
656      gsi = gsi_start_bb (bb);
657      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
658				      false, GSI_CONTINUE_LINKING);
659    }
660
661  gsi = gsi_last_bb (bb);
662  t = gimple_omp_parallel_data_arg (entry_stmt);
663  if (t == NULL)
664    t1 = null_pointer_node;
665  else
666    t1 = build_fold_addr_expr (t);
667  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
668  t2 = build_fold_addr_expr (child_fndecl);
669
670  vec_alloc (args, 4 + vec_safe_length (ws_args));
671  args->quick_push (t2);
672  args->quick_push (t1);
673  args->quick_push (val);
674  if (ws_args)
675    args->splice (*ws_args);
676  args->quick_push (flags);
677
678  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
679			       builtin_decl_explicit (start_ix), args);
680
681  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
682			    false, GSI_CONTINUE_LINKING);
683
684  if (hsa_gen_requested_p ()
685      && parallel_needs_hsa_kernel_p (region))
686    {
687      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
688      hsa_register_kernel (child_cnode);
689    }
690}
691
692/* Insert a call to the function named by the first element of WS_ARGS,
693   with the information from ENTRY_STMT, into the basic block BB.  */
694
695static void
696expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
697		      vec <tree, va_gc> *ws_args)
698{
699  tree t, t1, t2;
700  gimple_stmt_iterator gsi;
701  vec <tree, va_gc> *args;
702
703  gcc_assert (vec_safe_length (ws_args) == 2);
704  tree func_name = (*ws_args)[0];
705  tree grain = (*ws_args)[1];
706
707  tree clauses = gimple_omp_parallel_clauses (entry_stmt);
708  tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
709  gcc_assert (count != NULL_TREE);
710  count = OMP_CLAUSE_OPERAND (count, 0);
711
712  gsi = gsi_last_bb (bb);
713  t = gimple_omp_parallel_data_arg (entry_stmt);
714  if (t == NULL)
715    t1 = null_pointer_node;
716  else
717    t1 = build_fold_addr_expr (t);
718  t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719
720  vec_alloc (args, 4);
721  args->quick_push (t2);
722  args->quick_push (t1);
723  args->quick_push (count);
724  args->quick_push (grain);
725  t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726
727  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
728			    GSI_CONTINUE_LINKING);
729}
730
731/* Build the function call to GOMP_task (or GOMP_taskloop) to actually
732   generate the task operation.  BB is the block in which to insert the code.  */
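/* The call emitted below has roughly the shape (illustrative)

	GOMP_task (foo._omp_fn.1, &.omp_data_o.2, cpyfn, arg_size,
		   arg_align, if_cond, flags, depend, priority);

   for a task, while a taskloop instead calls GOMP_taskloop or
   GOMP_taskloop_ull, passing num_tasks and the loop's start, end and
   step in place of the if condition and depend arguments.  */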
733
734static void
735expand_task_call (struct omp_region *region, basic_block bb,
736		  gomp_task *entry_stmt)
737{
738  tree t1, t2, t3;
739  gimple_stmt_iterator gsi;
740  location_t loc = gimple_location (entry_stmt);
741
742  tree clauses = gimple_omp_task_clauses (entry_stmt);
743
744  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
745  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
746  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
747  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
748  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
749  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750
751  unsigned int iflags
752    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
753      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
754      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755
756  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
757  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
758  tree num_tasks = NULL_TREE;
759  bool ull = false;
760  if (taskloop_p)
761    {
762      gimple *g = last_stmt (region->outer->entry);
763      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
764		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
765      struct omp_for_data fd;
766      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
767      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
768      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
769				OMP_CLAUSE__LOOPTEMP_);
770      startvar = OMP_CLAUSE_DECL (startvar);
771      endvar = OMP_CLAUSE_DECL (endvar);
772      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
773      if (fd.loop.cond_code == LT_EXPR)
774	iflags |= GOMP_TASK_FLAG_UP;
775      tree tclauses = gimple_omp_for_clauses (g);
776      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
777      if (num_tasks)
778	num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
779      else
780	{
781	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
782	  if (num_tasks)
783	    {
784	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
785	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786	    }
787	  else
788	    num_tasks = integer_zero_node;
789	}
790      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
791      if (ifc == NULL_TREE)
792	iflags |= GOMP_TASK_FLAG_IF;
793      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
794	iflags |= GOMP_TASK_FLAG_NOGROUP;
795      ull = fd.iter_type == long_long_unsigned_type_node;
796    }
797  else if (priority)
798    iflags |= GOMP_TASK_FLAG_PRIORITY;
799
800  tree flags = build_int_cst (unsigned_type_node, iflags);
801
802  tree cond = boolean_true_node;
803  if (ifc)
804    {
805      if (taskloop_p)
806	{
807	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
808	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
809			       build_int_cst (unsigned_type_node,
810					      GOMP_TASK_FLAG_IF),
811			       build_int_cst (unsigned_type_node, 0));
812	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
813				   flags, t);
814	}
815      else
816	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
817    }
818
819  if (finalc)
820    {
821      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
822      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
823			   build_int_cst (unsigned_type_node,
824					  GOMP_TASK_FLAG_FINAL),
825			   build_int_cst (unsigned_type_node, 0));
826      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827    }
828  if (depend)
829    depend = OMP_CLAUSE_DECL (depend);
830  else
831    depend = build_int_cst (ptr_type_node, 0);
832  if (priority)
833    priority = fold_convert (integer_type_node,
834			     OMP_CLAUSE_PRIORITY_EXPR (priority));
835  else
836    priority = integer_zero_node;
837
838  gsi = gsi_last_bb (bb);
839  tree t = gimple_omp_task_data_arg (entry_stmt);
840  if (t == NULL)
841    t2 = null_pointer_node;
842  else
843    t2 = build_fold_addr_expr_loc (loc, t);
844  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
845  t = gimple_omp_task_copy_fn (entry_stmt);
846  if (t == NULL)
847    t3 = null_pointer_node;
848  else
849    t3 = build_fold_addr_expr_loc (loc, t);
850
851  if (taskloop_p)
852    t = build_call_expr (ull
853			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
854			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
855			 11, t1, t2, t3,
856			 gimple_omp_task_arg_size (entry_stmt),
857			 gimple_omp_task_arg_align (entry_stmt), flags,
858			 num_tasks, priority, startvar, endvar, step);
859  else
860    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
861			 9, t1, t2, t3,
862			 gimple_omp_task_arg_size (entry_stmt),
863			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
864			 depend, priority);
865
866  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
867			    false, GSI_CONTINUE_LINKING);
868}
869
870/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
871
872static tree
873vec2chain (vec<tree, va_gc> *v)
874{
875  tree chain = NULL_TREE, t;
876  unsigned ix;
877
878  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879    {
880      DECL_CHAIN (t) = chain;
881      chain = t;
882    }
883
884  return chain;
885}
886
887/* Remove barriers in REGION->EXIT's block.  Note that this is only
888   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
889   is an implicit barrier, a barrier left at the end of the GIMPLE_OMP_PARALLEL
890   region by a workshare inside the GIMPLE_OMP_PARALLEL can now be
891   removed.  */
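/* For example (illustrative), in

	#pragma omp parallel
	{
	  #pragma omp for
	  for (i = 0; i < n; i++)
	    body;
	}

   the barrier implied at the end of the for construct is immediately
   followed by the implicit barrier at the end of the parallel region,
   so the workshare's GIMPLE_OMP_RETURN can be marked nowait, provided
   no queued task might still rely on it (see below).  */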
892
893static void
894remove_exit_barrier (struct omp_region *region)
895{
896  gimple_stmt_iterator gsi;
897  basic_block exit_bb;
898  edge_iterator ei;
899  edge e;
900  gimple *stmt;
901  int any_addressable_vars = -1;
902
903  exit_bb = region->exit;
904
905  /* If the parallel region doesn't return, we don't have REGION->EXIT
906     block at all.  */
907  if (! exit_bb)
908    return;
909
910  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
911     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
912     statements that can appear in between are extremely limited -- no
913     memory operations at all.  Here, we allow nothing at all, so the
914     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
915  gsi = gsi_last_bb (exit_bb);
916  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
917  gsi_prev (&gsi);
918  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
919    return;
920
921  FOR_EACH_EDGE (e, ei, exit_bb->preds)
922    {
923      gsi = gsi_last_bb (e->src);
924      if (gsi_end_p (gsi))
925	continue;
926      stmt = gsi_stmt (gsi);
927      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
928	  && !gimple_omp_return_nowait_p (stmt))
929	{
930	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
931	     in many cases.  If there could be tasks queued, the barrier
932	     might be needed to let the tasks run before some local
933	     variable of the parallel that the task uses as shared
934	     runs out of scope.  The task can be spawned either
935	     from within current function (this would be easy to check)
936	     or from some function it calls and gets passed an address
937	     of such a variable.  */
938	  if (any_addressable_vars < 0)
939	    {
940	      gomp_parallel *parallel_stmt
941		= as_a <gomp_parallel *> (last_stmt (region->entry));
942	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
943	      tree local_decls, block, decl;
944	      unsigned ix;
945
946	      any_addressable_vars = 0;
947	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
948		if (TREE_ADDRESSABLE (decl))
949		  {
950		    any_addressable_vars = 1;
951		    break;
952		  }
953	      for (block = gimple_block (stmt);
954		   !any_addressable_vars
955		   && block
956		   && TREE_CODE (block) == BLOCK;
957		   block = BLOCK_SUPERCONTEXT (block))
958		{
959		  for (local_decls = BLOCK_VARS (block);
960		       local_decls;
961		       local_decls = DECL_CHAIN (local_decls))
962		    if (TREE_ADDRESSABLE (local_decls))
963		      {
964			any_addressable_vars = 1;
965			break;
966		      }
967		  if (block == gimple_block (parallel_stmt))
968		    break;
969		}
970	    }
971	  if (!any_addressable_vars)
972	    gimple_omp_return_set_nowait (stmt);
973	}
974    }
975}
976
977static void
978remove_exit_barriers (struct omp_region *region)
979{
980  if (region->type == GIMPLE_OMP_PARALLEL)
981    remove_exit_barrier (region);
982
983  if (region->inner)
984    {
985      region = region->inner;
986      remove_exit_barriers (region);
987      while (region->next)
988	{
989	  region = region->next;
990	  remove_exit_barriers (region);
991	}
992    }
993}
994
995/* Optimize omp_get_thread_num () and omp_get_num_threads ()
996   calls.  These can't be declared as const functions, but
997   within one parallel body they are constant, so they can be
998   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
999   which are declared const.  Similarly for a task body, except
1000   that in an untied task omp_get_thread_num () can change at any
1001   task scheduling point.  */
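/* So, roughly speaking (illustrative), inside an outlined parallel
   body a call

	n = omp_get_num_threads ();

   is redirected to __builtin_omp_get_num_threads, which is declared
   const and can therefore be CSEd by later optimizations.  */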
1002
1003static void
1004optimize_omp_library_calls (gimple *entry_stmt)
1005{
1006  basic_block bb;
1007  gimple_stmt_iterator gsi;
1008  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1009  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1010  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1011  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1012  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1013		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1014					  OMP_CLAUSE_UNTIED) != NULL);
1015
1016  FOR_EACH_BB_FN (bb, cfun)
1017    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018      {
1019	gimple *call = gsi_stmt (gsi);
1020	tree decl;
1021
1022	if (is_gimple_call (call)
1023	    && (decl = gimple_call_fndecl (call))
1024	    && DECL_EXTERNAL (decl)
1025	    && TREE_PUBLIC (decl)
1026	    && DECL_INITIAL (decl) == NULL)
1027	  {
1028	    tree built_in;
1029
1030	    if (DECL_NAME (decl) == thr_num_id)
1031	      {
1032		/* In #pragma omp task untied omp_get_thread_num () can change
1033		   during the execution of the task region.  */
1034		if (untied_task)
1035		  continue;
1036		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037	      }
1038	    else if (DECL_NAME (decl) == num_thr_id)
1039	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1040	    else
1041	      continue;
1042
1043	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1044		|| gimple_call_num_args (call) != 0)
1045	      continue;
1046
1047	    if (flag_exceptions && !TREE_NOTHROW (decl))
1048	      continue;
1049
1050	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1051		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1052					TREE_TYPE (TREE_TYPE (built_in))))
1053	      continue;
1054
1055	    gimple_call_set_fndecl (call, built_in);
1056	  }
1057      }
1058}
1059
1060/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1061   regimplified.  */
1062
1063static tree
1064expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065{
1066  tree t = *tp;
1067
1068  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1069  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1070    return t;
1071
1072  if (TREE_CODE (t) == ADDR_EXPR)
1073    recompute_tree_invariant_for_addr_expr (t);
1074
1075  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1076  return NULL_TREE;
1077}
1078
1079/* Insert a TO = FROM assignment before or after *GSI_P, depending on AFTER.  */
1080
1081static void
1082expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1083			 bool after)
1084{
1085  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1086  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1087				   !after, after ? GSI_CONTINUE_LINKING
1088						 : GSI_SAME_STMT);
1089  gimple *stmt = gimple_build_assign (to, from);
1090  if (after)
1091    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1092  else
1093    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1094  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1095      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096    {
1097      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1098      gimple_regimplify_operands (stmt, &gsi);
1099    }
1100}
1101
1102/* Expand the OpenMP parallel or task directive starting at REGION.  */
1103
1104static void
1105expand_omp_taskreg (struct omp_region *region)
1106{
1107  basic_block entry_bb, exit_bb, new_bb;
1108  struct function *child_cfun;
1109  tree child_fn, block, t;
1110  gimple_stmt_iterator gsi;
1111  gimple *entry_stmt, *stmt;
1112  edge e;
1113  vec<tree, va_gc> *ws_args;
1114
1115  entry_stmt = last_stmt (region->entry);
1116  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1117  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118
1119  entry_bb = region->entry;
1120  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1121    exit_bb = region->cont;
1122  else
1123    exit_bb = region->exit;
1124
1125  bool is_cilk_for
1126    = (flag_cilkplus
1127       && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1128       && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1129			   OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130
1131  if (is_cilk_for)
1132    /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1133       and the inner statement contains the name of the built-in function
1134       and grain.  */
1135    ws_args = region->inner->ws_args;
1136  else if (is_combined_parallel (region))
1137    ws_args = region->ws_args;
1138  else
1139    ws_args = NULL;
1140
1141  if (child_cfun->cfg)
1142    {
1143      /* Due to inlining, it may happen that we have already outlined
1144	 the region, in which case all we need to do is make the
1145	 sub-graph unreachable and emit the parallel call.  */
1146      edge entry_succ_e, exit_succ_e;
1147
1148      entry_succ_e = single_succ_edge (entry_bb);
1149
1150      gsi = gsi_last_bb (entry_bb);
1151      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1152		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1153      gsi_remove (&gsi, true);
1154
1155      new_bb = entry_bb;
1156      if (exit_bb)
1157	{
1158	  exit_succ_e = single_succ_edge (exit_bb);
1159	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160	}
1161      remove_edge_and_dominated_blocks (entry_succ_e);
1162    }
1163  else
1164    {
1165      unsigned srcidx, dstidx, num;
1166
1167      /* If the parallel region needs data sent from the parent
1168	 function, then the very first statement (except possible
1169	 tree profile counter updates) of the parallel body
1170	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1171	 &.OMP_DATA_O is passed as an argument to the child function,
1172	 we need to replace it with the argument as seen by the child
1173	 function.
1174
1175	 In most cases, this will end up being the identity assignment
1176	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1177	 a function call that has been inlined, the original PARM_DECL
1178	 .OMP_DATA_I may have been converted into a different local
1179	 variable, in which case we need to keep the assignment.  */
1180      if (gimple_omp_taskreg_data_arg (entry_stmt))
1181	{
1182	  basic_block entry_succ_bb
1183	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1184				       : FALLTHRU_EDGE (entry_bb)->dest;
1185	  tree arg;
1186	  gimple *parcopy_stmt = NULL;
1187
1188	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189	    {
1190	      gimple *stmt;
1191
1192	      gcc_assert (!gsi_end_p (gsi));
1193	      stmt = gsi_stmt (gsi);
1194	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1195		continue;
1196
1197	      if (gimple_num_ops (stmt) == 2)
1198		{
1199		  tree arg = gimple_assign_rhs1 (stmt);
1200
1201		  /* We ignore the subcode because we're
1202		     effectively doing a STRIP_NOPS.  */
1203
1204		  if (TREE_CODE (arg) == ADDR_EXPR
1205		      && TREE_OPERAND (arg, 0)
1206			== gimple_omp_taskreg_data_arg (entry_stmt))
1207		    {
1208		      parcopy_stmt = stmt;
1209		      break;
1210		    }
1211		}
1212	    }
1213
1214	  gcc_assert (parcopy_stmt != NULL);
1215	  arg = DECL_ARGUMENTS (child_fn);
1216
1217	  if (!gimple_in_ssa_p (cfun))
1218	    {
1219	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1220		gsi_remove (&gsi, true);
1221	      else
1222		{
1223		  /* ?? Is setting the subcode really necessary ??  */
1224		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1225		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1226		}
1227	    }
1228	  else
1229	    {
1230	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1231	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1232	      /* We'd like to set the rhs to the default def in the child_fn,
1233		 but it's too early to create ssa names in the child_fn.
1234		 Instead, we set the rhs to the parm.  In
1235		 move_sese_region_to_fn, we introduce a default def for the
1236		 parm, map the parm to its default def, and once we encounter
1237		 this stmt, replace the parm with the default def.  */
1238	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1239	      update_stmt (parcopy_stmt);
1240	    }
1241	}
1242
1243      /* Declare local variables needed in CHILD_CFUN.  */
1244      block = DECL_INITIAL (child_fn);
1245      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1246      /* The gimplifier could record temporaries in parallel/task block
1247	 rather than in containing function's local_decls chain,
1248	 which would mean cgraph missed finalizing them.  Do it now.  */
1249      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1250	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1251	  varpool_node::finalize_decl (t);
1252      DECL_SAVED_TREE (child_fn) = NULL;
1253      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1254      gimple_set_body (child_fn, NULL);
1255      TREE_USED (block) = 1;
1256
1257      /* Reset DECL_CONTEXT on function arguments.  */
1258      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1259	DECL_CONTEXT (t) = child_fn;
1260
1261      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262	 so that it can be moved to the child function.  */
1263      gsi = gsi_last_bb (entry_bb);
1264      stmt = gsi_stmt (gsi);
1265      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1266			   || gimple_code (stmt) == GIMPLE_OMP_TASK));
1267      e = split_block (entry_bb, stmt);
1268      gsi_remove (&gsi, true);
1269      entry_bb = e->dest;
1270      edge e2 = NULL;
1271      if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1272	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1273      else
1274	{
1275	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1276	  gcc_assert (e2->dest == region->exit);
1277	  remove_edge (BRANCH_EDGE (entry_bb));
1278	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1279	  gsi = gsi_last_bb (region->exit);
1280	  gcc_assert (!gsi_end_p (gsi)
1281		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1282	  gsi_remove (&gsi, true);
1283	}
1284
1285      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1286      if (exit_bb)
1287	{
1288	  gsi = gsi_last_bb (exit_bb);
1289	  gcc_assert (!gsi_end_p (gsi)
1290		      && (gimple_code (gsi_stmt (gsi))
1291			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1292	  stmt = gimple_build_return (NULL);
1293	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1294	  gsi_remove (&gsi, true);
1295	}
1296
1297      /* Move the parallel region into CHILD_CFUN.  */
1298
1299      if (gimple_in_ssa_p (cfun))
1300	{
1301	  init_tree_ssa (child_cfun);
1302	  init_ssa_operands (child_cfun);
1303	  child_cfun->gimple_df->in_ssa_p = true;
1304	  block = NULL_TREE;
1305	}
1306      else
1307	block = gimple_block (entry_stmt);
1308
1309      /* Make sure to generate early debug for the function before
1310         outlining anything.  */
1311      if (! gimple_in_ssa_p (cfun))
1312	(*debug_hooks->early_global_decl) (cfun->decl);
1313
1314      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1315      if (exit_bb)
1316	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1317      if (e2)
1318	{
1319	  basic_block dest_bb = e2->dest;
1320	  if (!exit_bb)
1321	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1322	  remove_edge (e2);
1323	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1324	}
1325      /* When the OMP expansion process cannot guarantee an up-to-date
1326	 loop tree, arrange for the child function to fix up loops.  */
1327      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1328	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1329
1330      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1331      num = vec_safe_length (child_cfun->local_decls);
1332      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1333	{
1334	  t = (*child_cfun->local_decls)[srcidx];
1335	  if (DECL_CONTEXT (t) == cfun->decl)
1336	    continue;
1337	  if (srcidx != dstidx)
1338	    (*child_cfun->local_decls)[dstidx] = t;
1339	  dstidx++;
1340	}
1341      if (dstidx != num)
1342	vec_safe_truncate (child_cfun->local_decls, dstidx);
1343
1344      /* Inform the callgraph about the new function.  */
1345      child_cfun->curr_properties = cfun->curr_properties;
1346      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1347      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1348      cgraph_node *node = cgraph_node::get_create (child_fn);
1349      node->parallelized_function = 1;
1350      cgraph_node::add_new_function (child_fn, true);
1351
1352      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1353		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1354
1355      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1356	 fixed in a following pass.  */
1357      push_cfun (child_cfun);
1358      if (need_asm)
1359	assign_assembler_name_if_needed (child_fn);
1360
1361      if (optimize)
1362	optimize_omp_library_calls (entry_stmt);
1363      cgraph_edge::rebuild_edges ();
1364
1365      /* Some EH regions might become dead, see PR34608.  If
1366	 pass_cleanup_cfg isn't the first pass to happen with the
1367	 new child, these dead EH edges might cause problems.
1368	 Clean them up now.  */
1369      if (flag_exceptions)
1370	{
1371	  basic_block bb;
1372	  bool changed = false;
1373
1374	  FOR_EACH_BB_FN (bb, cfun)
1375	    changed |= gimple_purge_dead_eh_edges (bb);
1376	  if (changed)
1377	    cleanup_tree_cfg ();
1378	}
1379      if (gimple_in_ssa_p (cfun))
1380	update_ssa (TODO_update_ssa);
1381      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1382	verify_loop_structure ();
1383      pop_cfun ();
1384
1385      if (dump_file && !gimple_in_ssa_p (cfun))
1386	{
1387	  omp_any_child_fn_dumped = true;
1388	  dump_function_header (dump_file, child_fn, dump_flags);
1389	  dump_function_to_file (child_fn, dump_file, dump_flags);
1390	}
1391    }
1392
1393  /* Emit a library call to launch the children threads.  */
1394  if (is_cilk_for)
1395    expand_cilk_for_call (new_bb,
1396			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1397  else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1398    expand_parallel_call (region, new_bb,
1399			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1400  else
1401    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1402  if (gimple_in_ssa_p (cfun))
1403    update_ssa (TODO_update_ssa_only_virtuals);
1404}
1405
1406/* Information about members of an OpenACC collapsed loop nest.  */
1407
1408struct oacc_collapse
1409{
1410  tree base;  /* Base value.  */
1411  tree iters; /* Number of steps.  */
1412  tree step;  /* Step size.  */
1413  tree tile;  /* Tile increment (if tiled).  */
1414  tree outer; /* Tile iterator var. */
1415};
1416
1417/* Helper for expand_oacc_for.  Determine collapsed loop information.
1418   Fill in COUNTS array.  Emit any initialization code before GSI.
1419   Return the calculated outer loop bound of BOUND_TYPE.  */
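/* As an illustration of the computation below: for an ascending
   member loop the iteration count is

	iters = (range - 1 + step) / step,   range = e - b

   so for (i = 0; i < 10; i += 3) we get range == 10 and
   iters == (10 - 1 + 3) / 3 == 4, and the returned total is the
   product of the member counts.  */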
1420
1421static tree
1422expand_oacc_collapse_init (const struct omp_for_data *fd,
1423			   gimple_stmt_iterator *gsi,
1424			   oacc_collapse *counts, tree bound_type,
1425			   location_t loc)
1426{
1427  tree tiling = fd->tiling;
1428  tree total = build_int_cst (bound_type, 1);
1429  int ix;
1430
1431  gcc_assert (integer_onep (fd->loop.step));
1432  gcc_assert (integer_zerop (fd->loop.n1));
1433
1434  /* When tiling, the first operand of the tile clause applies to the
1435     innermost loop, and we work outwards from there.  Seems
1436     backwards, but whatever.  */
1437  for (ix = fd->collapse; ix--;)
1438    {
1439      const omp_for_data_loop *loop = &fd->loops[ix];
1440
1441      tree iter_type = TREE_TYPE (loop->v);
1442      tree diff_type = iter_type;
1443      tree plus_type = iter_type;
1444
1445      gcc_assert (loop->cond_code == fd->loop.cond_code);
1446
1447      if (POINTER_TYPE_P (iter_type))
1448	plus_type = sizetype;
1449      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1450	diff_type = signed_type_for (diff_type);
1451
1452      if (tiling)
1453	{
1454	  tree num = build_int_cst (integer_type_node, fd->collapse);
1455	  tree loop_no = build_int_cst (integer_type_node, ix);
1456	  tree tile = TREE_VALUE (tiling);
1457	  gcall *call
1458	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1459					  /* gwv-outer=*/integer_zero_node,
1460					  /* gwv-inner=*/integer_zero_node);
1461
1462	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1463	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1464	  gimple_call_set_lhs (call, counts[ix].tile);
1465	  gimple_set_location (call, loc);
1466	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1467
1468	  tiling = TREE_CHAIN (tiling);
1469	}
1470      else
1471	{
1472	  counts[ix].tile = NULL;
1473	  counts[ix].outer = loop->v;
1474	}
1475
1476      tree b = loop->n1;
1477      tree e = loop->n2;
1478      tree s = loop->step;
1479      bool up = loop->cond_code == LT_EXPR;
1480      tree dir = build_int_cst (diff_type, up ? +1 : -1);
1481      bool negating;
1482      tree expr;
1483
1484      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1485				    true, GSI_SAME_STMT);
1486      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1487				    true, GSI_SAME_STMT);
1488
1489      /* Convert the step, avoiding possible unsigned->signed overflow.  */
1490      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1491      if (negating)
1492	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1493      s = fold_convert (diff_type, s);
1494      if (negating)
1495	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1496      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1497				    true, GSI_SAME_STMT);
1498
1499      /* Determine the range, avoiding possible unsigned->signed overflow.  */
1500      negating = !up && TYPE_UNSIGNED (iter_type);
1501      expr = fold_build2 (MINUS_EXPR, plus_type,
1502			  fold_convert (plus_type, negating ? b : e),
1503			  fold_convert (plus_type, negating ? e : b));
1504      expr = fold_convert (diff_type, expr);
1505      if (negating)
1506	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1507      tree range = force_gimple_operand_gsi
1508	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1509
1510      /* Determine number of iterations.  */
1511      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1512      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1513      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1514
1515      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1516					     true, GSI_SAME_STMT);
1517
1518      counts[ix].base = b;
1519      counts[ix].iters = iters;
1520      counts[ix].step = s;
1521
1522      total = fold_build2 (MULT_EXPR, bound_type, total,
1523			   fold_convert (bound_type, iters));
1524    }
1525
1526  return total;
1527}
1528
1529/* Emit initializers for collapsed loop members.  INNER is true if
1530   this is for the element loop of a TILE.  IVAR is the outer
1531   loop iteration variable, from which collapsed loop iteration values
1532   are calculated.  The COUNTS array has been initialized by
1533   expand_oacc_collapse_init.  */
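/* For example (illustrative), with two collapsed loops of 4 and 3
   iterations the linear index IVAR == 7 is decomposed innermost
   first: 7 % 3 == 1 selects the inner iteration and 7 / 3 == 2 the
   outer one, each then scaled by the loop step and added to the
   corresponding base below.  */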
1534
1535static void
1536expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1537			   gimple_stmt_iterator *gsi,
1538			   const oacc_collapse *counts, tree ivar)
1539{
1540  tree ivar_type = TREE_TYPE (ivar);
1541
1542  /*  The most rapidly changing iteration variable is the innermost
1543      one.  */
1544  for (int ix = fd->collapse; ix--;)
1545    {
1546      const omp_for_data_loop *loop = &fd->loops[ix];
1547      const oacc_collapse *collapse = &counts[ix];
1548      tree v = inner ? loop->v : collapse->outer;
1549      tree iter_type = TREE_TYPE (v);
1550      tree diff_type = TREE_TYPE (collapse->step);
1551      tree plus_type = iter_type;
1552      enum tree_code plus_code = PLUS_EXPR;
1553      tree expr;
1554
1555      if (POINTER_TYPE_P (iter_type))
1556	{
1557	  plus_code = POINTER_PLUS_EXPR;
1558	  plus_type = sizetype;
1559	}
1560
1561      expr = ivar;
1562      if (ix)
1563	{
1564	  tree mod = fold_convert (ivar_type, collapse->iters);
1565	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1566	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1567	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1568					   true, GSI_SAME_STMT);
1569	}
1570
1571      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1572			  collapse->step);
1573      expr = fold_build2 (plus_code, iter_type,
1574			  inner ? collapse->outer : collapse->base,
1575			  fold_convert (plus_type, expr));
1576      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1577				       true, GSI_SAME_STMT);
1578      gassign *ass = gimple_build_assign (v, expr);
1579      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1580    }
1581}
1582
1583/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1584   of the combined collapse > 1 loop constructs, generate code like:
1585	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1586	if (cond3 is <)
1587	  adj = STEP3 - 1;
1588	else
1589	  adj = STEP3 + 1;
1590	count3 = (adj + N32 - N31) / STEP3;
1591	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1592	if (cond2 is <)
1593	  adj = STEP2 - 1;
1594	else
1595	  adj = STEP2 + 1;
1596	count2 = (adj + N22 - N21) / STEP2;
1597	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1598	if (cond1 is <)
1599	  adj = STEP1 - 1;
1600	else
1601	  adj = STEP1 + 1;
1602	count1 = (adj + N12 - N11) / STEP1;
1603	count = count1 * count2 * count3;
1604   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1605	count = 0;
1606   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1607   of the combined loop constructs, just initialize COUNTS array
1608   from the _looptemp_ clauses.  */
1609
1610/* NOTE: It *could* be better to moosh all of the BBs together,
1611   creating one larger BB with all the computation and the unexpected
1612   jump at the end.  I.e.
1613
1614   bool zero3, zero2, zero1, zero;
1615
1616   zero3 = N32 c3 N31;
1617   count3 = (N32 - N31) /[cl] STEP3;
1618   zero2 = N22 c2 N21;
1619   count2 = (N22 - N21) /[cl] STEP2;
1620   zero1 = N12 c1 N11;
1621   count1 = (N12 - N11) /[cl] STEP1;
1622   zero = zero3 || zero2 || zero1;
1623   count = count1 * count2 * count3;
1624   if (__builtin_expect(zero, false)) goto zero_iter_bb;
1625
1626   After all, we expect zero to be false, and thus we expect to have to
1627   evaluate all of the comparison expressions, so short-circuiting
1628   oughtn't be a win.  Since the condition isn't protecting a
1629   denominator, we're not concerned about divide-by-zero, so we can
1630   fully evaluate count even if a numerator turned out to be wrong.
1631
1632   It seems like putting this all together would create much better
1633   scheduling opportunities, and less pressure on the chip's branch
1634   predictor.  */
1635
1636static void
1637expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1638			    basic_block &entry_bb, tree *counts,
1639			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1640			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1641			    basic_block &l2_dom_bb)
1642{
1643  tree t, type = TREE_TYPE (fd->loop.v);
1644  edge e, ne;
1645  int i;
1646
1647  /* Collapsed loops need work for expansion into SSA form.  */
1648  gcc_assert (!gimple_in_ssa_p (cfun));
1649
1650  if (gimple_omp_for_combined_into_p (fd->for_stmt)
1651      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1652    {
1653      gcc_assert (fd->ordered == 0);
1654      /* First two _looptemp_ clauses are for istart/iend; counts[0]
1655	 doesn't need to be handled here, as the inner loop doesn't
1656	 use it.  */
1657      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1658				     OMP_CLAUSE__LOOPTEMP_);
1659      gcc_assert (innerc);
1660      for (i = 0; i < fd->collapse; i++)
1661	{
1662	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1663				    OMP_CLAUSE__LOOPTEMP_);
1664	  gcc_assert (innerc);
1665	  if (i)
1666	    counts[i] = OMP_CLAUSE_DECL (innerc);
1667	  else
1668	    counts[0] = NULL_TREE;
1669	}
1670      return;
1671    }
1672
1673  for (i = fd->collapse; i < fd->ordered; i++)
1674    {
1675      tree itype = TREE_TYPE (fd->loops[i].v);
1676      counts[i] = NULL_TREE;
1677      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1678		       fold_convert (itype, fd->loops[i].n1),
1679		       fold_convert (itype, fd->loops[i].n2));
1680      if (t && integer_zerop (t))
1681	{
1682	  for (i = fd->collapse; i < fd->ordered; i++)
1683	    counts[i] = build_int_cst (type, 0);
1684	  break;
1685	}
1686    }
1687  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1688    {
1689      tree itype = TREE_TYPE (fd->loops[i].v);
1690
1691      if (i >= fd->collapse && counts[i])
1692	continue;
1693      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1694	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1695				fold_convert (itype, fd->loops[i].n1),
1696				fold_convert (itype, fd->loops[i].n2)))
1697	      == NULL_TREE || !integer_onep (t)))
1698	{
1699	  gcond *cond_stmt;
1700	  tree n1, n2;
1701	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1702	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1703					 true, GSI_SAME_STMT);
1704	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1705	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1706					 true, GSI_SAME_STMT);
1707	  cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1708					 NULL_TREE, NULL_TREE);
1709	  gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1710	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1711			 expand_omp_regimplify_p, NULL, NULL)
1712	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1713			    expand_omp_regimplify_p, NULL, NULL))
1714	    {
1715	      *gsi = gsi_for_stmt (cond_stmt);
1716	      gimple_regimplify_operands (cond_stmt, gsi);
1717	    }
1718	  e = split_block (entry_bb, cond_stmt);
1719	  basic_block &zero_iter_bb
1720	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1721	  int &first_zero_iter
1722	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1723	  if (zero_iter_bb == NULL)
1724	    {
1725	      gassign *assign_stmt;
1726	      first_zero_iter = i;
1727	      zero_iter_bb = create_empty_bb (entry_bb);
1728	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1729	      *gsi = gsi_after_labels (zero_iter_bb);
1730	      if (i < fd->collapse)
1731		assign_stmt = gimple_build_assign (fd->loop.n2,
1732						   build_zero_cst (type));
1733	      else
1734		{
1735		  counts[i] = create_tmp_reg (type, ".count");
1736		  assign_stmt
1737		    = gimple_build_assign (counts[i], build_zero_cst (type));
1738		}
1739	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1740	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1741				       entry_bb);
1742	    }
1743	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1744	  ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1745	  e->flags = EDGE_TRUE_VALUE;
1746	  e->probability = REG_BR_PROB_BASE - ne->probability;
1747	  if (l2_dom_bb == NULL)
1748	    l2_dom_bb = entry_bb;
1749	  entry_bb = e->dest;
1750	  *gsi = gsi_last_bb (entry_bb);
1751	}
1752
1753      if (POINTER_TYPE_P (itype))
1754	itype = signed_type_for (itype);
1755      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1756				 ? -1 : 1));
1757      t = fold_build2 (PLUS_EXPR, itype,
1758		       fold_convert (itype, fd->loops[i].step), t);
1759      t = fold_build2 (PLUS_EXPR, itype, t,
1760		       fold_convert (itype, fd->loops[i].n2));
1761      t = fold_build2 (MINUS_EXPR, itype, t,
1762		       fold_convert (itype, fd->loops[i].n1));
1763      /* ?? We could probably use CEIL_DIV_EXPR instead of
1764	 TRUNC_DIV_EXPR and adjust by hand.  Then again, we might not
1765	 be able to generate the same code in the end, because
1766	 generically we don't know that the values involved must be
1767	 negative for GT.  ?? */
1768      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1769	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1770			 fold_build1 (NEGATE_EXPR, itype, t),
1771			 fold_build1 (NEGATE_EXPR, itype,
1772				      fold_convert (itype,
1773						    fd->loops[i].step)));
1774      else
1775	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1776			 fold_convert (itype, fd->loops[i].step));
1777      t = fold_convert (type, t);
1778      if (TREE_CODE (t) == INTEGER_CST)
1779	counts[i] = t;
1780      else
1781	{
1782	  if (i < fd->collapse || i != first_zero_iter2)
1783	    counts[i] = create_tmp_reg (type, ".count");
1784	  expand_omp_build_assign (gsi, counts[i], t);
1785	}
1786      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1787	{
1788	  if (i == 0)
1789	    t = counts[0];
1790	  else
1791	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1792	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1793	}
1794    }
1795}
1796
1797/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
1798	T = V;
1799	V3 = N31 + (T % count3) * STEP3;
1800	T = T / count3;
1801	V2 = N21 + (T % count2) * STEP2;
1802	T = T / count2;
1803	V1 = N11 + T * STEP1;
1804   if this loop doesn't have an inner loop construct combined with it.
1805   If it does have an inner loop construct combined with it and the
1806   iteration count isn't known constant, store values from counts array
1807   into its _looptemp_ temporaries instead.  */
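
/* For illustration (example values chosen arbitrarily): with count3 = 4,
   count2 = 3 and a flattened iteration number V = 7, the scheme above
   computes
	V3 = N31 + (7 % 4) * STEP3 = N31 + 3 * STEP3;  T = 7 / 4 = 1;
	V2 = N21 + (1 % 3) * STEP2 = N21 + 1 * STEP2;  T = 1 / 3 = 0;
	V1 = N11 + 0 * STEP1 = N11;
   i.e. the single flattened iteration number is decomposed back into the
   (0, 1, 3) element of the count1 x count2 x count3 iteration space, with
   the innermost index varying fastest.  */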
1808
1809static void
1810expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1811			  tree *counts, gimple *inner_stmt, tree startvar)
1812{
1813  int i;
1814  if (gimple_omp_for_combined_p (fd->for_stmt))
1815    {
1816      /* If fd->loop.n2 is constant, then no propagation of the counts
1817	 is needed, they are constant.  */
1818      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1819	return;
1820
1821      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1822		     ? gimple_omp_taskreg_clauses (inner_stmt)
1823		     : gimple_omp_for_clauses (inner_stmt);
1824      /* First two _looptemp_ clauses are for istart/iend; counts[0]
1825	 doesn't need to be handled here, as the inner loop doesn't
1826	 use it.  */
1827      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1828      gcc_assert (innerc);
1829      for (i = 0; i < fd->collapse; i++)
1830	{
1831	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1832				    OMP_CLAUSE__LOOPTEMP_);
1833	  gcc_assert (innerc);
1834	  if (i)
1835	    {
1836	      tree tem = OMP_CLAUSE_DECL (innerc);
1837	      tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1838	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1839					    false, GSI_CONTINUE_LINKING);
1840	      gassign *stmt = gimple_build_assign (tem, t);
1841	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1842	    }
1843	}
1844      return;
1845    }
1846
1847  tree type = TREE_TYPE (fd->loop.v);
1848  tree tem = create_tmp_reg (type, ".tem");
1849  gassign *stmt = gimple_build_assign (tem, startvar);
1850  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1851
1852  for (i = fd->collapse - 1; i >= 0; i--)
1853    {
1854      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1855      itype = vtype;
1856      if (POINTER_TYPE_P (vtype))
1857	itype = signed_type_for (vtype);
1858      if (i != 0)
1859	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1860      else
1861	t = tem;
1862      t = fold_convert (itype, t);
1863      t = fold_build2 (MULT_EXPR, itype, t,
1864		       fold_convert (itype, fd->loops[i].step));
1865      if (POINTER_TYPE_P (vtype))
1866	t = fold_build_pointer_plus (fd->loops[i].n1, t);
1867      else
1868	t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1869      t = force_gimple_operand_gsi (gsi, t,
1870				    DECL_P (fd->loops[i].v)
1871				    && TREE_ADDRESSABLE (fd->loops[i].v),
1872				    NULL_TREE, false,
1873				    GSI_CONTINUE_LINKING);
1874      stmt = gimple_build_assign (fd->loops[i].v, t);
1875      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1876      if (i != 0)
1877	{
1878	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1879	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1880					false, GSI_CONTINUE_LINKING);
1881	  stmt = gimple_build_assign (tem, t);
1882	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1883	}
1884    }
1885}
1886
1887/* Helper function for expand_omp_for_*.  Generate code like:
1888    L10:
1889	V3 += STEP3;
1890	if (V3 cond3 N32) goto BODY_BB; else goto L11;
1891    L11:
1892	V3 = N31;
1893	V2 += STEP2;
1894	if (V2 cond2 N22) goto BODY_BB; else goto L12;
1895    L12:
1896	V2 = N21;
1897	V1 += STEP1;
1898	goto BODY_BB;  */
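
/* In other words, the generated blocks implement an "odometer" style
   increment of the collapsed iteration variables: the innermost variable
   is bumped first, and whenever a variable runs past its bound it is
   reset to its initial value and the next outer variable is bumped
   instead; V1 is finally incremented with no bound check, because the
   overall termination test is done on the flattened iteration variable
   by the caller.  */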
1899
1900static basic_block
1901extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1902			     basic_block body_bb)
1903{
1904  basic_block last_bb, bb, collapse_bb = NULL;
1905  int i;
1906  gimple_stmt_iterator gsi;
1907  edge e;
1908  tree t;
1909  gimple *stmt;
1910
1911  last_bb = cont_bb;
1912  for (i = fd->collapse - 1; i >= 0; i--)
1913    {
1914      tree vtype = TREE_TYPE (fd->loops[i].v);
1915
1916      bb = create_empty_bb (last_bb);
1917      add_bb_to_loop (bb, last_bb->loop_father);
1918      gsi = gsi_start_bb (bb);
1919
1920      if (i < fd->collapse - 1)
1921	{
1922	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1923	  e->probability = REG_BR_PROB_BASE / 8;
1924
1925	  t = fd->loops[i + 1].n1;
1926	  t = force_gimple_operand_gsi (&gsi, t,
1927					DECL_P (fd->loops[i + 1].v)
1928					&& TREE_ADDRESSABLE (fd->loops[i
1929								       + 1].v),
1930					NULL_TREE, false,
1931					GSI_CONTINUE_LINKING);
1932	  stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1933	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1934	}
1935      else
1936	collapse_bb = bb;
1937
1938      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1939
1940      if (POINTER_TYPE_P (vtype))
1941	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1942      else
1943	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1944      t = force_gimple_operand_gsi (&gsi, t,
1945				    DECL_P (fd->loops[i].v)
1946				    && TREE_ADDRESSABLE (fd->loops[i].v),
1947				    NULL_TREE, false, GSI_CONTINUE_LINKING);
1948      stmt = gimple_build_assign (fd->loops[i].v, t);
1949      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1950
1951      if (i > 0)
1952	{
1953	  t = fd->loops[i].n2;
1954	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1955					false, GSI_CONTINUE_LINKING);
1956	  tree v = fd->loops[i].v;
1957	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
1958	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1959					  false, GSI_CONTINUE_LINKING);
1960	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1961	  stmt = gimple_build_cond_empty (t);
1962	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1963	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
1964			 expand_omp_regimplify_p, NULL, NULL)
1965	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
1966			    expand_omp_regimplify_p, NULL, NULL))
1967	    gimple_regimplify_operands (stmt, &gsi);
1968	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1969	  e->probability = REG_BR_PROB_BASE * 7 / 8;
1970	}
1971      else
1972	make_edge (bb, body_bb, EDGE_FALLTHRU);
1973      last_bb = bb;
1974    }
1975
1976  return collapse_bb;
1977}
1978
1979/* Expand #pragma omp ordered depend(source).  */
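
/* E.g. (illustrative example) for a user loop nest like
	#pragma omp for ordered(2)
	for (i = 0; i < N; i++)
	  for (j = 0; j < M; j++)
	    {
	      #pragma omp ordered depend(source)
	      ...
	    }
   this emits a call to GOMP_doacross_post (or GOMP_doacross_ull_post)
   with the address of the counts array holding the current iteration
   numbers, so that later depend(sink:...) waits can observe that this
   iteration has reached the source point.  */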
1980
1981static void
1982expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1983			   tree *counts, location_t loc)
1984{
1985  enum built_in_function source_ix
1986    = fd->iter_type == long_integer_type_node
1987      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1988  gimple *g
1989    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1990			 build_fold_addr_expr (counts[fd->ordered]));
1991  gimple_set_location (g, loc);
1992  gsi_insert_before (gsi, g, GSI_SAME_STMT);
1993}
1994
1995/* Expand a single depend from #pragma omp ordered depend(sink:...).  */
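
/* E.g. (illustrative) a wait such as
	#pragma omp ordered depend(sink: i - 1, j)
   in an ordered(2) loop nest is turned into a guarded call to
   GOMP_doacross_wait (or GOMP_doacross_ull_wait) whose arguments are,
   roughly, the iteration numbers of the (i - 1, j) iteration; the
   condition computed below skips the call when that iteration lies
   outside the iteration space (e.g. in the very first i iteration), and
   a diagnostic is emitted if the clause waits for a lexically later
   iteration.  */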
1996
1997static void
1998expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1999			 tree *counts, tree c, location_t loc)
2000{
2001  auto_vec<tree, 10> args;
2002  enum built_in_function sink_ix
2003    = fd->iter_type == long_integer_type_node
2004      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2005  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2006  int i;
2007  gimple_stmt_iterator gsi2 = *gsi;
2008  bool warned_step = false;
2009
2010  for (i = 0; i < fd->ordered; i++)
2011    {
2012      tree step = NULL_TREE;
2013      off = TREE_PURPOSE (deps);
2014      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2015	{
2016	  step = TREE_OPERAND (off, 1);
2017	  off = TREE_OPERAND (off, 0);
2018	}
2019      if (!integer_zerop (off))
2020	{
2021	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2022		      || fd->loops[i].cond_code == GT_EXPR);
2023	  bool forward = fd->loops[i].cond_code == LT_EXPR;
2024	  if (step)
2025	    {
2026	      /* Non-simple Fortran DO loops.  If step is variable,
2027		 we don't know even the direction at compile time, so
2028		 we can't warn.  */
2029	      if (TREE_CODE (step) != INTEGER_CST)
2030		break;
2031	      forward = tree_int_cst_sgn (step) != -1;
2032	    }
2033	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2034	    warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2035				"lexically later iteration");
2036	  break;
2037	}
2038      deps = TREE_CHAIN (deps);
2039    }
2040  /* If all offsets corresponding to the collapsed loops are zero,
2041     this depend clause can be ignored.  FIXME: but a flush is still
2042     needed; we would need to emit one __sync_synchronize () for it
2043     (perhaps conditionally).  Solve this together with the
2044     conservative dependence folding optimization.
2045  if (i >= fd->collapse)
2046    return;  */
2047
2048  deps = OMP_CLAUSE_DECL (c);
2049  gsi_prev (&gsi2);
2050  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2051  edge e2 = split_block_after_labels (e1->dest);
2052
2053  gsi2 = gsi_after_labels (e1->dest);
2054  *gsi = gsi_last_bb (e1->src);
2055  for (i = 0; i < fd->ordered; i++)
2056    {
2057      tree itype = TREE_TYPE (fd->loops[i].v);
2058      tree step = NULL_TREE;
2059      tree orig_off = NULL_TREE;
2060      if (POINTER_TYPE_P (itype))
2061	itype = sizetype;
2062      if (i)
2063	deps = TREE_CHAIN (deps);
2064      off = TREE_PURPOSE (deps);
2065      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2066	{
2067	  step = TREE_OPERAND (off, 1);
2068	  off = TREE_OPERAND (off, 0);
2069	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
2070		      && integer_onep (fd->loops[i].step)
2071		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2072	}
2073      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2074      if (step)
2075	{
2076	  off = fold_convert_loc (loc, itype, off);
2077	  orig_off = off;
2078	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2079	}
2080
2081      if (integer_zerop (off))
2082	t = boolean_true_node;
2083      else
2084	{
2085	  tree a;
2086	  tree co = fold_convert_loc (loc, itype, off);
2087	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2088	    {
2089	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2090		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2091	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2092				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2093				   co);
2094	    }
2095	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2096	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2097				 fd->loops[i].v, co);
2098	  else
2099	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2100				 fd->loops[i].v, co);
2101	  if (step)
2102	    {
2103	      tree t1, t2;
2104	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2105		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2106				      fd->loops[i].n1);
2107	      else
2108		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2109				      fd->loops[i].n2);
2110	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2111		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2112				      fd->loops[i].n2);
2113	      else
2114		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2115				      fd->loops[i].n1);
2116	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2117				   step, build_int_cst (TREE_TYPE (step), 0));
2118	      if (TREE_CODE (step) != INTEGER_CST)
2119		{
2120		  t1 = unshare_expr (t1);
2121		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2122						 false, GSI_CONTINUE_LINKING);
2123		  t2 = unshare_expr (t2);
2124		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2125						 false, GSI_CONTINUE_LINKING);
2126		}
2127	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2128				   t, t2, t1);
2129	    }
2130	  else if (fd->loops[i].cond_code == LT_EXPR)
2131	    {
2132	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2133		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2134				     fd->loops[i].n1);
2135	      else
2136		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2137				     fd->loops[i].n2);
2138	    }
2139	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2140	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2141				 fd->loops[i].n2);
2142	  else
2143	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2144				 fd->loops[i].n1);
2145	}
2146      if (cond)
2147	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2148      else
2149	cond = t;
2150
2151      off = fold_convert_loc (loc, itype, off);
2152
2153      if (step
2154	  || (fd->loops[i].cond_code == LT_EXPR
2155	      ? !integer_onep (fd->loops[i].step)
2156	      : !integer_minus_onep (fd->loops[i].step)))
2157	{
2158	  if (step == NULL_TREE
2159	      && TYPE_UNSIGNED (itype)
2160	      && fd->loops[i].cond_code == GT_EXPR)
2161	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2162				 fold_build1_loc (loc, NEGATE_EXPR, itype,
2163						  s));
2164	  else
2165	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2166				 orig_off ? orig_off : off, s);
2167	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2168			       build_int_cst (itype, 0));
2169	  if (integer_zerop (t) && !warned_step)
2170	    {
2171	      warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2172				  "in the iteration space");
2173	      warned_step = true;
2174	    }
2175	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2176				  cond, t);
2177	}
2178
2179      if (i <= fd->collapse - 1 && fd->collapse > 1)
2180	t = fd->loop.v;
2181      else if (counts[i])
2182	t = counts[i];
2183      else
2184	{
2185	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2186			       fd->loops[i].v, fd->loops[i].n1);
2187	  t = fold_convert_loc (loc, fd->iter_type, t);
2188	}
2189      if (step)
2190	/* We have already divided off by step earlier.  */;
2191      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2192	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2193			       fold_build1_loc (loc, NEGATE_EXPR, itype,
2194						s));
2195      else
2196	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2197      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2198	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2199      off = fold_convert_loc (loc, fd->iter_type, off);
2200      if (i <= fd->collapse - 1 && fd->collapse > 1)
2201	{
2202	  if (i)
2203	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2204				   off);
2205	  if (i < fd->collapse - 1)
2206	    {
2207	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2208				      counts[i]);
2209	      continue;
2210	    }
2211	}
2212      off = unshare_expr (off);
2213      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2214      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2215				    true, GSI_SAME_STMT);
2216      args.safe_push (t);
2217    }
2218  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2219  gimple_set_location (g, loc);
2220  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2221
2222  cond = unshare_expr (cond);
2223  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2224				   GSI_CONTINUE_LINKING);
2225  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2226  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2227  e3->probability = REG_BR_PROB_BASE / 8;
2228  e1->probability = REG_BR_PROB_BASE - e3->probability;
2229  e1->flags = EDGE_TRUE_VALUE;
2230  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2231
2232  *gsi = gsi_after_labels (e2->dest);
2233}
2234
2235/* Expand all #pragma omp ordered depend(source) and
2236   #pragma omp ordered depend(sink:...) constructs in the current
2237   #pragma omp for ordered(n) region.  */
2238
2239static void
2240expand_omp_ordered_source_sink (struct omp_region *region,
2241				struct omp_for_data *fd, tree *counts,
2242				basic_block cont_bb)
2243{
2244  struct omp_region *inner;
2245  int i;
2246  for (i = fd->collapse - 1; i < fd->ordered; i++)
2247    if (i == fd->collapse - 1 && fd->collapse > 1)
2248      counts[i] = NULL_TREE;
2249    else if (i >= fd->collapse && !cont_bb)
2250      counts[i] = build_zero_cst (fd->iter_type);
2251    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2252	     && integer_onep (fd->loops[i].step))
2253      counts[i] = NULL_TREE;
2254    else
2255      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2256  tree atype
2257    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2258  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2259  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2260
2261  for (inner = region->inner; inner; inner = inner->next)
2262    if (inner->type == GIMPLE_OMP_ORDERED)
2263      {
2264	gomp_ordered *ord_stmt = inner->ord_stmt;
2265	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2266	location_t loc = gimple_location (ord_stmt);
2267	tree c;
2268	for (c = gimple_omp_ordered_clauses (ord_stmt);
2269	     c; c = OMP_CLAUSE_CHAIN (c))
2270	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2271	    break;
2272	if (c)
2273	  expand_omp_ordered_source (&gsi, fd, counts, loc);
2274	for (c = gimple_omp_ordered_clauses (ord_stmt);
2275	     c; c = OMP_CLAUSE_CHAIN (c))
2276	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2277	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2278	gsi_remove (&gsi, true);
2279      }
2280}
2281
2282/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2283   collapsed.  */
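
/* For a doacross loop with ordered(N) where N is larger than the collapse
   count, only the first fd->collapse loops are flattened into the
   iteration handed out by the runtime; the remaining
   fd->ordered - fd->collapse loops must still iterate explicitly around
   the body so that their counters are available for the
   depend(source)/depend(sink) bookkeeping above.  This builds those
   explicit loops (or, when there is no continue block, just initializes
   the variables and counters for the degenerate case).  */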
2284
2285static basic_block
2286expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2287			      basic_block cont_bb, basic_block body_bb,
2288			      bool ordered_lastprivate)
2289{
2290  if (fd->ordered == fd->collapse)
2291    return cont_bb;
2292
2293  if (!cont_bb)
2294    {
2295      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2296      for (int i = fd->collapse; i < fd->ordered; i++)
2297	{
2298	  tree type = TREE_TYPE (fd->loops[i].v);
2299	  tree n1 = fold_convert (type, fd->loops[i].n1);
2300	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2301	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2302			      size_int (i - fd->collapse + 1),
2303			      NULL_TREE, NULL_TREE);
2304	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2305	}
2306      return NULL;
2307    }
2308
2309  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2310    {
2311      tree t, type = TREE_TYPE (fd->loops[i].v);
2312      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2313      expand_omp_build_assign (&gsi, fd->loops[i].v,
2314			       fold_convert (type, fd->loops[i].n1));
2315      if (counts[i])
2316	expand_omp_build_assign (&gsi, counts[i],
2317				 build_zero_cst (fd->iter_type));
2318      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2319			  size_int (i - fd->collapse + 1),
2320			  NULL_TREE, NULL_TREE);
2321      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2322      if (!gsi_end_p (gsi))
2323	gsi_prev (&gsi);
2324      else
2325	gsi = gsi_last_bb (body_bb);
2326      edge e1 = split_block (body_bb, gsi_stmt (gsi));
2327      basic_block new_body = e1->dest;
2328      if (body_bb == cont_bb)
2329	cont_bb = new_body;
2330      edge e2 = NULL;
2331      basic_block new_header;
2332      if (EDGE_COUNT (cont_bb->preds) > 0)
2333	{
2334	  gsi = gsi_last_bb (cont_bb);
2335	  if (POINTER_TYPE_P (type))
2336	    t = fold_build_pointer_plus (fd->loops[i].v,
2337					 fold_convert (sizetype,
2338						       fd->loops[i].step));
2339	  else
2340	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2341			     fold_convert (type, fd->loops[i].step));
2342	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2343	  if (counts[i])
2344	    {
2345	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2346			       build_int_cst (fd->iter_type, 1));
2347	      expand_omp_build_assign (&gsi, counts[i], t);
2348	      t = counts[i];
2349	    }
2350	  else
2351	    {
2352	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2353			       fd->loops[i].v, fd->loops[i].n1);
2354	      t = fold_convert (fd->iter_type, t);
2355	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2356					    true, GSI_SAME_STMT);
2357	    }
2358	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2359			 size_int (i - fd->collapse + 1),
2360			 NULL_TREE, NULL_TREE);
2361	  expand_omp_build_assign (&gsi, aref, t);
2362	  gsi_prev (&gsi);
2363	  e2 = split_block (cont_bb, gsi_stmt (gsi));
2364	  new_header = e2->dest;
2365	}
2366      else
2367	new_header = cont_bb;
2368      gsi = gsi_after_labels (new_header);
2369      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2370					 true, GSI_SAME_STMT);
2371      tree n2
2372	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2373				    true, NULL_TREE, true, GSI_SAME_STMT);
2374      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2375      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2376      edge e3 = split_block (new_header, gsi_stmt (gsi));
2377      cont_bb = e3->dest;
2378      remove_edge (e1);
2379      make_edge (body_bb, new_header, EDGE_FALLTHRU);
2380      e3->flags = EDGE_FALSE_VALUE;
2381      e3->probability = REG_BR_PROB_BASE / 8;
2382      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2383      e1->probability = REG_BR_PROB_BASE - e3->probability;
2384
2385      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2386      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2387
2388      if (e2)
2389	{
2390	  struct loop *loop = alloc_loop ();
2391	  loop->header = new_header;
2392	  loop->latch = e2->src;
2393	  add_loop (loop, body_bb->loop_father);
2394	}
2395    }
2396
2397  /* If there are any lastprivate clauses and it is possible some loops
2398     might have zero iterations, ensure all the decls are initialized,
2399     otherwise we could crash evaluating C++ class iterators with lastprivate
2400     clauses.  */
2401  bool need_inits = false;
2402  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2403    if (need_inits)
2404      {
2405	tree type = TREE_TYPE (fd->loops[i].v);
2406	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2407	expand_omp_build_assign (&gsi, fd->loops[i].v,
2408				 fold_convert (type, fd->loops[i].n1));
2409      }
2410    else
2411      {
2412	tree type = TREE_TYPE (fd->loops[i].v);
2413	tree this_cond = fold_build2 (fd->loops[i].cond_code,
2414				      boolean_type_node,
2415				      fold_convert (type, fd->loops[i].n1),
2416				      fold_convert (type, fd->loops[i].n2));
2417	if (!integer_onep (this_cond))
2418	  need_inits = true;
2419      }
2420
2421  return cont_bb;
2422}
2423
2424/* A subroutine of expand_omp_for.  Generate code for a parallel
2425   loop with any schedule.  Given parameters:
2426
2427	for (V = N1; V cond N2; V += STEP) BODY;
2428
2429   where COND is "<" or ">", we generate pseudocode
2430
2431	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2432	if (more) goto L0; else goto L3;
2433    L0:
2434	V = istart0;
2435	iend = iend0;
2436    L1:
2437	BODY;
2438	V += STEP;
2439	if (V cond iend) goto L1; else goto L2;
2440    L2:
2441	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2442    L3:
2443
2444    If this is a combined omp parallel loop, instead of the call to
2445    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2446    If this is a gimple_omp_for_combined_p loop, then instead of assigning
2447    V and iend in L0 we assign the first two _looptemp_ clause decls of the
2448    inner GIMPLE_OMP_FOR, and the statements V += STEP; and
2449    if (V cond iend) goto L1; else goto L2; are removed.
2450
2451    For collapsed loops, given parameters:
2452      collapse(3)
2453      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2454	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2455	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2456	    BODY;
2457
2458    we generate pseudocode
2459
2460	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2461	if (cond3 is <)
2462	  adj = STEP3 - 1;
2463	else
2464	  adj = STEP3 + 1;
2465	count3 = (adj + N32 - N31) / STEP3;
2466	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2467	if (cond2 is <)
2468	  adj = STEP2 - 1;
2469	else
2470	  adj = STEP2 + 1;
2471	count2 = (adj + N22 - N21) / STEP2;
2472	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2473	if (cond1 is <)
2474	  adj = STEP1 - 1;
2475	else
2476	  adj = STEP1 + 1;
2477	count1 = (adj + N12 - N11) / STEP1;
2478	count = count1 * count2 * count3;
2479	goto Z1;
2480    Z0:
2481	count = 0;
2482    Z1:
2483	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2484	if (more) goto L0; else goto L3;
2485    L0:
2486	V = istart0;
2487	T = V;
2488	V3 = N31 + (T % count3) * STEP3;
2489	T = T / count3;
2490	V2 = N21 + (T % count2) * STEP2;
2491	T = T / count2;
2492	V1 = N11 + T * STEP1;
2493	iend = iend0;
2494    L1:
2495	BODY;
2496	V += 1;
2497	if (V < iend) goto L10; else goto L2;
2498    L10:
2499	V3 += STEP3;
2500	if (V3 cond3 N32) goto L1; else goto L11;
2501    L11:
2502	V3 = N31;
2503	V2 += STEP2;
2504	if (V2 cond2 N22) goto L1; else goto L12;
2505    L12:
2506	V2 = N21;
2507	V1 += STEP1;
2508	goto L1;
2509    L2:
2510	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2511    L3:
2512
2513      */
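
/* As a concrete (illustrative) example, a worksharing loop such as
	#pragma omp for schedule(dynamic, 4)
	for (i = 0; i < n; i++) ...
   inside a parallel region is expanded along the first scheme above, with
   GOMP_loop_foo_start / GOMP_loop_foo_next standing for the libgomp entry
   points GOMP_loop_dynamic_start and GOMP_loop_dynamic_next: each thread
   repeatedly asks the runtime for an [istart0, iend0) chunk of at most 4
   iterations and runs the sequential loop over it until the runtime
   reports that no work is left.  */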
2514
2515static void
2516expand_omp_for_generic (struct omp_region *region,
2517			struct omp_for_data *fd,
2518			enum built_in_function start_fn,
2519			enum built_in_function next_fn,
2520			gimple *inner_stmt)
2521{
2522  tree type, istart0, iend0, iend;
2523  tree t, vmain, vback, bias = NULL_TREE;
2524  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2525  basic_block l2_bb = NULL, l3_bb = NULL;
2526  gimple_stmt_iterator gsi;
2527  gassign *assign_stmt;
2528  bool in_combined_parallel = is_combined_parallel (region);
2529  bool broken_loop = region->cont == NULL;
2530  edge e, ne;
2531  tree *counts = NULL;
2532  int i;
2533  bool ordered_lastprivate = false;
2534
2535  gcc_assert (!broken_loop || !in_combined_parallel);
2536  gcc_assert (fd->iter_type == long_integer_type_node
2537	      || !in_combined_parallel);
2538
2539  entry_bb = region->entry;
2540  cont_bb = region->cont;
2541  collapse_bb = NULL;
2542  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2543  gcc_assert (broken_loop
2544	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2545  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2546  l1_bb = single_succ (l0_bb);
2547  if (!broken_loop)
2548    {
2549      l2_bb = create_empty_bb (cont_bb);
2550      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2551		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2552		      == l1_bb));
2553      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2554    }
2555  else
2556    l2_bb = NULL;
2557  l3_bb = BRANCH_EDGE (entry_bb)->dest;
2558  exit_bb = region->exit;
2559
2560  gsi = gsi_last_bb (entry_bb);
2561
2562  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2563  if (fd->ordered
2564      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2565			  OMP_CLAUSE_LASTPRIVATE))
2566    ordered_lastprivate = true;
2567  if (fd->collapse > 1 || fd->ordered)
2568    {
2569      int first_zero_iter1 = -1, first_zero_iter2 = -1;
2570      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2571
2572      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2573      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2574				  zero_iter1_bb, first_zero_iter1,
2575				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2576
2577      if (zero_iter1_bb)
2578	{
2579	  /* Some counts[i] vars might be uninitialized if
2580	     some loop has zero iterations.  But the body shouldn't
2581	     be executed in that case, so just avoid uninit warnings.  */
2582	  for (i = first_zero_iter1;
2583	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2584	    if (SSA_VAR_P (counts[i]))
2585	      TREE_NO_WARNING (counts[i]) = 1;
2586	  gsi_prev (&gsi);
2587	  e = split_block (entry_bb, gsi_stmt (gsi));
2588	  entry_bb = e->dest;
2589	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2590	  gsi = gsi_last_bb (entry_bb);
2591	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2592				   get_immediate_dominator (CDI_DOMINATORS,
2593							    zero_iter1_bb));
2594	}
2595      if (zero_iter2_bb)
2596	{
2597	  /* Some counts[i] vars might be uninitialized if
2598	     some loop has zero iterations.  But the body shouldn't
2599	     be executed in that case, so just avoid uninit warnings.  */
2600	  for (i = first_zero_iter2; i < fd->ordered; i++)
2601	    if (SSA_VAR_P (counts[i]))
2602	      TREE_NO_WARNING (counts[i]) = 1;
2603	  if (zero_iter1_bb)
2604	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2605	  else
2606	    {
2607	      gsi_prev (&gsi);
2608	      e = split_block (entry_bb, gsi_stmt (gsi));
2609	      entry_bb = e->dest;
2610	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2611	      gsi = gsi_last_bb (entry_bb);
2612	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2613				       get_immediate_dominator
2614					 (CDI_DOMINATORS, zero_iter2_bb));
2615	    }
2616	}
2617      if (fd->collapse == 1)
2618	{
2619	  counts[0] = fd->loop.n2;
2620	  fd->loop = fd->loops[0];
2621	}
2622    }
2623
2624  type = TREE_TYPE (fd->loop.v);
2625  istart0 = create_tmp_var (fd->iter_type, ".istart0");
2626  iend0 = create_tmp_var (fd->iter_type, ".iend0");
2627  TREE_ADDRESSABLE (istart0) = 1;
2628  TREE_ADDRESSABLE (iend0) = 1;
2629
2630  /* See if we need to bias by LLONG_MIN.  */
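  /* (The iteration space is handed to the GOMP_loop_ull_* functions as
     unsigned long long values; when the original signed IV range could
     straddle zero, adding the most negative value of the type shifts the
     whole range into the unsigned domain while preserving the ordering.
     The bias is subtracted again below when istart0/iend0 are converted
     back to the user's type.)  */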
2631  if (fd->iter_type == long_long_unsigned_type_node
2632      && TREE_CODE (type) == INTEGER_TYPE
2633      && !TYPE_UNSIGNED (type)
2634      && fd->ordered == 0)
2635    {
2636      tree n1, n2;
2637
2638      if (fd->loop.cond_code == LT_EXPR)
2639	{
2640	  n1 = fd->loop.n1;
2641	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2642	}
2643      else
2644	{
2645	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2646	  n2 = fd->loop.n1;
2647	}
2648      if (TREE_CODE (n1) != INTEGER_CST
2649	  || TREE_CODE (n2) != INTEGER_CST
2650	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2651	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2652    }
2653
2654  gimple_stmt_iterator gsif = gsi;
2655  gsi_prev (&gsif);
2656
2657  tree arr = NULL_TREE;
2658  if (in_combined_parallel)
2659    {
2660      gcc_assert (fd->ordered == 0);
2661      /* In a combined parallel loop, emit a call to
2662	 GOMP_loop_foo_next.  */
2663      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2664			   build_fold_addr_expr (istart0),
2665			   build_fold_addr_expr (iend0));
2666    }
2667  else
2668    {
2669      tree t0, t1, t2, t3, t4;
2670      /* If this is not a combined parallel loop, emit a call to
2671	 GOMP_loop_foo_start in ENTRY_BB.  */
2672      t4 = build_fold_addr_expr (iend0);
2673      t3 = build_fold_addr_expr (istart0);
2674      if (fd->ordered)
2675	{
2676	  t0 = build_int_cst (unsigned_type_node,
2677			      fd->ordered - fd->collapse + 1);
2678	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2679							fd->ordered
2680							- fd->collapse + 1),
2681				".omp_counts");
2682	  DECL_NAMELESS (arr) = 1;
2683	  TREE_ADDRESSABLE (arr) = 1;
2684	  TREE_STATIC (arr) = 1;
2685	  vec<constructor_elt, va_gc> *v;
2686	  vec_alloc (v, fd->ordered - fd->collapse + 1);
2687	  int idx;
2688
2689	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2690	    {
2691	      tree c;
2692	      if (idx == 0 && fd->collapse > 1)
2693		c = fd->loop.n2;
2694	      else
2695		c = counts[idx + fd->collapse - 1];
2696	      tree purpose = size_int (idx);
2697	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2698	      if (TREE_CODE (c) != INTEGER_CST)
2699		TREE_STATIC (arr) = 0;
2700	    }
2701
2702	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2703	  if (!TREE_STATIC (arr))
2704	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2705						    void_type_node, arr),
2706				      true, NULL_TREE, true, GSI_SAME_STMT);
2707	  t1 = build_fold_addr_expr (arr);
2708	  t2 = NULL_TREE;
2709	}
2710      else
2711	{
2712	  t2 = fold_convert (fd->iter_type, fd->loop.step);
2713	  t1 = fd->loop.n2;
2714	  t0 = fd->loop.n1;
2715	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
2716	    {
2717	      tree innerc
2718		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2719				   OMP_CLAUSE__LOOPTEMP_);
2720	      gcc_assert (innerc);
2721	      t0 = OMP_CLAUSE_DECL (innerc);
2722	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2723					OMP_CLAUSE__LOOPTEMP_);
2724	      gcc_assert (innerc);
2725	      t1 = OMP_CLAUSE_DECL (innerc);
2726	    }
2727	  if (POINTER_TYPE_P (TREE_TYPE (t0))
2728	      && TYPE_PRECISION (TREE_TYPE (t0))
2729		 != TYPE_PRECISION (fd->iter_type))
2730	    {
2731	      /* Avoid casting pointers to integer of a different size.  */
2732	      tree itype = signed_type_for (type);
2733	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2734	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2735	    }
2736	  else
2737	    {
2738	      t1 = fold_convert (fd->iter_type, t1);
2739	      t0 = fold_convert (fd->iter_type, t0);
2740	    }
2741	  if (bias)
2742	    {
2743	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2744	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2745	    }
2746	}
2747      if (fd->iter_type == long_integer_type_node || fd->ordered)
2748	{
2749	  if (fd->chunk_size)
2750	    {
2751	      t = fold_convert (fd->iter_type, fd->chunk_size);
2752	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2753	      if (fd->ordered)
2754		t = build_call_expr (builtin_decl_explicit (start_fn),
2755				     5, t0, t1, t, t3, t4);
2756	      else
2757		t = build_call_expr (builtin_decl_explicit (start_fn),
2758				     6, t0, t1, t2, t, t3, t4);
2759	    }
2760	  else if (fd->ordered)
2761	    t = build_call_expr (builtin_decl_explicit (start_fn),
2762				 4, t0, t1, t3, t4);
2763	  else
2764	    t = build_call_expr (builtin_decl_explicit (start_fn),
2765				 5, t0, t1, t2, t3, t4);
2766	}
2767      else
2768	{
2769	  tree t5;
2770	  tree c_bool_type;
2771	  tree bfn_decl;
2772
2773	  /* The GOMP_loop_ull_*start functions have an additional boolean
2774	     argument, true for < loops and false for > loops.
2775	     In Fortran, the C bool type can be different from
2776	     boolean_type_node.  */
2777	  bfn_decl = builtin_decl_explicit (start_fn);
2778	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2779	  t5 = build_int_cst (c_bool_type,
2780			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
2781	  if (fd->chunk_size)
2782	    {
2783	      tree bfn_decl = builtin_decl_explicit (start_fn);
2784	      t = fold_convert (fd->iter_type, fd->chunk_size);
2785	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
2786	      t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2787	    }
2788	  else
2789	    t = build_call_expr (builtin_decl_explicit (start_fn),
2790				 6, t5, t0, t1, t2, t3, t4);
2791	}
2792    }
2793  if (TREE_TYPE (t) != boolean_type_node)
2794    t = fold_build2 (NE_EXPR, boolean_type_node,
2795		     t, build_int_cst (TREE_TYPE (t), 0));
2796  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2797				true, GSI_SAME_STMT);
2798  if (arr && !TREE_STATIC (arr))
2799    {
2800      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2801      TREE_THIS_VOLATILE (clobber) = 1;
2802      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2803			 GSI_SAME_STMT);
2804    }
2805  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2806
2807  /* Remove the GIMPLE_OMP_FOR statement.  */
2808  gsi_remove (&gsi, true);
2809
2810  if (gsi_end_p (gsif))
2811    gsif = gsi_after_labels (gsi_bb (gsif));
2812  gsi_next (&gsif);
2813
2814  /* Iteration setup for sequential loop goes in L0_BB.  */
2815  tree startvar = fd->loop.v;
2816  tree endvar = NULL_TREE;
2817
2818  if (gimple_omp_for_combined_p (fd->for_stmt))
2819    {
2820      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2821		  && gimple_omp_for_kind (inner_stmt)
2822		     == GF_OMP_FOR_KIND_SIMD);
2823      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2824				     OMP_CLAUSE__LOOPTEMP_);
2825      gcc_assert (innerc);
2826      startvar = OMP_CLAUSE_DECL (innerc);
2827      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2828				OMP_CLAUSE__LOOPTEMP_);
2829      gcc_assert (innerc);
2830      endvar = OMP_CLAUSE_DECL (innerc);
2831    }
2832
2833  gsi = gsi_start_bb (l0_bb);
2834  t = istart0;
2835  if (fd->ordered && fd->collapse == 1)
2836    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2837		     fold_convert (fd->iter_type, fd->loop.step));
2838  else if (bias)
2839    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2840  if (fd->ordered && fd->collapse == 1)
2841    {
2842      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2843	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2844			 fd->loop.n1, fold_convert (sizetype, t));
2845      else
2846	{
2847	  t = fold_convert (TREE_TYPE (startvar), t);
2848	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2849			   fd->loop.n1, t);
2850	}
2851    }
2852  else
2853    {
2854      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2855	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2856      t = fold_convert (TREE_TYPE (startvar), t);
2857    }
2858  t = force_gimple_operand_gsi (&gsi, t,
2859				DECL_P (startvar)
2860				&& TREE_ADDRESSABLE (startvar),
2861				NULL_TREE, false, GSI_CONTINUE_LINKING);
2862  assign_stmt = gimple_build_assign (startvar, t);
2863  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2864
2865  t = iend0;
2866  if (fd->ordered && fd->collapse == 1)
2867    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2868		     fold_convert (fd->iter_type, fd->loop.step));
2869  else if (bias)
2870    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2871  if (fd->ordered && fd->collapse == 1)
2872    {
2873      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2874	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2875			 fd->loop.n1, fold_convert (sizetype, t));
2876      else
2877	{
2878	  t = fold_convert (TREE_TYPE (startvar), t);
2879	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2880			   fd->loop.n1, t);
2881	}
2882    }
2883  else
2884    {
2885      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2886	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2887      t = fold_convert (TREE_TYPE (startvar), t);
2888    }
2889  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2890				   false, GSI_CONTINUE_LINKING);
2891  if (endvar)
2892    {
2893      assign_stmt = gimple_build_assign (endvar, iend);
2894      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2895      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2896	assign_stmt = gimple_build_assign (fd->loop.v, iend);
2897      else
2898	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2899      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2900    }
2901  /* Handle linear clause adjustments.  */
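  /* (E.g. for a clause like linear(x:2) this computes, at the start of
     each chunk, x = x_entry + itercnt * 2, where itercnt is the logical
     iteration number of the chunk start, (startvar - N1) / STEP; the same
     itercnt value is reused for all linear clauses on the construct.)  */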
2902  tree itercnt = NULL_TREE;
2903  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2904    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2905	 c; c = OMP_CLAUSE_CHAIN (c))
2906      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2907	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2908	{
2909	  tree d = OMP_CLAUSE_DECL (c);
2910	  bool is_ref = omp_is_reference (d);
2911	  tree t = d, a, dest;
2912	  if (is_ref)
2913	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2914	  tree type = TREE_TYPE (t);
2915	  if (POINTER_TYPE_P (type))
2916	    type = sizetype;
2917	  dest = unshare_expr (t);
2918	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
2919	  expand_omp_build_assign (&gsif, v, t);
2920	  if (itercnt == NULL_TREE)
2921	    {
2922	      itercnt = startvar;
2923	      tree n1 = fd->loop.n1;
2924	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2925		{
2926		  itercnt
2927		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2928				    itercnt);
2929		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
2930		}
2931	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2932				     itercnt, n1);
2933	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2934				     itercnt, fd->loop.step);
2935	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2936						  NULL_TREE, false,
2937						  GSI_CONTINUE_LINKING);
2938	    }
2939	  a = fold_build2 (MULT_EXPR, type,
2940			   fold_convert (type, itercnt),
2941			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2942	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2943			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2944	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2945					false, GSI_CONTINUE_LINKING);
2946	  assign_stmt = gimple_build_assign (dest, t);
2947	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2948	}
2949  if (fd->collapse > 1)
2950    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2951
2952  if (fd->ordered)
2953    {
2954      /* Until now, the counts array contained the number of iterations
2955	 (or a variable containing it) for the ith loop.  From now on, we
2956	 need those counts only for the collapsed loops, and only for the
2957	 2nd through the last collapsed one.  Move those one element
2958	 earlier; we'll use counts[fd->collapse - 1] for the first
2959	 source/sink iteration counter and so on, and counts[fd->ordered]
2960	 as the array holding the current counter values for
2961	 depend(source).  */
2962      if (fd->collapse > 1)
2963	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2964      if (broken_loop)
2965	{
2966	  int i;
2967	  for (i = fd->collapse; i < fd->ordered; i++)
2968	    {
2969	      tree type = TREE_TYPE (fd->loops[i].v);
2970	      tree this_cond
2971		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2972			       fold_convert (type, fd->loops[i].n1),
2973			       fold_convert (type, fd->loops[i].n2));
2974	      if (!integer_onep (this_cond))
2975		break;
2976	    }
2977	  if (i < fd->ordered)
2978	    {
2979	      cont_bb
2980		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2981	      add_bb_to_loop (cont_bb, l1_bb->loop_father);
2982	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2983	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2984	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2985	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2986	      make_edge (cont_bb, l1_bb, 0);
2987	      l2_bb = create_empty_bb (cont_bb);
2988	      broken_loop = false;
2989	    }
2990	}
2991      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2992      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2993					      ordered_lastprivate);
2994      if (counts[fd->collapse - 1])
2995	{
2996	  gcc_assert (fd->collapse == 1);
2997	  gsi = gsi_last_bb (l0_bb);
2998	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2999				   istart0, true);
3000	  gsi = gsi_last_bb (cont_bb);
3001	  t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3002			   build_int_cst (fd->iter_type, 1));
3003	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3004	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3005			      size_zero_node, NULL_TREE, NULL_TREE);
3006	  expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3007	  t = counts[fd->collapse - 1];
3008	}
3009      else if (fd->collapse > 1)
3010	t = fd->loop.v;
3011      else
3012	{
3013	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3014			   fd->loops[0].v, fd->loops[0].n1);
3015	  t = fold_convert (fd->iter_type, t);
3016	}
3017      gsi = gsi_last_bb (l0_bb);
3018      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3019			  size_zero_node, NULL_TREE, NULL_TREE);
3020      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3021				    false, GSI_CONTINUE_LINKING);
3022      expand_omp_build_assign (&gsi, aref, t, true);
3023    }
3024
3025  if (!broken_loop)
3026    {
3027      /* Code to control the increment and predicate for the sequential
3028	 loop goes in the CONT_BB.  */
3029      gsi = gsi_last_bb (cont_bb);
3030      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3031      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3032      vmain = gimple_omp_continue_control_use (cont_stmt);
3033      vback = gimple_omp_continue_control_def (cont_stmt);
3034
3035      if (!gimple_omp_for_combined_p (fd->for_stmt))
3036	{
3037	  if (POINTER_TYPE_P (type))
3038	    t = fold_build_pointer_plus (vmain, fd->loop.step);
3039	  else
3040	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3041	  t = force_gimple_operand_gsi (&gsi, t,
3042					DECL_P (vback)
3043					&& TREE_ADDRESSABLE (vback),
3044					NULL_TREE, true, GSI_SAME_STMT);
3045	  assign_stmt = gimple_build_assign (vback, t);
3046	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3047
3048	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3049	    {
3050	      tree tem;
3051	      if (fd->collapse > 1)
3052		tem = fd->loop.v;
3053	      else
3054		{
3055		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3056				     fd->loops[0].v, fd->loops[0].n1);
3057		  tem = fold_convert (fd->iter_type, tem);
3058		}
3059	      tree aref = build4 (ARRAY_REF, fd->iter_type,
3060				  counts[fd->ordered], size_zero_node,
3061				  NULL_TREE, NULL_TREE);
3062	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3063					      true, GSI_SAME_STMT);
3064	      expand_omp_build_assign (&gsi, aref, tem);
3065	    }
3066
3067	  t = build2 (fd->loop.cond_code, boolean_type_node,
3068		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3069		      iend);
3070	  gcond *cond_stmt = gimple_build_cond_empty (t);
3071	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3072	}
3073
3074      /* Remove GIMPLE_OMP_CONTINUE.  */
3075      gsi_remove (&gsi, true);
3076
3077      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3078	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3079
3080      /* Emit code to get the next parallel iteration in L2_BB.  */
3081      gsi = gsi_start_bb (l2_bb);
3082
3083      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3084			   build_fold_addr_expr (istart0),
3085			   build_fold_addr_expr (iend0));
3086      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3087				    false, GSI_CONTINUE_LINKING);
3088      if (TREE_TYPE (t) != boolean_type_node)
3089	t = fold_build2 (NE_EXPR, boolean_type_node,
3090			 t, build_int_cst (TREE_TYPE (t), 0));
3091      gcond *cond_stmt = gimple_build_cond_empty (t);
3092      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3093    }
3094
3095  /* Add the loop cleanup function.  */
3096  gsi = gsi_last_bb (exit_bb);
3097  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3098    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3099  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3100    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3101  else
3102    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3103  gcall *call_stmt = gimple_build_call (t, 0);
3104  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3105    gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3106  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3107  if (fd->ordered)
3108    {
3109      tree arr = counts[fd->ordered];
3110      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3111      TREE_THIS_VOLATILE (clobber) = 1;
3112      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3113			GSI_SAME_STMT);
3114    }
3115  gsi_remove (&gsi, true);
3116
3117  /* Connect the new blocks.  */
3118  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3119  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3120
3121  if (!broken_loop)
3122    {
3123      gimple_seq phis;
3124
3125      e = find_edge (cont_bb, l3_bb);
3126      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3127
3128      phis = phi_nodes (l3_bb);
3129      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3130	{
3131	  gimple *phi = gsi_stmt (gsi);
3132	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3133		   PHI_ARG_DEF_FROM_EDGE (phi, e));
3134	}
3135      remove_edge (e);
3136
3137      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3138      e = find_edge (cont_bb, l1_bb);
3139      if (e == NULL)
3140	{
3141	  e = BRANCH_EDGE (cont_bb);
3142	  gcc_assert (single_succ (e->dest) == l1_bb);
3143	}
3144      if (gimple_omp_for_combined_p (fd->for_stmt))
3145	{
3146	  remove_edge (e);
3147	  e = NULL;
3148	}
3149      else if (fd->collapse > 1)
3150	{
3151	  remove_edge (e);
3152	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3153	}
3154      else
3155	e->flags = EDGE_TRUE_VALUE;
3156      if (e)
3157	{
3158	  e->probability = REG_BR_PROB_BASE * 7 / 8;
3159	  find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3160	}
3161      else
3162	{
3163	  e = find_edge (cont_bb, l2_bb);
3164	  e->flags = EDGE_FALLTHRU;
3165	}
3166      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3167
3168      if (gimple_in_ssa_p (cfun))
3169	{
3170	  /* Add phis to the outer loop that connect to the phis in the inner,
3171	     original loop, and move the loop entry value of the inner phi to
3172	     the loop entry value of the outer phi.  */
3173	  gphi_iterator psi;
3174	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3175	    {
3176	      source_location locus;
3177	      gphi *nphi;
3178	      gphi *exit_phi = psi.phi ();
3179
3180	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3181	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3182
3183	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3184	      edge latch_to_l1 = find_edge (latch, l1_bb);
3185	      gphi *inner_phi
3186		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3187
3188	      tree t = gimple_phi_result (exit_phi);
3189	      tree new_res = copy_ssa_name (t, NULL);
3190	      nphi = create_phi_node (new_res, l0_bb);
3191
3192	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3193	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3194	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3195	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3196	      add_phi_arg (nphi, t, entry_to_l0, locus);
3197
3198	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3199	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3200
3201	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3202	    }
3203	}
3204
3205      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3206			       recompute_dominator (CDI_DOMINATORS, l2_bb));
3207      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3208			       recompute_dominator (CDI_DOMINATORS, l3_bb));
3209      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3210			       recompute_dominator (CDI_DOMINATORS, l0_bb));
3211      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3212			       recompute_dominator (CDI_DOMINATORS, l1_bb));
3213
3214      /* We enter expand_omp_for_generic with a loop.  This original loop may
3215	 have its own loop struct, or it may be part of an outer loop struct
3216	 (which may be the fake loop).  */
3217      struct loop *outer_loop = entry_bb->loop_father;
3218      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3219
3220      add_bb_to_loop (l2_bb, outer_loop);
3221
3222      /* We've added a new loop around the original loop.  Allocate the
3223	 corresponding loop struct.  */
3224      struct loop *new_loop = alloc_loop ();
3225      new_loop->header = l0_bb;
3226      new_loop->latch = l2_bb;
3227      add_loop (new_loop, outer_loop);
3228
3229      /* Allocate a loop structure for the original loop unless we already
3230	 had one.  */
3231      if (!orig_loop_has_loop_struct
3232	  && !gimple_omp_for_combined_p (fd->for_stmt))
3233	{
3234	  struct loop *orig_loop = alloc_loop ();
3235	  orig_loop->header = l1_bb;
3236	  /* The loop may have multiple latches.  */
3237	  add_loop (orig_loop, new_loop);
3238	}
3239    }
3240}
3241
3242/* A subroutine of expand_omp_for.  Generate code for a parallel
3243   loop with static schedule and no specified chunk size.  Given
3244   parameters:
3245
3246	for (V = N1; V cond N2; V += STEP) BODY;
3247
3248   where COND is "<" or ">", we generate pseudocode
3249
3250	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3251	if (cond is <)
3252	  adj = STEP - 1;
3253	else
3254	  adj = STEP + 1;
3255	if ((__typeof (V)) -1 > 0 && cond is >)
3256	  n = -(adj + N2 - N1) / -STEP;
3257	else
3258	  n = (adj + N2 - N1) / STEP;
3259	q = n / nthreads;
3260	tt = n % nthreads;
3261	if (threadid < tt) goto L3; else goto L4;
3262    L3:
3263	tt = 0;
3264	q = q + 1;
3265    L4:
3266	s0 = q * threadid + tt;
3267	e0 = s0 + q;
3268	V = s0 * STEP + N1;
3269	if (s0 >= e0) goto L2; else goto L0;
3270    L0:
3271	e = e0 * STEP + N1;
3272    L1:
3273	BODY;
3274	V += STEP;
3275	if (V cond e) goto L1;
3276    L2:
3277*/
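
/* For illustration (example numbers only, not taken from the code below):
   with n = 10 logical iterations and nthreads = 4, the partitioning in the
   pseudocode above gives q = 2 and tt = 2, so threads 0 and 1 each run
   q + 1 = 3 iterations ([0,3) and [3,6)) while threads 2 and 3 each run
   q = 2 iterations ([6,8) and [8,10)); every iteration is covered exactly
   once.  */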
3278
3279static void
3280expand_omp_for_static_nochunk (struct omp_region *region,
3281			       struct omp_for_data *fd,
3282			       gimple *inner_stmt)
3283{
3284  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3285  tree type, itype, vmain, vback;
3286  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3287  basic_block body_bb, cont_bb, collapse_bb = NULL;
3288  basic_block fin_bb;
3289  gimple_stmt_iterator gsi;
3290  edge ep;
3291  bool broken_loop = region->cont == NULL;
3292  tree *counts = NULL;
3293  tree n1, n2, step;
3294
3295  itype = type = TREE_TYPE (fd->loop.v);
3296  if (POINTER_TYPE_P (type))
3297    itype = signed_type_for (type);
3298
3299  entry_bb = region->entry;
3300  cont_bb = region->cont;
3301  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3302  fin_bb = BRANCH_EDGE (entry_bb)->dest;
3303  gcc_assert (broken_loop
3304	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3305  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3306  body_bb = single_succ (seq_start_bb);
3307  if (!broken_loop)
3308    {
3309      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3310		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3311      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3312    }
3313  exit_bb = region->exit;
3314
3315  /* Iteration space partitioning goes in ENTRY_BB.  */
3316  gsi = gsi_last_bb (entry_bb);
3317  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3318
3319  if (fd->collapse > 1)
3320    {
3321      int first_zero_iter = -1, dummy = -1;
3322      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3323
3324      counts = XALLOCAVEC (tree, fd->collapse);
3325      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3326				  fin_bb, first_zero_iter,
3327				  dummy_bb, dummy, l2_dom_bb);
3328      t = NULL_TREE;
3329    }
3330  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3331    t = integer_one_node;
3332  else
3333    t = fold_binary (fd->loop.cond_code, boolean_type_node,
3334		     fold_convert (type, fd->loop.n1),
3335		     fold_convert (type, fd->loop.n2));
3336  if (fd->collapse == 1
3337      && TYPE_UNSIGNED (type)
3338      && (t == NULL_TREE || !integer_onep (t)))
3339    {
3340      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3341      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3342				     true, GSI_SAME_STMT);
3343      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3344      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3345				     true, GSI_SAME_STMT);
3346      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3347					    NULL_TREE, NULL_TREE);
3348      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3349      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3350		     expand_omp_regimplify_p, NULL, NULL)
3351	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3352			expand_omp_regimplify_p, NULL, NULL))
3353	{
3354	  gsi = gsi_for_stmt (cond_stmt);
3355	  gimple_regimplify_operands (cond_stmt, &gsi);
3356	}
3357      ep = split_block (entry_bb, cond_stmt);
3358      ep->flags = EDGE_TRUE_VALUE;
3359      entry_bb = ep->dest;
3360      ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3361      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3362      ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3363      if (gimple_in_ssa_p (cfun))
3364	{
3365	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3366	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3367	       !gsi_end_p (gpi); gsi_next (&gpi))
3368	    {
3369	      gphi *phi = gpi.phi ();
3370	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3371			   ep, UNKNOWN_LOCATION);
3372	    }
3373	}
3374      gsi = gsi_last_bb (entry_bb);
3375    }
3376
3377  switch (gimple_omp_for_kind (fd->for_stmt))
3378    {
3379    case GF_OMP_FOR_KIND_FOR:
3380      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3381      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3382      break;
3383    case GF_OMP_FOR_KIND_DISTRIBUTE:
3384      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3385      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3386      break;
3387    default:
3388      gcc_unreachable ();
3389    }
3390  nthreads = build_call_expr (nthreads, 0);
3391  nthreads = fold_convert (itype, nthreads);
3392  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3393				       true, GSI_SAME_STMT);
3394  threadid = build_call_expr (threadid, 0);
3395  threadid = fold_convert (itype, threadid);
3396  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3397				       true, GSI_SAME_STMT);
3398
3399  n1 = fd->loop.n1;
3400  n2 = fd->loop.n2;
3401  step = fd->loop.step;
3402  if (gimple_omp_for_combined_into_p (fd->for_stmt))
3403    {
3404      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3405				     OMP_CLAUSE__LOOPTEMP_);
3406      gcc_assert (innerc);
3407      n1 = OMP_CLAUSE_DECL (innerc);
3408      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3409				OMP_CLAUSE__LOOPTEMP_);
3410      gcc_assert (innerc);
3411      n2 = OMP_CLAUSE_DECL (innerc);
3412    }
3413  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3414				 true, NULL_TREE, true, GSI_SAME_STMT);
3415  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3416				 true, NULL_TREE, true, GSI_SAME_STMT);
3417  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3418				   true, NULL_TREE, true, GSI_SAME_STMT);
3419
3420  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3421  t = fold_build2 (PLUS_EXPR, itype, step, t);
3422  t = fold_build2 (PLUS_EXPR, itype, t, n2);
3423  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3424  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3425    t = fold_build2 (TRUNC_DIV_EXPR, itype,
3426		     fold_build1 (NEGATE_EXPR, itype, t),
3427		     fold_build1 (NEGATE_EXPR, itype, step));
3428  else
3429    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3430  t = fold_convert (itype, t);
3431  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3432
3433  q = create_tmp_reg (itype, "q");
3434  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3435  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3437
3438  tt = create_tmp_reg (itype, "tt");
3439  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3440  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3441  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3442
3443  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3444  gcond *cond_stmt = gimple_build_cond_empty (t);
3445  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3446
3447  second_bb = split_block (entry_bb, cond_stmt)->dest;
3448  gsi = gsi_last_bb (second_bb);
3449  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3450
3451  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3452		     GSI_SAME_STMT);
3453  gassign *assign_stmt
3454    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3455  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3456
3457  third_bb = split_block (second_bb, assign_stmt)->dest;
3458  gsi = gsi_last_bb (third_bb);
3459  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3460
3461  t = build2 (MULT_EXPR, itype, q, threadid);
3462  t = build2 (PLUS_EXPR, itype, t, tt);
3463  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3464
3465  t = fold_build2 (PLUS_EXPR, itype, s0, q);
3466  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3467
3468  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3469  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3470
3471  /* Remove the GIMPLE_OMP_FOR statement.  */
3472  gsi_remove (&gsi, true);
3473
3474  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3475  gsi = gsi_start_bb (seq_start_bb);
3476
3477  tree startvar = fd->loop.v;
3478  tree endvar = NULL_TREE;
3479
3480  if (gimple_omp_for_combined_p (fd->for_stmt))
3481    {
3482      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3483		     ? gimple_omp_parallel_clauses (inner_stmt)
3484		     : gimple_omp_for_clauses (inner_stmt);
3485      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3486      gcc_assert (innerc);
3487      startvar = OMP_CLAUSE_DECL (innerc);
3488      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3489				OMP_CLAUSE__LOOPTEMP_);
3490      gcc_assert (innerc);
3491      endvar = OMP_CLAUSE_DECL (innerc);
3492      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3493	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3494	{
3495	  int i;
3496	  for (i = 1; i < fd->collapse; i++)
3497	    {
3498	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3499					OMP_CLAUSE__LOOPTEMP_);
3500	      gcc_assert (innerc);
3501	    }
3502	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3503				    OMP_CLAUSE__LOOPTEMP_);
3504	  if (innerc)
3505	    {
3506	      /* If needed (distribute parallel for with lastprivate),
3507		 propagate down the total number of iterations.  */
3508	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3509				     fd->loop.n2);
3510	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3511					    GSI_CONTINUE_LINKING);
3512	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3513	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3514	    }
3515	}
3516    }
3517  t = fold_convert (itype, s0);
3518  t = fold_build2 (MULT_EXPR, itype, t, step);
3519  if (POINTER_TYPE_P (type))
3520    t = fold_build_pointer_plus (n1, t);
3521  else
3522    t = fold_build2 (PLUS_EXPR, type, t, n1);
3523  t = fold_convert (TREE_TYPE (startvar), t);
3524  t = force_gimple_operand_gsi (&gsi, t,
3525				DECL_P (startvar)
3526				&& TREE_ADDRESSABLE (startvar),
3527				NULL_TREE, false, GSI_CONTINUE_LINKING);
3528  assign_stmt = gimple_build_assign (startvar, t);
3529  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3530
3531  t = fold_convert (itype, e0);
3532  t = fold_build2 (MULT_EXPR, itype, t, step);
3533  if (POINTER_TYPE_P (type))
3534    t = fold_build_pointer_plus (n1, t);
3535  else
3536    t = fold_build2 (PLUS_EXPR, type, t, n1);
3537  t = fold_convert (TREE_TYPE (startvar), t);
3538  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3539				false, GSI_CONTINUE_LINKING);
3540  if (endvar)
3541    {
3542      assign_stmt = gimple_build_assign (endvar, e);
3543      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3545	assign_stmt = gimple_build_assign (fd->loop.v, e);
3546      else
3547	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3548      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3549    }
3550  /* Handle linear clause adjustments.  */
3551  tree itercnt = NULL_TREE;
3552  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3553    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3554	 c; c = OMP_CLAUSE_CHAIN (c))
3555      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3556	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3557	{
3558	  tree d = OMP_CLAUSE_DECL (c);
3559	  bool is_ref = omp_is_reference (d);
3560	  tree t = d, a, dest;
3561	  if (is_ref)
3562	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3563	  if (itercnt == NULL_TREE)
3564	    {
3565	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
3566		{
3567		  itercnt = fold_build2 (MINUS_EXPR, itype,
3568					 fold_convert (itype, n1),
3569					 fold_convert (itype, fd->loop.n1));
3570		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3571		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3572		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3573						      NULL_TREE, false,
3574						      GSI_CONTINUE_LINKING);
3575		}
3576	      else
3577		itercnt = s0;
3578	    }
3579	  tree type = TREE_TYPE (t);
3580	  if (POINTER_TYPE_P (type))
3581	    type = sizetype;
3582	  a = fold_build2 (MULT_EXPR, type,
3583			   fold_convert (type, itercnt),
3584			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3585	  dest = unshare_expr (t);
3586	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3587			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3588	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3589					false, GSI_CONTINUE_LINKING);
3590	  assign_stmt = gimple_build_assign (dest, t);
3591	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3592	}
3593  if (fd->collapse > 1)
3594    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3595
3596  if (!broken_loop)
3597    {
3598      /* The code controlling the sequential loop replaces the
3599	 GIMPLE_OMP_CONTINUE.  */
3600      gsi = gsi_last_bb (cont_bb);
3601      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3602      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3603      vmain = gimple_omp_continue_control_use (cont_stmt);
3604      vback = gimple_omp_continue_control_def (cont_stmt);
3605
3606      if (!gimple_omp_for_combined_p (fd->for_stmt))
3607	{
3608	  if (POINTER_TYPE_P (type))
3609	    t = fold_build_pointer_plus (vmain, step);
3610	  else
3611	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
3612	  t = force_gimple_operand_gsi (&gsi, t,
3613					DECL_P (vback)
3614					&& TREE_ADDRESSABLE (vback),
3615					NULL_TREE, true, GSI_SAME_STMT);
3616	  assign_stmt = gimple_build_assign (vback, t);
3617	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3618
3619	  t = build2 (fd->loop.cond_code, boolean_type_node,
3620		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
3621		      ? t : vback, e);
3622	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3623	}
3624
3625      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
3626      gsi_remove (&gsi, true);
3627
3628      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3629	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3630    }
3631
3632  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
3633  gsi = gsi_last_bb (exit_bb);
3634  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3635    {
3636      t = gimple_omp_return_lhs (gsi_stmt (gsi));
3637      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3638    }
3639  gsi_remove (&gsi, true);
3640
3641  /* Connect all the blocks.  */
3642  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3643  ep->probability = REG_BR_PROB_BASE / 4 * 3;
3644  ep = find_edge (entry_bb, second_bb);
3645  ep->flags = EDGE_TRUE_VALUE;
3646  ep->probability = REG_BR_PROB_BASE / 4;
3647  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3648  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3649
3650  if (!broken_loop)
3651    {
3652      ep = find_edge (cont_bb, body_bb);
3653      if (ep == NULL)
3654	{
3655	  ep = BRANCH_EDGE (cont_bb);
3656	  gcc_assert (single_succ (ep->dest) == body_bb);
3657	}
3658      if (gimple_omp_for_combined_p (fd->for_stmt))
3659	{
3660	  remove_edge (ep);
3661	  ep = NULL;
3662	}
3663      else if (fd->collapse > 1)
3664	{
3665	  remove_edge (ep);
3666	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3667	}
3668      else
3669	ep->flags = EDGE_TRUE_VALUE;
3670      find_edge (cont_bb, fin_bb)->flags
3671	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3672    }
3673
3674  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3675  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3676  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3677
3678  set_immediate_dominator (CDI_DOMINATORS, body_bb,
3679			   recompute_dominator (CDI_DOMINATORS, body_bb));
3680  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3681			   recompute_dominator (CDI_DOMINATORS, fin_bb));
3682
3683  struct loop *loop = body_bb->loop_father;
3684  if (loop != entry_bb->loop_father)
3685    {
3686      gcc_assert (broken_loop || loop->header == body_bb);
3687      gcc_assert (broken_loop
3688		  || loop->latch == region->cont
3689		  || single_pred (loop->latch) == region->cont);
3690      return;
3691    }
3692
3693  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3694    {
3695      loop = alloc_loop ();
3696      loop->header = body_bb;
3697      if (collapse_bb == NULL)
3698	loop->latch = cont_bb;
3699      add_loop (loop, body_bb->loop_father);
3700    }
3701}
3702
3703/* Return phi in E->DEST with ARG on edge E.  */
3704
3705static gphi *
3706find_phi_with_arg_on_edge (tree arg, edge e)
3707{
3708  basic_block bb = e->dest;
3709
3710  for (gphi_iterator gpi = gsi_start_phis (bb);
3711       !gsi_end_p (gpi);
3712       gsi_next (&gpi))
3713    {
3714      gphi *phi = gpi.phi ();
3715      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3716	return phi;
3717    }
3718
3719  return NULL;
3720}
3721
3722/* A subroutine of expand_omp_for.  Generate code for a parallel
3723   loop with static schedule and a specified chunk size.  Given
3724   parameters:
3725
3726	for (V = N1; V cond N2; V += STEP) BODY;
3727
3728   where COND is "<" or ">", we generate pseudocode
3729
3730	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3731	if (cond is <)
3732	  adj = STEP - 1;
3733	else
3734	  adj = STEP + 1;
3735	if ((__typeof (V)) -1 > 0 && cond is >)
3736	  n = -(adj + N2 - N1) / -STEP;
3737	else
3738	  n = (adj + N2 - N1) / STEP;
3739	trip = 0;
3740	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
3741					      here so that V is defined
3742					      if the loop is not entered
3743    L0:
3744	s0 = (trip * nthreads + threadid) * CHUNK;
3745	e0 = min (s0 + CHUNK, n);
3746	if (s0 < n) goto L1; else goto L4;
3747    L1:
3748	V = s0 * STEP + N1;
3749	e = e0 * STEP + N1;
3750    L2:
3751	BODY;
3752	V += STEP;
3753	if (V cond e) goto L2; else goto L3;
3754    L3:
3755	trip += 1;
3756	goto L0;
3757    L4:
3758*/
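
/* For illustration (example numbers only): with n = 20, nthreads = 4,
   CHUNK = 2 and threadid = 1, the pseudocode above gives
     trip 0: s0 = (0*4 + 1)*2 = 2,  e0 = min (4, 20)  = 4
     trip 1: s0 = (1*4 + 1)*2 = 10, e0 = min (12, 20) = 12
     trip 2: s0 = (2*4 + 1)*2 = 18, e0 = min (20, 20) = 20
     trip 3: s0 = 26 >= n, so this thread leaves the loop at L4.  */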
3759
3760static void
3761expand_omp_for_static_chunk (struct omp_region *region,
3762			     struct omp_for_data *fd, gimple *inner_stmt)
3763{
3764  tree n, s0, e0, e, t;
3765  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3766  tree type, itype, vmain, vback, vextra;
3767  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3768  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3769  gimple_stmt_iterator gsi;
3770  edge se;
3771  bool broken_loop = region->cont == NULL;
3772  tree *counts = NULL;
3773  tree n1, n2, step;
3774
3775  itype = type = TREE_TYPE (fd->loop.v);
3776  if (POINTER_TYPE_P (type))
3777    itype = signed_type_for (type);
3778
3779  entry_bb = region->entry;
3780  se = split_block (entry_bb, last_stmt (entry_bb));
3781  entry_bb = se->src;
3782  iter_part_bb = se->dest;
3783  cont_bb = region->cont;
3784  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3785  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3786  gcc_assert (broken_loop
3787	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3788  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3789  body_bb = single_succ (seq_start_bb);
3790  if (!broken_loop)
3791    {
3792      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3793		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3794      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3795      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3796    }
3797  exit_bb = region->exit;
3798
3799  /* Trip and adjustment setup goes in ENTRY_BB.  */
3800  gsi = gsi_last_bb (entry_bb);
3801  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3802
3803  if (fd->collapse > 1)
3804    {
3805      int first_zero_iter = -1, dummy = -1;
3806      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3807
3808      counts = XALLOCAVEC (tree, fd->collapse);
3809      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3810				  fin_bb, first_zero_iter,
3811				  dummy_bb, dummy, l2_dom_bb);
3812      t = NULL_TREE;
3813    }
3814  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3815    t = integer_one_node;
3816  else
3817    t = fold_binary (fd->loop.cond_code, boolean_type_node,
3818		     fold_convert (type, fd->loop.n1),
3819		     fold_convert (type, fd->loop.n2));
3820  if (fd->collapse == 1
3821      && TYPE_UNSIGNED (type)
3822      && (t == NULL_TREE || !integer_onep (t)))
3823    {
3824      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3825      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3826				     true, GSI_SAME_STMT);
3827      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3828      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3829				     true, GSI_SAME_STMT);
3830      gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3831					    NULL_TREE, NULL_TREE);
3832      gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3833      if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3834		     expand_omp_regimplify_p, NULL, NULL)
3835	  || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3836			expand_omp_regimplify_p, NULL, NULL))
3837	{
3838	  gsi = gsi_for_stmt (cond_stmt);
3839	  gimple_regimplify_operands (cond_stmt, &gsi);
3840	}
3841      se = split_block (entry_bb, cond_stmt);
3842      se->flags = EDGE_TRUE_VALUE;
3843      entry_bb = se->dest;
3844      se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3845      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3846      se->probability = REG_BR_PROB_BASE / 2000 - 1;
3847      if (gimple_in_ssa_p (cfun))
3848	{
3849	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3850	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3851	       !gsi_end_p (gpi); gsi_next (&gpi))
3852	    {
3853	      gphi *phi = gpi.phi ();
3854	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3855			   se, UNKNOWN_LOCATION);
3856	    }
3857	}
3858      gsi = gsi_last_bb (entry_bb);
3859    }
3860
3861  switch (gimple_omp_for_kind (fd->for_stmt))
3862    {
3863    case GF_OMP_FOR_KIND_FOR:
3864      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3865      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3866      break;
3867    case GF_OMP_FOR_KIND_DISTRIBUTE:
3868      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3869      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3870      break;
3871    default:
3872      gcc_unreachable ();
3873    }
3874  nthreads = build_call_expr (nthreads, 0);
3875  nthreads = fold_convert (itype, nthreads);
3876  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3877				       true, GSI_SAME_STMT);
3878  threadid = build_call_expr (threadid, 0);
3879  threadid = fold_convert (itype, threadid);
3880  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3881				       true, GSI_SAME_STMT);
3882
3883  n1 = fd->loop.n1;
3884  n2 = fd->loop.n2;
3885  step = fd->loop.step;
3886  if (gimple_omp_for_combined_into_p (fd->for_stmt))
3887    {
3888      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3889				     OMP_CLAUSE__LOOPTEMP_);
3890      gcc_assert (innerc);
3891      n1 = OMP_CLAUSE_DECL (innerc);
3892      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3893				OMP_CLAUSE__LOOPTEMP_);
3894      gcc_assert (innerc);
3895      n2 = OMP_CLAUSE_DECL (innerc);
3896    }
3897  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3898				 true, NULL_TREE, true, GSI_SAME_STMT);
3899  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3900				 true, NULL_TREE, true, GSI_SAME_STMT);
3901  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3902				   true, NULL_TREE, true, GSI_SAME_STMT);
3903  tree chunk_size = fold_convert (itype, fd->chunk_size);
3904  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3905  chunk_size
3906    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3907				GSI_SAME_STMT);
3908
3909  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3910  t = fold_build2 (PLUS_EXPR, itype, step, t);
3911  t = fold_build2 (PLUS_EXPR, itype, t, n2);
3912  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3913  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3914    t = fold_build2 (TRUNC_DIV_EXPR, itype,
3915		     fold_build1 (NEGATE_EXPR, itype, t),
3916		     fold_build1 (NEGATE_EXPR, itype, step));
3917  else
3918    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3919  t = fold_convert (itype, t);
3920  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921				true, GSI_SAME_STMT);
3922
3923  trip_var = create_tmp_reg (itype, ".trip");
3924  if (gimple_in_ssa_p (cfun))
3925    {
3926      trip_init = make_ssa_name (trip_var);
3927      trip_main = make_ssa_name (trip_var);
3928      trip_back = make_ssa_name (trip_var);
3929    }
3930  else
3931    {
3932      trip_init = trip_var;
3933      trip_main = trip_var;
3934      trip_back = trip_var;
3935    }
3936
3937  gassign *assign_stmt
3938    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3939  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3940
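  /* Compute VEXTRA = threadid * CHUNK * STEP + N1, the extra definition of
     V from the pseudocode above; it provides a value for V even when the
     sequential loop is never entered (in SSA form it feeds the PHI node in
     ITER_PART_BB built further below).  */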
3941  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3942  t = fold_build2 (MULT_EXPR, itype, t, step);
3943  if (POINTER_TYPE_P (type))
3944    t = fold_build_pointer_plus (n1, t);
3945  else
3946    t = fold_build2 (PLUS_EXPR, type, t, n1);
3947  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3948				     true, GSI_SAME_STMT);
3949
3950  /* Remove the GIMPLE_OMP_FOR.  */
3951  gsi_remove (&gsi, true);
3952
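  /* GSIF remembers the end of ENTRY_BB (just after the removed
     GIMPLE_OMP_FOR).  The linear clause handling below uses it to emit
     computations that only need to run once per thread (the saved initial
     values and the iteration count bias) instead of once per chunk.  */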
3953  gimple_stmt_iterator gsif = gsi;
3954
3955  /* Iteration space partitioning goes in ITER_PART_BB.  */
3956  gsi = gsi_last_bb (iter_part_bb);
3957
3958  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3959  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3960  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3961  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962				 false, GSI_CONTINUE_LINKING);
3963
3964  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3965  t = fold_build2 (MIN_EXPR, itype, t, n);
3966  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3967				 false, GSI_CONTINUE_LINKING);
3968
3969  t = build2 (LT_EXPR, boolean_type_node, s0, n);
3970  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3971
3972  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
3973  gsi = gsi_start_bb (seq_start_bb);
3974
3975  tree startvar = fd->loop.v;
3976  tree endvar = NULL_TREE;
3977
3978  if (gimple_omp_for_combined_p (fd->for_stmt))
3979    {
3980      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3981		     ? gimple_omp_parallel_clauses (inner_stmt)
3982		     : gimple_omp_for_clauses (inner_stmt);
3983      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3984      gcc_assert (innerc);
3985      startvar = OMP_CLAUSE_DECL (innerc);
3986      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3987				OMP_CLAUSE__LOOPTEMP_);
3988      gcc_assert (innerc);
3989      endvar = OMP_CLAUSE_DECL (innerc);
3990      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3991	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3992	{
3993	  int i;
3994	  for (i = 1; i < fd->collapse; i++)
3995	    {
3996	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3997					OMP_CLAUSE__LOOPTEMP_);
3998	      gcc_assert (innerc);
3999	    }
4000	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4001				    OMP_CLAUSE__LOOPTEMP_);
4002	  if (innerc)
4003	    {
4004	      /* If needed (distribute parallel for with lastprivate),
4005		 propagate down the total number of iterations.  */
4006	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4007				     fd->loop.n2);
4008	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4009					    GSI_CONTINUE_LINKING);
4010	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4011	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4012	    }
4013	}
4014    }
4015
4016  t = fold_convert (itype, s0);
4017  t = fold_build2 (MULT_EXPR, itype, t, step);
4018  if (POINTER_TYPE_P (type))
4019    t = fold_build_pointer_plus (n1, t);
4020  else
4021    t = fold_build2 (PLUS_EXPR, type, t, n1);
4022  t = fold_convert (TREE_TYPE (startvar), t);
4023  t = force_gimple_operand_gsi (&gsi, t,
4024				DECL_P (startvar)
4025				&& TREE_ADDRESSABLE (startvar),
4026				NULL_TREE, false, GSI_CONTINUE_LINKING);
4027  assign_stmt = gimple_build_assign (startvar, t);
4028  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4029
4030  t = fold_convert (itype, e0);
4031  t = fold_build2 (MULT_EXPR, itype, t, step);
4032  if (POINTER_TYPE_P (type))
4033    t = fold_build_pointer_plus (n1, t);
4034  else
4035    t = fold_build2 (PLUS_EXPR, type, t, n1);
4036  t = fold_convert (TREE_TYPE (startvar), t);
4037  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4038				false, GSI_CONTINUE_LINKING);
4039  if (endvar)
4040    {
4041      assign_stmt = gimple_build_assign (endvar, e);
4042      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4044	assign_stmt = gimple_build_assign (fd->loop.v, e);
4045      else
4046	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4047      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4048    }
4049  /* Handle linear clause adjustments.  */
4050  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4051  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4052    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4053	 c; c = OMP_CLAUSE_CHAIN (c))
4054      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4055	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4056	{
4057	  tree d = OMP_CLAUSE_DECL (c);
4058	  bool is_ref = omp_is_reference (d);
4059	  tree t = d, a, dest;
4060	  if (is_ref)
4061	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4062	  tree type = TREE_TYPE (t);
4063	  if (POINTER_TYPE_P (type))
4064	    type = sizetype;
4065	  dest = unshare_expr (t);
4066	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4067	  expand_omp_build_assign (&gsif, v, t);
4068	  if (itercnt == NULL_TREE)
4069	    {
4070	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4071		{
4072		  itercntbias
4073		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4074				   fold_convert (itype, fd->loop.n1));
4075		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4076					     itercntbias, step);
4077		  itercntbias
4078		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
4079						NULL_TREE, true,
4080						GSI_SAME_STMT);
4081		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4082		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4083						      NULL_TREE, false,
4084						      GSI_CONTINUE_LINKING);
4085		}
4086	      else
4087		itercnt = s0;
4088	    }
4089	  a = fold_build2 (MULT_EXPR, type,
4090			   fold_convert (type, itercnt),
4091			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4092	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4093			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4094	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4095					false, GSI_CONTINUE_LINKING);
4096	  assign_stmt = gimple_build_assign (dest, t);
4097	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4098	}
4099  if (fd->collapse > 1)
4100    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4101
4102  if (!broken_loop)
4103    {
4104      /* The code controlling the sequential loop goes in CONT_BB,
4105	 replacing the GIMPLE_OMP_CONTINUE.  */
4106      gsi = gsi_last_bb (cont_bb);
4107      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4108      vmain = gimple_omp_continue_control_use (cont_stmt);
4109      vback = gimple_omp_continue_control_def (cont_stmt);
4110
4111      if (!gimple_omp_for_combined_p (fd->for_stmt))
4112	{
4113	  if (POINTER_TYPE_P (type))
4114	    t = fold_build_pointer_plus (vmain, step);
4115	  else
4116	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
4117	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4118	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4119					  true, GSI_SAME_STMT);
4120	  assign_stmt = gimple_build_assign (vback, t);
4121	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4122
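	  /* With a chunk size of 1 every chunk contains exactly one
	     iteration, so the sequential loop never repeats; emit a
	     constant-false condition instead of comparing V against E.  */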
4123	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4124	    t = build2 (EQ_EXPR, boolean_type_node,
4125			build_int_cst (itype, 0),
4126			build_int_cst (itype, 1));
4127	  else
4128	    t = build2 (fd->loop.cond_code, boolean_type_node,
4129			DECL_P (vback) && TREE_ADDRESSABLE (vback)
4130			? t : vback, e);
4131	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4132	}
4133
4134      /* Remove GIMPLE_OMP_CONTINUE.  */
4135      gsi_remove (&gsi, true);
4136
4137      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4138	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4139
4140      /* Trip update code goes into TRIP_UPDATE_BB.  */
4141      gsi = gsi_start_bb (trip_update_bb);
4142
4143      t = build_int_cst (itype, 1);
4144      t = build2 (PLUS_EXPR, itype, trip_main, t);
4145      assign_stmt = gimple_build_assign (trip_back, t);
4146      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4147    }
4148
4149  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
4150  gsi = gsi_last_bb (exit_bb);
4151  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4152    {
4153      t = gimple_omp_return_lhs (gsi_stmt (gsi));
4154      gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4155    }
4156  gsi_remove (&gsi, true);
4157
4158  /* Connect the new blocks.  */
4159  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4160  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4161
4162  if (!broken_loop)
4163    {
4164      se = find_edge (cont_bb, body_bb);
4165      if (se == NULL)
4166	{
4167	  se = BRANCH_EDGE (cont_bb);
4168	  gcc_assert (single_succ (se->dest) == body_bb);
4169	}
4170      if (gimple_omp_for_combined_p (fd->for_stmt))
4171	{
4172	  remove_edge (se);
4173	  se = NULL;
4174	}
4175      else if (fd->collapse > 1)
4176	{
4177	  remove_edge (se);
4178	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4179	}
4180      else
4181	se->flags = EDGE_TRUE_VALUE;
4182      find_edge (cont_bb, trip_update_bb)->flags
4183	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4184
4185      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4186				iter_part_bb);
4187    }
4188
4189  if (gimple_in_ssa_p (cfun))
4190    {
4191      gphi_iterator psi;
4192      gphi *phi;
4193      edge re, ene;
4194      edge_var_map *vm;
4195      size_t i;
4196
4197      gcc_assert (fd->collapse == 1 && !broken_loop);
4198
4199      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4200	 remove arguments of the phi nodes in fin_bb.  We need to create
4201	 appropriate phi nodes in iter_part_bb instead.  */
4202      se = find_edge (iter_part_bb, fin_bb);
4203      re = single_succ_edge (trip_update_bb);
4204      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4205      ene = single_succ_edge (entry_bb);
4206
4207      psi = gsi_start_phis (fin_bb);
4208      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4209	   gsi_next (&psi), ++i)
4210	{
4211	  gphi *nphi;
4212	  source_location locus;
4213
4214	  phi = psi.phi ();
4215	  t = gimple_phi_result (phi);
4216	  gcc_assert (t == redirect_edge_var_map_result (vm));
4217
4218	  if (!single_pred_p (fin_bb))
4219	    t = copy_ssa_name (t, phi);
4220
4221	  nphi = create_phi_node (t, iter_part_bb);
4222
4223	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4224	  locus = gimple_phi_arg_location_from_edge (phi, se);
4225
4226	  /* A special case -- fd->loop.v is not yet computed in
4227	     iter_part_bb; we need to use vextra instead.  */
4228	  if (t == fd->loop.v)
4229	    t = vextra;
4230	  add_phi_arg (nphi, t, ene, locus);
4231	  locus = redirect_edge_var_map_location (vm);
4232	  tree back_arg = redirect_edge_var_map_def (vm);
4233	  add_phi_arg (nphi, back_arg, re, locus);
4234	  edge ce = find_edge (cont_bb, body_bb);
4235	  if (ce == NULL)
4236	    {
4237	      ce = BRANCH_EDGE (cont_bb);
4238	      gcc_assert (single_succ (ce->dest) == body_bb);
4239	      ce = single_succ_edge (ce->dest);
4240	    }
4241	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4242	  gcc_assert (inner_loop_phi != NULL);
4243	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4244		       find_edge (seq_start_bb, body_bb), locus);
4245
4246	  if (!single_pred_p (fin_bb))
4247	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4248	}
4249      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4250      redirect_edge_var_map_clear (re);
4251      if (single_pred_p (fin_bb))
4252	while (1)
4253	  {
4254	    psi = gsi_start_phis (fin_bb);
4255	    if (gsi_end_p (psi))
4256	      break;
4257	    remove_phi_node (&psi, false);
4258	  }
4259
4260      /* Make phi node for trip.  */
4261      phi = create_phi_node (trip_main, iter_part_bb);
4262      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4263		   UNKNOWN_LOCATION);
4264      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4265		   UNKNOWN_LOCATION);
4266    }
4267
4268  if (!broken_loop)
4269    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4270  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4271			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4272  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4273			   recompute_dominator (CDI_DOMINATORS, fin_bb));
4274  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4275			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4276  set_immediate_dominator (CDI_DOMINATORS, body_bb,
4277			   recompute_dominator (CDI_DOMINATORS, body_bb));
4278
4279  if (!broken_loop)
4280    {
4281      struct loop *loop = body_bb->loop_father;
4282      struct loop *trip_loop = alloc_loop ();
4283      trip_loop->header = iter_part_bb;
4284      trip_loop->latch = trip_update_bb;
4285      add_loop (trip_loop, iter_part_bb->loop_father);
4286
4287      if (loop != entry_bb->loop_father)
4288	{
4289	  gcc_assert (loop->header == body_bb);
4290	  gcc_assert (loop->latch == region->cont
4291		      || single_pred (loop->latch) == region->cont);
4292	  trip_loop->inner = loop;
4293	  return;
4294	}
4295
4296      if (!gimple_omp_for_combined_p (fd->for_stmt))
4297	{
4298	  loop = alloc_loop ();
4299	  loop->header = body_bb;
4300	  if (collapse_bb == NULL)
4301	    loop->latch = cont_bb;
4302	  add_loop (loop, trip_loop);
4303	}
4304    }
4305}
4306
4307/* A subroutine of expand_omp_for.  Generate code for _Cilk_for loop.
4308   Given parameters:
4309   for (V = N1; V cond N2; V += STEP) BODY;
4310
4311   where COND is "<" or ">" or "!=", we generate pseudocode
4312
4313   for (ind_var = low; ind_var < high; ind_var++)
4314     {
4315       V = n1 + (ind_var * STEP)
4316
4317       <BODY>
4318     }
4319
4320   In the above pseudocode, low and high are function parameters of the
4321   child function.  In the function below, we insert a temporary
4322   variable and calls to two OMP functions that will not otherwise be
4323   found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4324   with _Cilk_for).  These functions are replaced with low and high
4325   by the function that handles taskreg.  */
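
/* For illustration (example numbers only): for

     _Cilk_for (int v = 10; v < 20; v += 3)

   there are four logical iterations, so a worker handed the subrange
   [__low, __high) = [1, 3) executes ind_var = 1 and ind_var = 2, that is
   v = 10 + 1*3 = 13 and v = 10 + 2*3 = 16.  */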
4326
4327
4328static void
4329expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4330{
4331  bool broken_loop = region->cont == NULL;
4332  basic_block entry_bb = region->entry;
4333  basic_block cont_bb = region->cont;
4334
4335  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4336  gcc_assert (broken_loop
4337	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4338  basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4339  basic_block l1_bb, l2_bb;
4340
4341  if (!broken_loop)
4342    {
4343      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4344      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4345      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4346      l2_bb = BRANCH_EDGE (entry_bb)->dest;
4347    }
4348  else
4349    {
4350      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4351      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4352      l2_bb = single_succ (l1_bb);
4353    }
4354  basic_block exit_bb = region->exit;
4355  basic_block l2_dom_bb = NULL;
4356
4357  gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4358
4359  /* The statements below, up to the "tree high_val = ..." declaration,
4360     are pseudo statements used to pass information to expand_omp_taskreg.
4361     low_val and high_val will be replaced by the __low and __high
4362     parameters of the child function.
4363
4364     The call_exprs part is a place-holder; it is mainly used to
4365     identify distinctly to the top-level part that this is
4366     where low and high should be put (the reasoning is given in
4367     the header comment).  */
4368
4369  gomp_parallel *par_stmt
4370    = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4371  tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4372  tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4373  for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4374    {
4375      if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4376	high_val = t;
4377      else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4378	low_val = t;
4379    }
4380  gcc_assert (low_val && high_val);
4381
4382  tree type = TREE_TYPE (low_val);
4383  tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4384  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4385
4386  /* Not needed in SSA form right now.  */
4387  gcc_assert (!gimple_in_ssa_p (cfun));
4388  if (l2_dom_bb == NULL)
4389    l2_dom_bb = l1_bb;
4390
4391  tree n1 = low_val;
4392  tree n2 = high_val;
4393
4394  gimple *stmt = gimple_build_assign (ind_var, n1);
4395
4396  /* Replace the GIMPLE_OMP_FOR statement.  */
4397  gsi_replace (&gsi, stmt, true);
4398
4399  if (!broken_loop)
4400    {
4401      /* Code to control the increment goes in the CONT_BB.  */
4402      gsi = gsi_last_bb (cont_bb);
4403      stmt = gsi_stmt (gsi);
4404      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4405      stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4406				  build_one_cst (type));
4407
4408      /* Replace GIMPLE_OMP_CONTINUE.  */
4409      gsi_replace (&gsi, stmt, true);
4410    }
4411
4412  /* Emit the condition in L1_BB.  */
4413  gsi = gsi_after_labels (l1_bb);
4414  t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4415		   fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4416		   fd->loop.step);
4417  if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4418    t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4419		     fd->loop.n1, fold_convert (sizetype, t));
4420  else
4421    t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4422		     fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4423  t = fold_convert (TREE_TYPE (fd->loop.v), t);
4424  expand_omp_build_assign (&gsi, fd->loop.v, t);
4425
4426  /* The condition is always '<' since the runtime will fill in the low
4427     and high values.  */
4428  stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4429  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4430
4431  /* Remove GIMPLE_OMP_RETURN.  */
4432  gsi = gsi_last_bb (exit_bb);
4433  gsi_remove (&gsi, true);
4434
4435  /* Connect the new blocks.  */
4436  remove_edge (FALLTHRU_EDGE (entry_bb));
4437
4438  edge e, ne;
4439  if (!broken_loop)
4440    {
4441      remove_edge (BRANCH_EDGE (entry_bb));
4442      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4443
4444      e = BRANCH_EDGE (l1_bb);
4445      ne = FALLTHRU_EDGE (l1_bb);
4446      e->flags = EDGE_TRUE_VALUE;
4447    }
4448  else
4449    {
4450      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4451
4452      ne = single_succ_edge (l1_bb);
4453      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4454
4455    }
4456  ne->flags = EDGE_FALSE_VALUE;
4457  e->probability = REG_BR_PROB_BASE * 7 / 8;
4458  ne->probability = REG_BR_PROB_BASE / 8;
4459
4460  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4461  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4462  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4463
4464  if (!broken_loop)
4465    {
4466      struct loop *loop = alloc_loop ();
4467      loop->header = l1_bb;
4468      loop->latch = cont_bb;
4469      add_loop (loop, l1_bb->loop_father);
4470      loop->safelen = INT_MAX;
4471    }
4472
4473  /* Pick the correct library function based on the precision of the
4474     induction variable type.  */
4475  tree lib_fun = NULL_TREE;
4476  if (TYPE_PRECISION (type) == 32)
4477    lib_fun = cilk_for_32_fndecl;
4478  else if (TYPE_PRECISION (type) == 64)
4479    lib_fun = cilk_for_64_fndecl;
4480  else
4481    gcc_unreachable ();
4482
4483  gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4484
4485  /* WS_ARGS contains the library function flavor to call
4486     (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4487     user-defined grain value.  If the user does not define one, then zero
4488     is passed in by the parser.  */
4489  vec_alloc (region->ws_args, 2);
4490  region->ws_args->quick_push (lib_fun);
4491  region->ws_args->quick_push (fd->chunk_size);
4492}
4493
4494/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
4495   loop.  Given parameters:
4496
4497	for (V = N1; V cond N2; V += STEP) BODY;
4498
4499   where COND is "<" or ">", we generate pseudocode
4500
4501	V = N1;
4502	goto L1;
4503    L0:
4504	BODY;
4505	V += STEP;
4506    L1:
4507	if (V cond N2) goto L0; else goto L2;
4508    L2:
4509
4510    For collapsed loops, given parameters:
4511      collapse(3)
4512      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4513	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4514	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4515	    BODY;
4516
4517    we generate pseudocode
4518
4519	if (cond3 is <)
4520	  adj = STEP3 - 1;
4521	else
4522	  adj = STEP3 + 1;
4523	count3 = (adj + N32 - N31) / STEP3;
4524	if (cond2 is <)
4525	  adj = STEP2 - 1;
4526	else
4527	  adj = STEP2 + 1;
4528	count2 = (adj + N22 - N21) / STEP2;
4529	if (cond1 is <)
4530	  adj = STEP1 - 1;
4531	else
4532	  adj = STEP1 + 1;
4533	count1 = (adj + N12 - N11) / STEP1;
4534	count = count1 * count2 * count3;
4535	V = 0;
4536	V1 = N11;
4537	V2 = N21;
4538	V3 = N31;
4539	goto L1;
4540    L0:
4541	BODY;
4542	V += 1;
4543	V3 += STEP3;
4544	V2 += (V3 cond3 N32) ? 0 : STEP2;
4545	V3 = (V3 cond3 N32) ? V3 : N31;
4546	V1 += (V2 cond2 N22) ? 0 : STEP1;
4547	V2 = (V2 cond2 N22) ? V2 : N21;
4548    L1:
4549	if (V < count) goto L0; else goto L2;
4550    L2:
4551
4552      */
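
/* For illustration (example numbers only): with collapse(2),
   N11 = 0, N12 = 2, STEP1 = 1 and N21 = 0, N22 = 3, STEP2 = 1, the scheme
   above gives count = 2 * 3 = 6 and the pair (V1, V2) visits
   (0,0) (0,1) (0,2) (1,0) (1,1) (1,2); the inner variable wraps back to
   its start exactly when it leaves its range, and only then does the
   outer variable advance.  */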
4553
4554static void
4555expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4556{
4557  tree type, t;
4558  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4559  gimple_stmt_iterator gsi;
4560  gimple *stmt;
4561  gcond *cond_stmt;
4562  bool broken_loop = region->cont == NULL;
4563  edge e, ne;
4564  tree *counts = NULL;
4565  int i;
4566  int safelen_int = INT_MAX;
4567  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4568				  OMP_CLAUSE_SAFELEN);
4569  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4570				  OMP_CLAUSE__SIMDUID_);
4571  tree n1, n2;
4572
4573  if (safelen)
4574    {
4575      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4576      if (TREE_CODE (safelen) != INTEGER_CST)
4577	safelen_int = 0;
4578      else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4579	safelen_int = tree_to_uhwi (safelen);
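      /* A safelen of 1 promises no SIMD parallelism at all, so treat it
	 the same as a non-constant safelen.  */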
4580      if (safelen_int == 1)
4581	safelen_int = 0;
4582    }
4583  type = TREE_TYPE (fd->loop.v);
4584  entry_bb = region->entry;
4585  cont_bb = region->cont;
4586  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4587  gcc_assert (broken_loop
4588	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4589  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4590  if (!broken_loop)
4591    {
4592      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4593      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4594      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4595      l2_bb = BRANCH_EDGE (entry_bb)->dest;
4596    }
4597  else
4598    {
4599      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4600      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4601      l2_bb = single_succ (l1_bb);
4602    }
4603  exit_bb = region->exit;
4604  l2_dom_bb = NULL;
4605
4606  gsi = gsi_last_bb (entry_bb);
4607
4608  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4609  /* Not needed in SSA form right now.  */
4610  gcc_assert (!gimple_in_ssa_p (cfun));
4611  if (fd->collapse > 1)
4612    {
4613      int first_zero_iter = -1, dummy = -1;
4614      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4615
4616      counts = XALLOCAVEC (tree, fd->collapse);
4617      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4618				  zero_iter_bb, first_zero_iter,
4619				  dummy_bb, dummy, l2_dom_bb);
4620    }
4621  if (l2_dom_bb == NULL)
4622    l2_dom_bb = l1_bb;
4623
4624  n1 = fd->loop.n1;
4625  n2 = fd->loop.n2;
4626  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4627    {
4628      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4629				     OMP_CLAUSE__LOOPTEMP_);
4630      gcc_assert (innerc);
4631      n1 = OMP_CLAUSE_DECL (innerc);
4632      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4633				OMP_CLAUSE__LOOPTEMP_);
4634      gcc_assert (innerc);
4635      n2 = OMP_CLAUSE_DECL (innerc);
4636    }
4637  tree step = fd->loop.step;
4638
4639  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4640				  OMP_CLAUSE__SIMT_);
4641  if (is_simt)
4642    {
4643      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4644      is_simt = safelen_int > 1;
4645    }
4646  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4647  if (is_simt)
4648    {
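      /* Under SIMT each lane starts at its own offset N1 + SIMT_LANE * STEP
	 and then strides by VF * STEP, so the lanes interleave over the
	 iteration space.  */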
4649      simt_lane = create_tmp_var (unsigned_type_node);
4650      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4651      gimple_call_set_lhs (g, simt_lane);
4652      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4653      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4654				 fold_convert (TREE_TYPE (step), simt_lane));
4655      n1 = fold_convert (type, n1);
4656      if (POINTER_TYPE_P (type))
4657	n1 = fold_build_pointer_plus (n1, offset);
4658      else
4659	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4660
4661      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
4662      if (fd->collapse > 1)
4663	simt_maxlane = build_one_cst (unsigned_type_node);
4664      else if (safelen_int < omp_max_simt_vf ())
4665	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4666      tree vf
4667	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4668					unsigned_type_node, 0);
4669      if (simt_maxlane)
4670	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4671      vf = fold_convert (TREE_TYPE (step), vf);
4672      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4673    }
4674
4675  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4676  if (fd->collapse > 1)
4677    {
4678      if (gimple_omp_for_combined_into_p (fd->for_stmt))
4679	{
4680	  gsi_prev (&gsi);
4681	  expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4682	  gsi_next (&gsi);
4683	}
4684      else
4685	for (i = 0; i < fd->collapse; i++)
4686	  {
4687	    tree itype = TREE_TYPE (fd->loops[i].v);
4688	    if (POINTER_TYPE_P (itype))
4689	      itype = signed_type_for (itype);
4690	    t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4691	    expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4692	  }
4693    }
4694
4695  /* Remove the GIMPLE_OMP_FOR statement.  */
4696  gsi_remove (&gsi, true);
4697
4698  if (!broken_loop)
4699    {
4700      /* Code to control the increment goes in the CONT_BB.  */
4701      gsi = gsi_last_bb (cont_bb);
4702      stmt = gsi_stmt (gsi);
4703      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4704
4705      if (POINTER_TYPE_P (type))
4706	t = fold_build_pointer_plus (fd->loop.v, step);
4707      else
4708	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4709      expand_omp_build_assign (&gsi, fd->loop.v, t);
4710
4711      if (fd->collapse > 1)
4712	{
4713	  i = fd->collapse - 1;
4714	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4715	    {
4716	      t = fold_convert (sizetype, fd->loops[i].step);
4717	      t = fold_build_pointer_plus (fd->loops[i].v, t);
4718	    }
4719	  else
4720	    {
4721	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
4722				fd->loops[i].step);
4723	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4724			       fd->loops[i].v, t);
4725	    }
4726	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4727
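	  /* Propagate the carry outwards like a multi-digit counter: when an
	     inner index has run past its bound, step the next outer index
	     and reset the inner one back to its N1.  */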
4728	  for (i = fd->collapse - 1; i > 0; i--)
4729	    {
4730	      tree itype = TREE_TYPE (fd->loops[i].v);
4731	      tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4732	      if (POINTER_TYPE_P (itype2))
4733		itype2 = signed_type_for (itype2);
4734	      t = fold_convert (itype2, fd->loops[i - 1].step);
4735	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4736					    GSI_SAME_STMT);
4737	      t = build3 (COND_EXPR, itype2,
4738			  build2 (fd->loops[i].cond_code, boolean_type_node,
4739				  fd->loops[i].v,
4740				  fold_convert (itype, fd->loops[i].n2)),
4741			  build_int_cst (itype2, 0), t);
4742	      if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4743		t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4744	      else
4745		t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4746	      expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4747
4748	      t = fold_convert (itype, fd->loops[i].n1);
4749	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4750					    GSI_SAME_STMT);
4751	      t = build3 (COND_EXPR, itype,
4752			  build2 (fd->loops[i].cond_code, boolean_type_node,
4753				  fd->loops[i].v,
4754				  fold_convert (itype, fd->loops[i].n2)),
4755			  fd->loops[i].v, t);
4756	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4757	    }
4758	}
4759
4760      /* Remove GIMPLE_OMP_CONTINUE.  */
4761      gsi_remove (&gsi, true);
4762    }
4763
4764  /* Emit the condition in L1_BB.  */
4765  gsi = gsi_start_bb (l1_bb);
4766
4767  t = fold_convert (type, n2);
4768  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4769				false, GSI_CONTINUE_LINKING);
4770  tree v = fd->loop.v;
4771  if (DECL_P (v) && TREE_ADDRESSABLE (v))
4772    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4773				  false, GSI_CONTINUE_LINKING);
4774  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4775  cond_stmt = gimple_build_cond_empty (t);
4776  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4777  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4778		 NULL, NULL)
4779      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4780		    NULL, NULL))
4781    {
4782      gsi = gsi_for_stmt (cond_stmt);
4783      gimple_regimplify_operands (cond_stmt, &gsi);
4784    }
4785
4786  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
4787  if (is_simt)
4788    {
4789      gsi = gsi_start_bb (l2_bb);
4790      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4791      if (POINTER_TYPE_P (type))
4792	t = fold_build_pointer_plus (fd->loop.v, step);
4793      else
4794	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4795      expand_omp_build_assign (&gsi, fd->loop.v, t);
4796    }
4797
4798  /* Remove GIMPLE_OMP_RETURN.  */
4799  gsi = gsi_last_bb (exit_bb);
4800  gsi_remove (&gsi, true);
4801
4802  /* Connect the new blocks.  */
4803  remove_edge (FALLTHRU_EDGE (entry_bb));
4804
4805  if (!broken_loop)
4806    {
4807      remove_edge (BRANCH_EDGE (entry_bb));
4808      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4809
4810      e = BRANCH_EDGE (l1_bb);
4811      ne = FALLTHRU_EDGE (l1_bb);
4812      e->flags = EDGE_TRUE_VALUE;
4813    }
4814  else
4815    {
4816      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4817
4818      ne = single_succ_edge (l1_bb);
4819      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4820
4821    }
4822  ne->flags = EDGE_FALSE_VALUE;
4823  e->probability = REG_BR_PROB_BASE * 7 / 8;
4824  ne->probability = REG_BR_PROB_BASE / 8;
4825
4826  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4827  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4828
4829  if (simt_maxlane)
4830    {
4831      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4832				     NULL_TREE, NULL_TREE);
4833      gsi = gsi_last_bb (entry_bb);
4834      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4835      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4836      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4837      FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4838      BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4839      l2_dom_bb = entry_bb;
4840    }
4841  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4842
4843  if (!broken_loop)
4844    {
4845      struct loop *loop = alloc_loop ();
4846      loop->header = l1_bb;
4847      loop->latch = cont_bb;
4848      add_loop (loop, l1_bb->loop_father);
4849      loop->safelen = safelen_int;
4850      if (simduid)
4851	{
4852	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4853	  cfun->has_simduid_loops = true;
4854	}
4855      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4856	 the loop.  */
4857      if ((flag_tree_loop_vectorize
4858	   || (!global_options_set.x_flag_tree_loop_vectorize
4859	       && !global_options_set.x_flag_tree_vectorize))
4860	  && flag_tree_loop_optimize
4861	  && loop->safelen > 1)
4862	{
4863	  loop->force_vectorize = true;
4864	  cfun->has_force_vectorize_loops = true;
4865	}
4866    }
4867  else if (simduid)
4868    cfun->has_simduid_loops = true;
4869}
4870
4871/* The taskloop construct is represented after gimplification as
4872   two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4873   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
4874   which should just compute all the needed loop temporaries
4875   for GIMPLE_OMP_TASK.  */
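
/* For example (illustrative only), a construct along the lines of

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body;

   is gimplified into an outer GIMPLE_OMP_FOR, a GIMPLE_OMP_TASK and an
   inner GIMPLE_OMP_FOR; the outer loop expanded here only evaluates the
   loop bounds into the _looptemp_ temporaries that the task consumes.  */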
4876
4877static void
4878expand_omp_taskloop_for_outer (struct omp_region *region,
4879			       struct omp_for_data *fd,
4880			       gimple *inner_stmt)
4881{
4882  tree type, bias = NULL_TREE;
4883  basic_block entry_bb, cont_bb, exit_bb;
4884  gimple_stmt_iterator gsi;
4885  gassign *assign_stmt;
4886  tree *counts = NULL;
4887  int i;
4888
4889  gcc_assert (inner_stmt);
4890  gcc_assert (region->cont);
4891  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4892	      && gimple_omp_task_taskloop_p (inner_stmt));
4893  type = TREE_TYPE (fd->loop.v);
4894
4895  /* See if we need to bias by LLONG_MIN.  */
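  /* A sketch of the rationale (not a normative description of the runtime
     ABI): GOMP_taskloop_ull works on unsigned long long values, so a signed
     64-bit iteration range that may cross zero is offset by TYPE_MIN_VALUE;
     that keeps the runtime's unsigned comparisons consistent with the
     signed iteration order.  */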
4896  if (fd->iter_type == long_long_unsigned_type_node
4897      && TREE_CODE (type) == INTEGER_TYPE
4898      && !TYPE_UNSIGNED (type))
4899    {
4900      tree n1, n2;
4901
4902      if (fd->loop.cond_code == LT_EXPR)
4903	{
4904	  n1 = fd->loop.n1;
4905	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4906	}
4907      else
4908	{
4909	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4910	  n2 = fd->loop.n1;
4911	}
4912      if (TREE_CODE (n1) != INTEGER_CST
4913	  || TREE_CODE (n2) != INTEGER_CST
4914	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4915	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4916    }
4917
4918  entry_bb = region->entry;
4919  cont_bb = region->cont;
4920  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4921  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4922  exit_bb = region->exit;
4923
4924  gsi = gsi_last_bb (entry_bb);
4925  gimple *for_stmt = gsi_stmt (gsi);
4926  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4927  if (fd->collapse > 1)
4928    {
4929      int first_zero_iter = -1, dummy = -1;
4930      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4931
4932      counts = XALLOCAVEC (tree, fd->collapse);
4933      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4934				  zero_iter_bb, first_zero_iter,
4935				  dummy_bb, dummy, l2_dom_bb);
4936
4937      if (zero_iter_bb)
4938	{
4939	  /* Some counts[i] vars might be uninitialized if
4940	     some loop has zero iterations.  But the body shouldn't
4941	     be executed in that case, so just avoid uninit warnings.  */
4942	  for (i = first_zero_iter; i < fd->collapse; i++)
4943	    if (SSA_VAR_P (counts[i]))
4944	      TREE_NO_WARNING (counts[i]) = 1;
4945	  gsi_prev (&gsi);
4946	  edge e = split_block (entry_bb, gsi_stmt (gsi));
4947	  entry_bb = e->dest;
4948	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4949	  gsi = gsi_last_bb (entry_bb);
4950	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4951				   get_immediate_dominator (CDI_DOMINATORS,
4952							    zero_iter_bb));
4953	}
4954    }
4955
4956  tree t0, t1;
4957  t1 = fd->loop.n2;
4958  t0 = fd->loop.n1;
4959  if (POINTER_TYPE_P (TREE_TYPE (t0))
4960      && TYPE_PRECISION (TREE_TYPE (t0))
4961	 != TYPE_PRECISION (fd->iter_type))
4962    {
4963      /* Avoid casting pointers to integer of a different size.  */
4964      tree itype = signed_type_for (type);
4965      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4966      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4967    }
4968  else
4969    {
4970      t1 = fold_convert (fd->iter_type, t1);
4971      t0 = fold_convert (fd->iter_type, t0);
4972    }
4973  if (bias)
4974    {
4975      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4976      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4977    }
4978
4979  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4980				 OMP_CLAUSE__LOOPTEMP_);
4981  gcc_assert (innerc);
4982  tree startvar = OMP_CLAUSE_DECL (innerc);
4983  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4984  gcc_assert (innerc);
4985  tree endvar = OMP_CLAUSE_DECL (innerc);
4986  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4987    {
4988      gcc_assert (innerc);
4989      for (i = 1; i < fd->collapse; i++)
4990	{
4991	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992				    OMP_CLAUSE__LOOPTEMP_);
4993	  gcc_assert (innerc);
4994	}
4995      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4996				OMP_CLAUSE__LOOPTEMP_);
4997      if (innerc)
4998	{
4999	  /* If needed (inner taskloop has lastprivate clause), propagate
5000	     down the total number of iterations.  */
5001	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5002					     NULL_TREE, false,
5003					     GSI_CONTINUE_LINKING);
5004	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5005	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5006	}
5007    }
5008
5009  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5010				 GSI_CONTINUE_LINKING);
5011  assign_stmt = gimple_build_assign (startvar, t0);
5012  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5013
5014  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5015				 GSI_CONTINUE_LINKING);
5016  assign_stmt = gimple_build_assign (endvar, t1);
5017  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5018  if (fd->collapse > 1)
5019    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5020
5021  /* Remove the GIMPLE_OMP_FOR statement.  */
5022  gsi = gsi_for_stmt (for_stmt);
5023  gsi_remove (&gsi, true);
5024
5025  gsi = gsi_last_bb (cont_bb);
5026  gsi_remove (&gsi, true);
5027
5028  gsi = gsi_last_bb (exit_bb);
5029  gsi_remove (&gsi, true);
5030
5031  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5032  remove_edge (BRANCH_EDGE (entry_bb));
5033  FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
5034  remove_edge (BRANCH_EDGE (cont_bb));
5035  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5036  set_immediate_dominator (CDI_DOMINATORS, region->entry,
5037			   recompute_dominator (CDI_DOMINATORS, region->entry));
5038}
5039
5040/* The taskloop construct is represented after gimplification as
5041   two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5042   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
5043   The GOMP_taskloop{,_ull} function arranges for each task to be given just
5044   a single range of iterations.  */
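
/* Schematically (illustrative only), each task then executes

     for (V = start; V cond end; V += STEP)
       BODY;

   where start and end are the _looptemp_ values that the runtime filled in
   for that particular task.  */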
5045
5046static void
5047expand_omp_taskloop_for_inner (struct omp_region *region,
5048			       struct omp_for_data *fd,
5049			       gimple *inner_stmt)
5050{
5051  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5052  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5053  basic_block fin_bb;
5054  gimple_stmt_iterator gsi;
5055  edge ep;
5056  bool broken_loop = region->cont == NULL;
5057  tree *counts = NULL;
5058  tree n1, n2, step;
5059
5060  itype = type = TREE_TYPE (fd->loop.v);
5061  if (POINTER_TYPE_P (type))
5062    itype = signed_type_for (type);
5063
5064  /* See if we need to bias by LLONG_MIN.  */
5065  if (fd->iter_type == long_long_unsigned_type_node
5066      && TREE_CODE (type) == INTEGER_TYPE
5067      && !TYPE_UNSIGNED (type))
5068    {
5069      tree n1, n2;
5070
5071      if (fd->loop.cond_code == LT_EXPR)
5072	{
5073	  n1 = fd->loop.n1;
5074	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5075	}
5076      else
5077	{
5078	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5079	  n2 = fd->loop.n1;
5080	}
5081      if (TREE_CODE (n1) != INTEGER_CST
5082	  || TREE_CODE (n2) != INTEGER_CST
5083	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5084	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5085    }
5086
5087  entry_bb = region->entry;
5088  cont_bb = region->cont;
5089  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5090  fin_bb = BRANCH_EDGE (entry_bb)->dest;
5091  gcc_assert (broken_loop
5092	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5093  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5094  if (!broken_loop)
5095    {
5096      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5097      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5098    }
5099  exit_bb = region->exit;
5100
5101  /* Iteration space partitioning goes in ENTRY_BB.  */
5102  gsi = gsi_last_bb (entry_bb);
5103  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5104
5105  if (fd->collapse > 1)
5106    {
5107      int first_zero_iter = -1, dummy = -1;
5108      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5109
5110      counts = XALLOCAVEC (tree, fd->collapse);
5111      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5112				  fin_bb, first_zero_iter,
5113				  dummy_bb, dummy, l2_dom_bb);
5114      t = NULL_TREE;
5115    }
5116  else
5117    t = integer_one_node;
5118
5119  step = fd->loop.step;
5120  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5121				 OMP_CLAUSE__LOOPTEMP_);
5122  gcc_assert (innerc);
5123  n1 = OMP_CLAUSE_DECL (innerc);
5124  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5125  gcc_assert (innerc);
5126  n2 = OMP_CLAUSE_DECL (innerc);
5127  if (bias)
5128    {
5129      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5130      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5131    }
5132  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5133				 true, NULL_TREE, true, GSI_SAME_STMT);
5134  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5135				 true, NULL_TREE, true, GSI_SAME_STMT);
5136  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5137				   true, NULL_TREE, true, GSI_SAME_STMT);
5138
5139  tree startvar = fd->loop.v;
5140  tree endvar = NULL_TREE;
5141
5142  if (gimple_omp_for_combined_p (fd->for_stmt))
5143    {
5144      tree clauses = gimple_omp_for_clauses (inner_stmt);
5145      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5146      gcc_assert (innerc);
5147      startvar = OMP_CLAUSE_DECL (innerc);
5148      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5149				OMP_CLAUSE__LOOPTEMP_);
5150      gcc_assert (innerc);
5151      endvar = OMP_CLAUSE_DECL (innerc);
5152    }
5153  t = fold_convert (TREE_TYPE (startvar), n1);
5154  t = force_gimple_operand_gsi (&gsi, t,
5155				DECL_P (startvar)
5156				&& TREE_ADDRESSABLE (startvar),
5157				NULL_TREE, false, GSI_CONTINUE_LINKING);
5158  gimple *assign_stmt = gimple_build_assign (startvar, t);
5159  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160
5161  t = fold_convert (TREE_TYPE (startvar), n2);
5162  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5163				false, GSI_CONTINUE_LINKING);
5164  if (endvar)
5165    {
5166      assign_stmt = gimple_build_assign (endvar, e);
5167      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5168      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5169	assign_stmt = gimple_build_assign (fd->loop.v, e);
5170      else
5171	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5172      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5173    }
5174  if (fd->collapse > 1)
5175    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5176
5177  if (!broken_loop)
5178    {
5179      /* The code controlling the sequential loop replaces the
5180	 GIMPLE_OMP_CONTINUE.  */
5181      gsi = gsi_last_bb (cont_bb);
5182      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5183      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5184      vmain = gimple_omp_continue_control_use (cont_stmt);
5185      vback = gimple_omp_continue_control_def (cont_stmt);
5186
5187      if (!gimple_omp_for_combined_p (fd->for_stmt))
5188	{
5189	  if (POINTER_TYPE_P (type))
5190	    t = fold_build_pointer_plus (vmain, step);
5191	  else
5192	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5193	  t = force_gimple_operand_gsi (&gsi, t,
5194					DECL_P (vback)
5195					&& TREE_ADDRESSABLE (vback),
5196					NULL_TREE, true, GSI_SAME_STMT);
5197	  assign_stmt = gimple_build_assign (vback, t);
5198	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5199
5200	  t = build2 (fd->loop.cond_code, boolean_type_node,
5201		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5202		      ? t : vback, e);
5203	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5204	}
5205
5206      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5207      gsi_remove (&gsi, true);
5208
5209      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5210	collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5211    }
5212
5213  /* Remove the GIMPLE_OMP_FOR statement.  */
5214  gsi = gsi_for_stmt (fd->for_stmt);
5215  gsi_remove (&gsi, true);
5216
5217  /* Remove the GIMPLE_OMP_RETURN statement.  */
5218  gsi = gsi_last_bb (exit_bb);
5219  gsi_remove (&gsi, true);
5220
5221  FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5222  if (!broken_loop)
5223    remove_edge (BRANCH_EDGE (entry_bb));
5224  else
5225    {
5226      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5227      region->outer->cont = NULL;
5228    }
5229
5230  /* Connect all the blocks.  */
5231  if (!broken_loop)
5232    {
5233      ep = find_edge (cont_bb, body_bb);
5234      if (gimple_omp_for_combined_p (fd->for_stmt))
5235	{
5236	  remove_edge (ep);
5237	  ep = NULL;
5238	}
5239      else if (fd->collapse > 1)
5240	{
5241	  remove_edge (ep);
5242	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5243	}
5244      else
5245	ep->flags = EDGE_TRUE_VALUE;
5246      find_edge (cont_bb, fin_bb)->flags
5247	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5248    }
5249
5250  set_immediate_dominator (CDI_DOMINATORS, body_bb,
5251			   recompute_dominator (CDI_DOMINATORS, body_bb));
5252  if (!broken_loop)
5253    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5254			     recompute_dominator (CDI_DOMINATORS, fin_bb));
5255
5256  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5257    {
5258      struct loop *loop = alloc_loop ();
5259      loop->header = body_bb;
5260      if (collapse_bb == NULL)
5261	loop->latch = cont_bb;
5262      add_loop (loop, body_bb->loop_father);
5263    }
5264}
5265
5266/* A subroutine of expand_omp_for.  Generate code for an OpenACC
5267   partitioned loop.  The lowering here is abstracted, in that the
5268   loop parameters are passed through internal functions, which are
5269   further lowered by oacc_device_lower, once we get to the target
5270   compiler.  The loop is of the form:
5271
5272   for (V = B; V LTGT E; V += S) {BODY}
5273
5274   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
5275   (constant 0 for no chunking) and we will have a GWV partitioning
5276   mask, specifying dimensions over which the loop is to be
5277   partitioned (see note below).  We generate code that looks like
5278   (this ignores tiling):
5279
5280   <entry_bb> [incoming FALL->body, BRANCH->exit]
5281     typedef signedintify (typeof (V)) T;  // underlying signed integral type
5282     T range = E - B;
5283     T chunk_no = 0;
5284     T DIR = LTGT == '<' ? +1 : -1;
5285     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5286     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5287
5288   <head_bb> [created by splitting end of entry_bb]
5289     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5290     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5291     if (!(offset LTGT bound)) goto bottom_bb;
5292
5293   <body_bb> [incoming]
5294     V = B + offset;
5295     {BODY}
5296
5297   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5298     offset += step;
5299     if (offset LTGT bound) goto body_bb; [*]
5300
5301   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5302     chunk_no++;
5303     if (chunk_no < chunk_max) goto head_bb;
5304
5305   <exit_bb> [incoming]
5306     V = B + ((range -/+ 1) / S +/- 1) * S [*]
5307
5308   [*] Needed if V live at end of loop.  */
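
/* For instance (illustrative only), a loop such as

     #pragma acc loop gang
     for (V = B; V < E; V += S)
       BODY;

   reaches this point with the gang partitioning encoded in the GWV mask;
   the IFN_GOACC_LOOP_* calls emitted below are resolved for the concrete
   device geometry by oacc_device_lower.  */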
5309
5310static void
5311expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5312{
5313  tree v = fd->loop.v;
5314  enum tree_code cond_code = fd->loop.cond_code;
5315  enum tree_code plus_code = PLUS_EXPR;
5316
5317  tree chunk_size = integer_minus_one_node;
5318  tree gwv = integer_zero_node;
5319  tree iter_type = TREE_TYPE (v);
5320  tree diff_type = iter_type;
5321  tree plus_type = iter_type;
5322  struct oacc_collapse *counts = NULL;
5323
5324  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5325		       == GF_OMP_FOR_KIND_OACC_LOOP);
5326  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5327  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5328
5329  if (POINTER_TYPE_P (iter_type))
5330    {
5331      plus_code = POINTER_PLUS_EXPR;
5332      plus_type = sizetype;
5333    }
5334  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5335    diff_type = signed_type_for (diff_type);
5336
5337  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5338  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5339  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
5340  basic_block bottom_bb = NULL;
5341
5342  /* entry_bb has two successors; the branch edge goes to the exit
5343     block, the fallthrough edge to the body.  */
5344  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5345	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5346
5347  /* If cont_bb is non-NULL, it has 2 successors.  The branch successor
5348     is body_bb, or a block whose only successor is body_bb.  Its
5349     fallthrough successor is the final block (the same as the branch
5350     successor of entry_bb).  */
5351  if (cont_bb)
5352    {
5353      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5354      basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5355
5356      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5357      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5358    }
5359  else
5360    gcc_assert (!gimple_in_ssa_p (cfun));
5361
5362  /* The exit block only has entry_bb and cont_bb as predecessors.  */
5363  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5364
5365  tree chunk_no;
5366  tree chunk_max = NULL_TREE;
5367  tree bound, offset;
5368  tree step = create_tmp_var (diff_type, ".step");
5369  bool up = cond_code == LT_EXPR;
5370  tree dir = build_int_cst (diff_type, up ? +1 : -1);
5371  bool chunking = !gimple_in_ssa_p (cfun);
5372  bool negating;
5373
5374  /* Tiling vars.  */
5375  tree tile_size = NULL_TREE;
5376  tree element_s = NULL_TREE;
5377  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5378  basic_block elem_body_bb = NULL;
5379  basic_block elem_cont_bb = NULL;
5380
5381  /* SSA instances.  */
5382  tree offset_incr = NULL_TREE;
5383  tree offset_init = NULL_TREE;
5384
5385  gimple_stmt_iterator gsi;
5386  gassign *ass;
5387  gcall *call;
5388  gimple *stmt;
5389  tree expr;
5390  location_t loc;
5391  edge split, be, fte;
5392
5393  /* Split the end of entry_bb to create head_bb.  */
5394  split = split_block (entry_bb, last_stmt (entry_bb));
5395  basic_block head_bb = split->dest;
5396  entry_bb = split->src;
5397
5398  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
5399  gsi = gsi_last_bb (entry_bb);
5400  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5401  loc = gimple_location (for_stmt);
5402
5403  if (gimple_in_ssa_p (cfun))
5404    {
5405      offset_init = gimple_omp_for_index (for_stmt, 0);
5406      gcc_assert (integer_zerop (fd->loop.n1));
5407      /* The SSA parallelizer does gang parallelism.  */
5408      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5409    }
5410
5411  if (fd->collapse > 1 || fd->tiling)
5412    {
5413      gcc_assert (!gimple_in_ssa_p (cfun) && up);
5414      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5415      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5416					      TREE_TYPE (fd->loop.n2), loc);
5417
5418      if (SSA_VAR_P (fd->loop.n2))
5419	{
5420	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5421					    true, GSI_SAME_STMT);
5422	  ass = gimple_build_assign (fd->loop.n2, total);
5423	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5424	}
5425    }
5426
5427  tree b = fd->loop.n1;
5428  tree e = fd->loop.n2;
5429  tree s = fd->loop.step;
5430
5431  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5432  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5433
5434  /* Convert the step, avoiding possible unsigned->signed overflow.  */
5435  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5436  if (negating)
5437    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5438  s = fold_convert (diff_type, s);
5439  if (negating)
5440    s = fold_build1 (NEGATE_EXPR, diff_type, s);
5441  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5442
5443  if (!chunking)
5444    chunk_size = integer_zero_node;
5445  expr = fold_convert (diff_type, chunk_size);
5446  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5447					 NULL_TREE, true, GSI_SAME_STMT);
5448
5449  if (fd->tiling)
5450    {
5451      /* Determine the tile size and element step,
5452	 modify the outer loop step size.  */
5453      tile_size = create_tmp_var (diff_type, ".tile_size");
5454      expr = build_int_cst (diff_type, 1);
5455      for (int ix = 0; ix < fd->collapse; ix++)
5456	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5457      expr = force_gimple_operand_gsi (&gsi, expr, true,
5458				       NULL_TREE, true, GSI_SAME_STMT);
5459      ass = gimple_build_assign (tile_size, expr);
5460      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5461
5462      element_s = create_tmp_var (diff_type, ".element_s");
5463      ass = gimple_build_assign (element_s, s);
5464      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5465
5466      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5467      s = force_gimple_operand_gsi (&gsi, expr, true,
5468				    NULL_TREE, true, GSI_SAME_STMT);
5469    }
5470
5471  /* Determine the range, avoiding possible unsigned->signed overflow.  */
5472  negating = !up && TYPE_UNSIGNED (iter_type);
5473  expr = fold_build2 (MINUS_EXPR, plus_type,
5474		      fold_convert (plus_type, negating ? b : e),
5475		      fold_convert (plus_type, negating ? e : b));
5476  expr = fold_convert (diff_type, expr);
5477  if (negating)
5478    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5479  tree range = force_gimple_operand_gsi (&gsi, expr, true,
5480					 NULL_TREE, true, GSI_SAME_STMT);
5481
5482  chunk_no = build_int_cst (diff_type, 0);
5483  if (chunking)
5484    {
5485      gcc_assert (!gimple_in_ssa_p (cfun));
5486
5487      expr = chunk_no;
5488      chunk_max = create_tmp_var (diff_type, ".chunk_max");
5489      chunk_no = create_tmp_var (diff_type, ".chunk_no");
5490
5491      ass = gimple_build_assign (chunk_no, expr);
5492      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5493
5494      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5495					 build_int_cst (integer_type_node,
5496							IFN_GOACC_LOOP_CHUNKS),
5497					 dir, range, s, chunk_size, gwv);
5498      gimple_call_set_lhs (call, chunk_max);
5499      gimple_set_location (call, loc);
5500      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5501    }
5502  else
5503    chunk_size = chunk_no;
5504
5505  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5506				     build_int_cst (integer_type_node,
5507						    IFN_GOACC_LOOP_STEP),
5508				     dir, range, s, chunk_size, gwv);
5509  gimple_call_set_lhs (call, step);
5510  gimple_set_location (call, loc);
5511  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5512
5513  /* Remove the GIMPLE_OMP_FOR.  */
5514  gsi_remove (&gsi, true);
5515
5516  /* Fixup edges from head_bb.  */
5517  be = BRANCH_EDGE (head_bb);
5518  fte = FALLTHRU_EDGE (head_bb);
5519  be->flags |= EDGE_FALSE_VALUE;
5520  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5521
5522  basic_block body_bb = fte->dest;
5523
5524  if (gimple_in_ssa_p (cfun))
5525    {
5526      gsi = gsi_last_bb (cont_bb);
5527      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5528
5529      offset = gimple_omp_continue_control_use (cont_stmt);
5530      offset_incr = gimple_omp_continue_control_def (cont_stmt);
5531    }
5532  else
5533    {
5534      offset = create_tmp_var (diff_type, ".offset");
5535      offset_init = offset_incr = offset;
5536    }
5537  bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5538
5539  /* Loop offset & bound go into head_bb.  */
5540  gsi = gsi_start_bb (head_bb);
5541
5542  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5543				     build_int_cst (integer_type_node,
5544						    IFN_GOACC_LOOP_OFFSET),
5545				     dir, range, s,
5546				     chunk_size, gwv, chunk_no);
5547  gimple_call_set_lhs (call, offset_init);
5548  gimple_set_location (call, loc);
5549  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5550
5551  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5552				     build_int_cst (integer_type_node,
5553						    IFN_GOACC_LOOP_BOUND),
5554				     dir, range, s,
5555				     chunk_size, gwv, offset_init);
5556  gimple_call_set_lhs (call, bound);
5557  gimple_set_location (call, loc);
5558  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5559
5560  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5561  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5562		    GSI_CONTINUE_LINKING);
5563
5564  /* V assignment goes into body_bb.  */
5565  if (!gimple_in_ssa_p (cfun))
5566    {
5567      gsi = gsi_start_bb (body_bb);
5568
5569      expr = build2 (plus_code, iter_type, b,
5570		     fold_convert (plus_type, offset));
5571      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5572				       true, GSI_SAME_STMT);
5573      ass = gimple_build_assign (v, expr);
5574      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5575
5576      if (fd->collapse > 1 || fd->tiling)
5577	expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5578
5579      if (fd->tiling)
5580	{
5581	  /* Determine the range of the element loop -- usually simply
5582	     the tile_size, but could be smaller if the final
5583	     iteration of the outer loop is a partial tile.  */
5584	  tree e_range = create_tmp_var (diff_type, ".e_range");
5585
5586	  expr = build2 (MIN_EXPR, diff_type,
5587			 build2 (MINUS_EXPR, diff_type, bound, offset),
5588			 build2 (MULT_EXPR, diff_type, tile_size,
5589				 element_s));
5590	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5591					   true, GSI_SAME_STMT);
5592	  ass = gimple_build_assign (e_range, expr);
5593	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5594
5595	  /* Determine bound, offset & step of inner loop. */
5596	  e_bound = create_tmp_var (diff_type, ".e_bound");
5597	  e_offset = create_tmp_var (diff_type, ".e_offset");
5598	  e_step = create_tmp_var (diff_type, ".e_step");
5599
5600	  /* Mark these as element loops.  */
5601	  tree t, e_gwv = integer_minus_one_node;
5602	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
5603
5604	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5605	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5606					     element_s, chunk, e_gwv, chunk);
5607	  gimple_call_set_lhs (call, e_offset);
5608	  gimple_set_location (call, loc);
5609	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5610
5611	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5612	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5613					     element_s, chunk, e_gwv, e_offset);
5614	  gimple_call_set_lhs (call, e_bound);
5615	  gimple_set_location (call, loc);
5616	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5617
5618	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5619	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5620					     element_s, chunk, e_gwv);
5621	  gimple_call_set_lhs (call, e_step);
5622	  gimple_set_location (call, loc);
5623	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5624
5625	  /* Add test and split block.  */
5626	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5627	  stmt = gimple_build_cond_empty (expr);
5628	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5629	  split = split_block (body_bb, stmt);
5630	  elem_body_bb = split->dest;
5631	  if (cont_bb == body_bb)
5632	    cont_bb = elem_body_bb;
5633	  body_bb = split->src;
5634
5635	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5636
5637	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
5638	  if (cont_bb == NULL)
5639	    {
5640	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5641	      e->probability = PROB_EVEN;
5642	      split->probability = PROB_EVEN;
5643	    }
5644
5645	  /* Initialize the user's loop vars.  */
5646	  gsi = gsi_start_bb (elem_body_bb);
5647	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5648	}
5649    }
5650
5651  /* Loop increment goes into cont_bb.  If this is not a loop, we
5652     will have spawned threads as if it were, and each one will
5653     execute one iteration.  The specification is not explicit about
5654     whether such constructs are ill-formed, and they can occur,
5655     especially when noreturn routines are involved.  */
5656  if (cont_bb)
5657    {
5658      gsi = gsi_last_bb (cont_bb);
5659      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5660      loc = gimple_location (cont_stmt);
5661
5662      if (fd->tiling)
5663	{
5664	  /* Insert element loop increment and test.  */
5665	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5666	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5667					   true, GSI_SAME_STMT);
5668	  ass = gimple_build_assign (e_offset, expr);
5669	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5670	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5671
5672	  stmt = gimple_build_cond_empty (expr);
5673	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5674	  split = split_block (cont_bb, stmt);
5675	  elem_cont_bb = split->src;
5676	  cont_bb = split->dest;
5677
5678	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5679	  make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5680
5681	  make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5682
5683	  gsi = gsi_for_stmt (cont_stmt);
5684	}
5685
5686      /* Increment offset.  */
5687      if (gimple_in_ssa_p (cfun))
5688	expr = build2 (plus_code, iter_type, offset,
5689		       fold_convert (plus_type, step));
5690      else
5691	expr = build2 (PLUS_EXPR, diff_type, offset, step);
5692      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5693				       true, GSI_SAME_STMT);
5694      ass = gimple_build_assign (offset_incr, expr);
5695      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5696      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5697      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5698
5699      /* Remove the GIMPLE_OMP_CONTINUE.  */
5700      gsi_remove (&gsi, true);
5701
5702      /* Fixup edges from cont_bb.  */
5703      be = BRANCH_EDGE (cont_bb);
5704      fte = FALLTHRU_EDGE (cont_bb);
5705      be->flags |= EDGE_TRUE_VALUE;
5706      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5707
5708      if (chunking)
5709	{
5710	  /* Split the beginning of exit_bb to make bottom_bb.  We
5711	     need to insert a nop at the start, because splitting is
5712	     after a stmt, not before.  */
5713	  gsi = gsi_start_bb (exit_bb);
5714	  stmt = gimple_build_nop ();
5715	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5716	  split = split_block (exit_bb, stmt);
5717	  bottom_bb = split->src;
5718	  exit_bb = split->dest;
5719	  gsi = gsi_last_bb (bottom_bb);
5720
5721	  /* Chunk increment and test go into bottom_bb.  */
5722	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5723			 build_int_cst (diff_type, 1));
5724	  ass = gimple_build_assign (chunk_no, expr);
5725	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5726
5727	  /* Chunk test at end of bottom_bb.  */
5728	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5729	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5730			    GSI_CONTINUE_LINKING);
5731
5732	  /* Fixup edges from bottom_bb.  */
5733	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5734	  make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5735	}
5736    }
5737
5738  gsi = gsi_last_bb (exit_bb);
5739  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5740  loc = gimple_location (gsi_stmt (gsi));
5741
5742  if (!gimple_in_ssa_p (cfun))
5743    {
5744      /* Insert the final value of V, in case it is live.  This is the
5745	 value for the only thread that survives past the join.  */
5746      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5747      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5748      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5749      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5750      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5751      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5752				       true, GSI_SAME_STMT);
5753      ass = gimple_build_assign (v, expr);
5754      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5755    }
5756
5757  /* Remove the OMP_RETURN.  */
5758  gsi_remove (&gsi, true);
5759
5760  if (cont_bb)
5761    {
5762      /* We now have one, two or three nested loops.  Update the loop
5763	 structures.  */
5764      struct loop *parent = entry_bb->loop_father;
5765      struct loop *body = body_bb->loop_father;
5766
5767      if (chunking)
5768	{
5769	  struct loop *chunk_loop = alloc_loop ();
5770	  chunk_loop->header = head_bb;
5771	  chunk_loop->latch = bottom_bb;
5772	  add_loop (chunk_loop, parent);
5773	  parent = chunk_loop;
5774	}
5775      else if (parent != body)
5776	{
5777	  gcc_assert (body->header == body_bb);
5778	  gcc_assert (body->latch == cont_bb
5779		      || single_pred (body->latch) == cont_bb);
5780	  parent = NULL;
5781	}
5782
5783      if (parent)
5784	{
5785	  struct loop *body_loop = alloc_loop ();
5786	  body_loop->header = body_bb;
5787	  body_loop->latch = cont_bb;
5788	  add_loop (body_loop, parent);
5789
5790	  if (fd->tiling)
5791	    {
5792	      /* Insert tiling's element loop.  */
5793	      struct loop *inner_loop = alloc_loop ();
5794	      inner_loop->header = elem_body_bb;
5795	      inner_loop->latch = elem_cont_bb;
5796	      add_loop (inner_loop, body_loop);
5797	    }
5798	}
5799    }
5800}
5801
5802/* Expand the OMP loop defined by REGION.  */
5803
5804static void
5805expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5806{
5807  struct omp_for_data fd;
5808  struct omp_for_data_loop *loops;
5809
5810  loops
5811    = (struct omp_for_data_loop *)
5812      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5813	      * sizeof (struct omp_for_data_loop));
5814  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5815			&fd, loops);
5816  region->sched_kind = fd.sched_kind;
5817  region->sched_modifiers = fd.sched_modifiers;
5818
5819  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5820  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5821  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5822  if (region->cont)
5823    {
5824      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5825      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5826      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5827    }
5828  else
5829    /* If there isn't a continue, then this is a degenerate case where
5830       the introduction of abnormal edges during lowering will prevent
5831       original loops from being detected.  Fix that up.  */
5832    loops_state_set (LOOPS_NEED_FIXUP);
5833
5834  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5835    expand_omp_simd (region, &fd);
5836  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5837    expand_cilk_for (region, &fd);
5838  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5839    {
5840      gcc_assert (!inner_stmt);
5841      expand_oacc_for (region, &fd);
5842    }
5843  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5844    {
5845      if (gimple_omp_for_combined_into_p (fd.for_stmt))
5846	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5847      else
5848	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5849    }
5850  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5851	   && !fd.have_ordered)
5852    {
5853      if (fd.chunk_size == NULL)
5854	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5855      else
5856	expand_omp_for_static_chunk (region, &fd, inner_stmt);
5857    }
5858  else
5859    {
5860      int fn_index, start_ix, next_ix;
5861
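      /* Select among the GOMP_loop{,_ull}_*_{start,next} built-ins:
	 FN_INDEX picks the schedule flavour (including the nonmonotonic
	 and plain ordered variants), and the doacross and unsigned long
	 long entry points are derived from it below.  */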
5862      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5863		  == GF_OMP_FOR_KIND_FOR);
5864      if (fd.chunk_size == NULL
5865	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5866	fd.chunk_size = integer_zero_node;
5867      gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5868      switch (fd.sched_kind)
5869	{
5870	case OMP_CLAUSE_SCHEDULE_RUNTIME:
5871	  fn_index = 3;
5872	  break;
5873	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5874	case OMP_CLAUSE_SCHEDULE_GUIDED:
5875	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5876	      && !fd.ordered
5877	      && !fd.have_ordered)
5878	    {
5879	      fn_index = 3 + fd.sched_kind;
5880	      break;
5881	    }
5882	  /* FALLTHRU */
5883	default:
5884	  fn_index = fd.sched_kind;
5885	  break;
5886	}
5887      if (!fd.ordered)
5888	fn_index += fd.have_ordered * 6;
5889      if (fd.ordered)
5890	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5891      else
5892	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5893      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5894      if (fd.iter_type == long_long_unsigned_type_node)
5895	{
5896	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5897			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5898	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5899		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5900	}
5901      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5902			      (enum built_in_function) next_ix, inner_stmt);
5903    }
5904
5905  if (gimple_in_ssa_p (cfun))
5906    update_ssa (TODO_update_ssa_only_virtuals);
5907}
5908
5909/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
5910
5911	v = GOMP_sections_start (n);
5912    L0:
5913	switch (v)
5914	  {
5915	  case 0:
5916	    goto L2;
5917	  case 1:
5918	    section 1;
5919	    goto L1;
5920	  case 2:
5921	    ...
5922	  case n:
5923	    ...
5924	  default:
5925	    abort ();
5926	  }
5927    L1:
5928	v = GOMP_sections_next ();
5929	goto L0;
5930    L2:
5931	reduction;
5932
5933    If this is a combined parallel sections, replace the call to
5934    GOMP_sections_start with a call to GOMP_sections_next.  */
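
/* For example (illustrative only),

	#pragma omp sections
	{
	  #pragma omp section
	  a ();
	  #pragma omp section
	  b ();
	}

   maps onto the scheme above with GOMP_sections_start (2) producing the
   initial V, and a () and b () becoming cases 1 and 2 of the switch.  */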
5935
5936static void
5937expand_omp_sections (struct omp_region *region)
5938{
5939  tree t, u, vin = NULL, vmain, vnext, l2;
5940  unsigned len;
5941  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5942  gimple_stmt_iterator si, switch_si;
5943  gomp_sections *sections_stmt;
5944  gimple *stmt;
5945  gomp_continue *cont;
5946  edge_iterator ei;
5947  edge e;
5948  struct omp_region *inner;
5949  unsigned i, casei;
5950  bool exit_reachable = region->cont != NULL;
5951
5952  gcc_assert (region->exit != NULL);
5953  entry_bb = region->entry;
5954  l0_bb = single_succ (entry_bb);
5955  l1_bb = region->cont;
5956  l2_bb = region->exit;
5957  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5958    l2 = gimple_block_label (l2_bb);
5959  else
5960    {
5961      /* This can happen if there are reductions.  */
5962      len = EDGE_COUNT (l0_bb->succs);
5963      gcc_assert (len > 0);
5964      e = EDGE_SUCC (l0_bb, len - 1);
5965      si = gsi_last_bb (e->dest);
5966      l2 = NULL_TREE;
5967      if (gsi_end_p (si)
5968	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5969	l2 = gimple_block_label (e->dest);
5970      else
5971	FOR_EACH_EDGE (e, ei, l0_bb->succs)
5972	  {
5973	    si = gsi_last_bb (e->dest);
5974	    if (gsi_end_p (si)
5975		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5976	      {
5977		l2 = gimple_block_label (e->dest);
5978		break;
5979	      }
5980	  }
5981    }
5982  if (exit_reachable)
5983    default_bb = create_empty_bb (l1_bb->prev_bb);
5984  else
5985    default_bb = create_empty_bb (l0_bb);
5986
5987  /* We will build a switch() with enough cases for all the
5988     GIMPLE_OMP_SECTION regions, a '0' case to signal that no more work
5989     remains, and a default case to abort if something goes wrong.  */
5990  len = EDGE_COUNT (l0_bb->succs);
5991
5992  /* Use vec::quick_push on label_vec throughout, since we know the size
5993     in advance.  */
5994  auto_vec<tree> label_vec (len);
5995
5996  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5997     GIMPLE_OMP_SECTIONS statement.  */
5998  si = gsi_last_bb (entry_bb);
5999  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6000  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6001  vin = gimple_omp_sections_control (sections_stmt);
6002  if (!is_combined_parallel (region))
6003    {
6004      /* If we are not inside a combined parallel+sections region,
6005	 call GOMP_sections_start.  */
6006      t = build_int_cst (unsigned_type_node, len - 1);
6007      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6008      stmt = gimple_build_call (u, 1, t);
6009    }
6010  else
6011    {
6012      /* Otherwise, call GOMP_sections_next.  */
6013      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6014      stmt = gimple_build_call (u, 0);
6015    }
6016  gimple_call_set_lhs (stmt, vin);
6017  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6018  gsi_remove (&si, true);
6019
6020  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6021     L0_BB.  */
6022  switch_si = gsi_last_bb (l0_bb);
6023  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6024  if (exit_reachable)
6025    {
6026      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6027      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6028      vmain = gimple_omp_continue_control_use (cont);
6029      vnext = gimple_omp_continue_control_def (cont);
6030    }
6031  else
6032    {
6033      vmain = vin;
6034      vnext = NULL_TREE;
6035    }
6036
6037  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6038  label_vec.quick_push (t);
6039  i = 1;
6040
6041  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
6042  for (inner = region->inner, casei = 1;
6043       inner;
6044       inner = inner->next, i++, casei++)
6045    {
6046      basic_block s_entry_bb, s_exit_bb;
6047
6048      /* Skip optional reduction region.  */
6049      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6050	{
6051	  --i;
6052	  --casei;
6053	  continue;
6054	}
6055
6056      s_entry_bb = inner->entry;
6057      s_exit_bb = inner->exit;
6058
6059      t = gimple_block_label (s_entry_bb);
6060      u = build_int_cst (unsigned_type_node, casei);
6061      u = build_case_label (u, NULL, t);
6062      label_vec.quick_push (u);
6063
6064      si = gsi_last_bb (s_entry_bb);
6065      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6066      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6067      gsi_remove (&si, true);
6068      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6069
6070      if (s_exit_bb == NULL)
6071	continue;
6072
6073      si = gsi_last_bb (s_exit_bb);
6074      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6075      gsi_remove (&si, true);
6076
6077      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6078    }
6079
6080  /* Error handling code goes in DEFAULT_BB.  */
6081  t = gimple_block_label (default_bb);
6082  u = build_case_label (NULL, NULL, t);
6083  make_edge (l0_bb, default_bb, 0);
6084  add_bb_to_loop (default_bb, current_loops->tree_root);
6085
6086  stmt = gimple_build_switch (vmain, u, label_vec);
6087  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6088  gsi_remove (&switch_si, true);
6089
6090  si = gsi_start_bb (default_bb);
6091  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6092  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6093
6094  if (exit_reachable)
6095    {
6096      tree bfn_decl;
6097
6098      /* Code to get the next section goes in L1_BB.  */
6099      si = gsi_last_bb (l1_bb);
6100      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6101
6102      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6103      stmt = gimple_build_call (bfn_decl, 0);
6104      gimple_call_set_lhs (stmt, vnext);
6105      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6106      gsi_remove (&si, true);
6107
6108      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6109    }
6110
6111  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
6112  si = gsi_last_bb (l2_bb);
6113  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6114    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6115  else if (gimple_omp_return_lhs (gsi_stmt (si)))
6116    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6117  else
6118    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6119  stmt = gimple_build_call (t, 0);
6120  if (gimple_omp_return_lhs (gsi_stmt (si)))
6121    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6122  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6123  gsi_remove (&si, true);
6124
6125  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6126}
6127
6128/* Expand code for an OpenMP single directive.  We've already expanded
6129   much of the code; here we simply place the GOMP_barrier call.  */
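
/* For instance (illustrative only), for

     #pragma omp single
     body;

   the body was handled during lowering; here we only replace the exit
   marker with a GOMP_barrier (or GOMP_barrier_cancel) call unless nowait
   was given.  */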
6130
6131static void
6132expand_omp_single (struct omp_region *region)
6133{
6134  basic_block entry_bb, exit_bb;
6135  gimple_stmt_iterator si;
6136
6137  entry_bb = region->entry;
6138  exit_bb = region->exit;
6139
6140  si = gsi_last_bb (entry_bb);
6141  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6142  gsi_remove (&si, true);
6143  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6144
6145  si = gsi_last_bb (exit_bb);
6146  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6147    {
6148      tree t = gimple_omp_return_lhs (gsi_stmt (si));
6149      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6150    }
6151  gsi_remove (&si, true);
6152  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6153}
6154
6155/* Generic expansion for OpenMP synchronization directives: master,
6156   taskgroup, ordered and critical.  All we need to do here is remove
6157   the entry and exit markers for REGION.  */
6158
6159static void
6160expand_omp_synch (struct omp_region *region)
6161{
6162  basic_block entry_bb, exit_bb;
6163  gimple_stmt_iterator si;
6164
6165  entry_bb = region->entry;
6166  exit_bb = region->exit;
6167
6168  si = gsi_last_bb (entry_bb);
6169  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6170	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6171	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6172	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6173	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6174	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6175  gsi_remove (&si, true);
6176  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6177
6178  if (exit_bb)
6179    {
6180      si = gsi_last_bb (exit_bb);
6181      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6182      gsi_remove (&si, true);
6183      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6184    }
6185}
6186
6187/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6188   operation as a normal volatile load.  */
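
/* E.g. (illustrative only) '#pragma omp atomic read  v = *p;' with an
   8-byte type becomes roughly 'v = __atomic_load_8 (p, MEMMODEL_RELAXED)',
   or MEMMODEL_SEQ_CST when the seq_cst clause is present.  */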
6189
6190static bool
6191expand_omp_atomic_load (basic_block load_bb, tree addr,
6192			tree loaded_val, int index)
6193{
6194  enum built_in_function tmpbase;
6195  gimple_stmt_iterator gsi;
6196  basic_block store_bb;
6197  location_t loc;
6198  gimple *stmt;
6199  tree decl, call, type, itype;
6200
6201  gsi = gsi_last_bb (load_bb);
6202  stmt = gsi_stmt (gsi);
6203  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6204  loc = gimple_location (stmt);
6205
6206  /* ??? If the target does not implement atomic_load_optab[mode], and mode
6207     is smaller than word size, then expand_atomic_load assumes that the load
6208     is atomic.  We could avoid the builtin entirely in this case.  */
6209
6210  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6211  decl = builtin_decl_explicit (tmpbase);
6212  if (decl == NULL_TREE)
6213    return false;
6214
6215  type = TREE_TYPE (loaded_val);
6216  itype = TREE_TYPE (TREE_TYPE (decl));
6217
6218  call = build_call_expr_loc (loc, decl, 2, addr,
6219			      build_int_cst (NULL,
6220					     gimple_omp_atomic_seq_cst_p (stmt)
6221					     ? MEMMODEL_SEQ_CST
6222					     : MEMMODEL_RELAXED));
6223  if (!useless_type_conversion_p (type, itype))
6224    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6225  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6226
6227  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6228  gsi_remove (&gsi, true);
6229
6230  store_bb = single_succ (load_bb);
6231  gsi = gsi_last_bb (store_bb);
6232  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6233  gsi_remove (&gsi, true);
6234
6235  if (gimple_in_ssa_p (cfun))
6236    update_ssa (TODO_update_ssa_no_phi);
6237
6238  return true;
6239}
6240
6241/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6242   operation as a normal volatile store.  */
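
/* E.g. (illustrative only) '#pragma omp atomic write  *p = x;' with an
   8-byte type becomes roughly '__atomic_store_8 (p, x, MEMMODEL_RELAXED)';
   when the previous value is also needed (atomic capture),
   __atomic_exchange_8 is used instead.  */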
6243
6244static bool
6245expand_omp_atomic_store (basic_block load_bb, tree addr,
6246			 tree loaded_val, tree stored_val, int index)
6247{
6248  enum built_in_function tmpbase;
6249  gimple_stmt_iterator gsi;
6250  basic_block store_bb = single_succ (load_bb);
6251  location_t loc;
6252  gimple *stmt;
6253  tree decl, call, type, itype;
6254  machine_mode imode;
6255  bool exchange;
6256
6257  gsi = gsi_last_bb (load_bb);
6258  stmt = gsi_stmt (gsi);
6259  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6260
6261  /* If the load value is needed, then this isn't a store but an exchange.  */
6262  exchange = gimple_omp_atomic_need_value_p (stmt);
6263
6264  gsi = gsi_last_bb (store_bb);
6265  stmt = gsi_stmt (gsi);
6266  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6267  loc = gimple_location (stmt);
6268
6269  /* ??? If the target does not implement atomic_store_optab[mode], and mode
6270     is smaller than word size, then expand_atomic_store assumes that the store
6271     is atomic.  We could avoid the builtin entirely in this case.  */
6272
6273  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6274  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6275  decl = builtin_decl_explicit (tmpbase);
6276  if (decl == NULL_TREE)
6277    return false;
6278
6279  type = TREE_TYPE (stored_val);
6280
6281  /* Dig out the type of the function's second argument.  */
6282  itype = TREE_TYPE (decl);
6283  itype = TYPE_ARG_TYPES (itype);
6284  itype = TREE_CHAIN (itype);
6285  itype = TREE_VALUE (itype);
6286  imode = TYPE_MODE (itype);
6287
6288  if (exchange && !can_atomic_exchange_p (imode, true))
6289    return false;
6290
6291  if (!useless_type_conversion_p (itype, type))
6292    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6293  call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6294			      build_int_cst (NULL,
6295					     gimple_omp_atomic_seq_cst_p (stmt)
6296					     ? MEMMODEL_SEQ_CST
6297					     : MEMMODEL_RELAXED));
6298  if (exchange)
6299    {
6300      if (!useless_type_conversion_p (type, itype))
6301	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6302      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6303    }
6304
6305  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6306  gsi_remove (&gsi, true);
6307
6308  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
6309  gsi = gsi_last_bb (load_bb);
6310  gsi_remove (&gsi, true);
6311
6312  if (gimple_in_ssa_p (cfun))
6313    update_ssa (TODO_update_ssa_no_phi);
6314
6315  return true;
6316}
6317
6318/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
6319   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
6320   size of the data type, and thus usable to find the index of the builtin
6321   decl.  Returns false if the expression is not of the proper form.  */
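
/* For illustration (a sketch, not emitted verbatim): for a 4-byte int,
       #pragma omp atomic
	 x = x + expr;
   matches the PLUS_EXPR case below and becomes roughly
       __atomic_fetch_add_4 (&x, expr, MEMMODEL_RELAXED);
   with the corresponding *_add_fetch_4 form used when the new value
   (rather than the old one) is needed.  */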
6322
6323static bool
6324expand_omp_atomic_fetch_op (basic_block load_bb,
6325			    tree addr, tree loaded_val,
6326			    tree stored_val, int index)
6327{
6328  enum built_in_function oldbase, newbase, tmpbase;
6329  tree decl, itype, call;
6330  tree lhs, rhs;
6331  basic_block store_bb = single_succ (load_bb);
6332  gimple_stmt_iterator gsi;
6333  gimple *stmt;
6334  location_t loc;
6335  enum tree_code code;
6336  bool need_old, need_new;
6337  machine_mode imode;
6338  bool seq_cst;
6339
6340  /* We expect to find the following sequences:
6341
6342   load_bb:
6343       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6344
6345   store_bb:
6346       val = tmp OP something; (or: something OP tmp)
6347       GIMPLE_OMP_STORE (val)
6348       GIMPLE_OMP_ATOMIC_STORE (val)
6349  ???FIXME: Allow a more flexible sequence.
6350  Perhaps use data flow to pick the statements.
6351
6352  */
6353
6354  gsi = gsi_after_labels (store_bb);
6355  stmt = gsi_stmt (gsi);
6356  loc = gimple_location (stmt);
6357  if (!is_gimple_assign (stmt))
6358    return false;
6359  gsi_next (&gsi);
6360  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6361    return false;
6362  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6363  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6364  seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6365  gcc_checking_assert (!need_old || !need_new);
6366
6367  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6368    return false;
6369
6370  /* Check for one of the supported fetch-op operations.  */
6371  code = gimple_assign_rhs_code (stmt);
6372  switch (code)
6373    {
6374    case PLUS_EXPR:
6375    case POINTER_PLUS_EXPR:
6376      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6377      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6378      break;
6379    case MINUS_EXPR:
6380      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6381      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6382      break;
6383    case BIT_AND_EXPR:
6384      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6385      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6386      break;
6387    case BIT_IOR_EXPR:
6388      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6389      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6390      break;
6391    case BIT_XOR_EXPR:
6392      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6393      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6394      break;
6395    default:
6396      return false;
6397    }
6398
6399  /* Make sure the expression is of the proper form.  */
6400  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6401    rhs = gimple_assign_rhs2 (stmt);
6402  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6403	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6404    rhs = gimple_assign_rhs1 (stmt);
6405  else
6406    return false;
6407
6408  tmpbase = ((enum built_in_function)
6409	     ((need_new ? newbase : oldbase) + index + 1));
6410  decl = builtin_decl_explicit (tmpbase);
6411  if (decl == NULL_TREE)
6412    return false;
6413  itype = TREE_TYPE (TREE_TYPE (decl));
6414  imode = TYPE_MODE (itype);
6415
6416  /* We could test all of the various optabs involved, but the fact of the
6417     matter is that (with the exception of i486 vs i586 and xadd) all targets
6418     that support any atomic operation optab also implement compare-and-swap.
6419     Let optabs.c take care of expanding any compare-and-swap loop.  */
6420  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6421    return false;
6422
6423  gsi = gsi_last_bb (load_bb);
6424  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6425
6426  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6427     It only requires that the operation happen atomically.  Thus we can
6428     use the RELAXED memory model.  */
6429  call = build_call_expr_loc (loc, decl, 3, addr,
6430			      fold_convert_loc (loc, itype, rhs),
6431			      build_int_cst (NULL,
6432					     seq_cst ? MEMMODEL_SEQ_CST
6433						     : MEMMODEL_RELAXED));
6434
6435  if (need_old || need_new)
6436    {
6437      lhs = need_old ? loaded_val : stored_val;
6438      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6439      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6440    }
6441  else
6442    call = fold_convert_loc (loc, void_type_node, call);
6443  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6444  gsi_remove (&gsi, true);
6445
6446  gsi = gsi_last_bb (store_bb);
6447  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6448  gsi_remove (&gsi, true);
6449  gsi = gsi_last_bb (store_bb);
6450  stmt = gsi_stmt (gsi);
6451  gsi_remove (&gsi, true);
6452
6453  if (gimple_in_ssa_p (cfun))
6454    {
6455      release_defs (stmt);
6456      update_ssa (TODO_update_ssa_no_phi);
6457    }
6458
6459  return true;
6460}
6461
6462/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6463
6464      oldval = *addr;
6465      repeat:
6466	newval = rhs;	 // with oldval replacing *addr in rhs
6467	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6468	if (oldval != newval)
6469	  goto repeat;
6470
6471   INDEX is log2 of the size of the data type, and thus usable to find the
6472   index of the builtin decl.  */
6473
6474static bool
6475expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6476			    tree addr, tree loaded_val, tree stored_val,
6477			    int index)
6478{
6479  tree loadedi, storedi, initial, new_storedi, old_vali;
6480  tree type, itype, cmpxchg, iaddr;
6481  gimple_stmt_iterator si;
6482  basic_block loop_header = single_succ (load_bb);
6483  gimple *phi, *stmt;
6484  edge e;
6485  enum built_in_function fncode;
6486
6487  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6488     order to use the RELAXED memory model effectively.  */
6489  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6490				    + index + 1);
6491  cmpxchg = builtin_decl_explicit (fncode);
6492  if (cmpxchg == NULL_TREE)
6493    return false;
6494  type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6495  itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6496
6497  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6498      || !can_atomic_load_p (TYPE_MODE (itype)))
6499    return false;
6500
6501  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
6502  si = gsi_last_bb (load_bb);
6503  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6504
6505  /* For floating-point values, we'll need to view-convert them to integers
6506     so that we can perform the atomic compare and swap.  Simplify the
6507     following code by always setting up the "i"ntegral variables.  */
6508  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6509    {
6510      tree iaddr_val;
6511
6512      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6513							   true));
6514      iaddr_val
6515	= force_gimple_operand_gsi (&si,
6516				    fold_convert (TREE_TYPE (iaddr), addr),
6517				    false, NULL_TREE, true, GSI_SAME_STMT);
6518      stmt = gimple_build_assign (iaddr, iaddr_val);
6519      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6520      loadedi = create_tmp_var (itype);
6521      if (gimple_in_ssa_p (cfun))
6522	loadedi = make_ssa_name (loadedi);
6523    }
6524  else
6525    {
6526      iaddr = addr;
6527      loadedi = loaded_val;
6528    }
6529
6530  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6531  tree loaddecl = builtin_decl_explicit (fncode);
6532  if (loaddecl)
6533    initial
6534      = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6535		      build_call_expr (loaddecl, 2, iaddr,
6536				       build_int_cst (NULL_TREE,
6537						      MEMMODEL_RELAXED)));
6538  else
6539    initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6540		      build_int_cst (TREE_TYPE (iaddr), 0));
6541
6542  initial
6543    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6544				GSI_SAME_STMT);
6545
6546  /* Move the value to the LOADEDI temporary.  */
6547  if (gimple_in_ssa_p (cfun))
6548    {
6549      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6550      phi = create_phi_node (loadedi, loop_header);
6551      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6552	       initial);
6553    }
6554  else
6555    gsi_insert_before (&si,
6556		       gimple_build_assign (loadedi, initial),
6557		       GSI_SAME_STMT);
6558  if (loadedi != loaded_val)
6559    {
6560      gimple_stmt_iterator gsi2;
6561      tree x;
6562
6563      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6564      gsi2 = gsi_start_bb (loop_header);
6565      if (gimple_in_ssa_p (cfun))
6566	{
6567	  gassign *stmt;
6568	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6569					true, GSI_SAME_STMT);
6570	  stmt = gimple_build_assign (loaded_val, x);
6571	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6572	}
6573      else
6574	{
6575	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6576	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6577				    true, GSI_SAME_STMT);
6578	}
6579    }
6580  gsi_remove (&si, true);
6581
6582  si = gsi_last_bb (store_bb);
6583  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6584
6585  if (iaddr == addr)
6586    storedi = stored_val;
6587  else
6588    storedi
6589      = force_gimple_operand_gsi (&si,
6590				  build1 (VIEW_CONVERT_EXPR, itype,
6591					  stored_val), true, NULL_TREE, true,
6592				  GSI_SAME_STMT);
6593
6594  /* Build the compare&swap statement.  */
6595  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6596  new_storedi = force_gimple_operand_gsi (&si,
6597					  fold_convert (TREE_TYPE (loadedi),
6598							new_storedi),
6599					  true, NULL_TREE,
6600					  true, GSI_SAME_STMT);
6601
6602  if (gimple_in_ssa_p (cfun))
6603    old_vali = loadedi;
6604  else
6605    {
6606      old_vali = create_tmp_var (TREE_TYPE (loadedi));
6607      stmt = gimple_build_assign (old_vali, loadedi);
6608      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6609
6610      stmt = gimple_build_assign (loadedi, new_storedi);
6611      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6612    }
6613
6614  /* Note that we always perform the comparison as an integer, even for
6615     floating point.  This allows the atomic operation to properly
6616     succeed even with NaNs and -0.0.  */
6617  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6618  stmt = gimple_build_cond_empty (ne);
6619  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6620
6621  /* Update cfg.  */
6622  e = single_succ_edge (store_bb);
6623  e->flags &= ~EDGE_FALLTHRU;
6624  e->flags |= EDGE_FALSE_VALUE;
6625
6626  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6627
6628  /* Copy the new value to loadedi (we already did that before the condition
6629     if we are not in SSA).  */
6630  if (gimple_in_ssa_p (cfun))
6631    {
6632      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6633      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6634    }
6635
6636  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
6637  gsi_remove (&si, true);
6638
6639  struct loop *loop = alloc_loop ();
6640  loop->header = loop_header;
6641  loop->latch = store_bb;
6642  add_loop (loop, loop_header->loop_father);
6643
6644  if (gimple_in_ssa_p (cfun))
6645    update_ssa (TODO_update_ssa_no_phi);
6646
6647  return true;
6648}
6649
6650/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
6651
6652				  GOMP_atomic_start ();
6653				  *addr = rhs;
6654				  GOMP_atomic_end ();
6655
6656   The result is not globally atomic, but works so long as all parallel
6657   references are within #pragma omp atomic directives.  According to
6658   responses received from omp@openmp.org, this appears to be within spec,
6659   which makes sense, since that's how several other compilers handle
6660   this situation as well.
6661   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6662   expanding.  STORED_VAL is the operand of the matching
6663   GIMPLE_OMP_ATOMIC_STORE.
6664
6665   We replace
6666   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6667   loaded_val = *addr;
6668
6669   and replace
6670   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
6671   *addr = stored_val;
6672*/
6673
6674static bool
6675expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6676			 tree addr, tree loaded_val, tree stored_val)
6677{
6678  gimple_stmt_iterator si;
6679  gassign *stmt;
6680  tree t;
6681
6682  si = gsi_last_bb (load_bb);
6683  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6684
6685  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6686  t = build_call_expr (t, 0);
6687  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6688
6689  stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6690  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6691  gsi_remove (&si, true);
6692
6693  si = gsi_last_bb (store_bb);
6694  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6695
6696  stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6697			      stored_val);
6698  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6699
6700  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6701  t = build_call_expr (t, 0);
6702  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6703  gsi_remove (&si, true);
6704
6705  if (gimple_in_ssa_p (cfun))
6706    update_ssa (TODO_update_ssa_no_phi);
6707  return true;
6708}
6709
6710/* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it
6711   using expand_omp_atomic_fetch_op.  If that fails, we try to
6712   call expand_omp_atomic_pipeline, and if that fails too, the
6713   ultimate fallback is wrapping the operation in a mutex
6714   (expand_omp_atomic_mutex).  REGION is the atomic region built
6715   by build_omp_regions_1().  */
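
/* For illustration (a sketch): INDEX as used by the helpers above is log2 of
   the operand size in bytes, so a 4-byte int gives INDEX == 2 and, e.g.,
   BUILT_IN_ATOMIC_LOAD_N + INDEX + 1 selects BUILT_IN_ATOMIC_LOAD_4; operand
   sizes of 1, 2, 4, 8 and 16 bytes (INDEX 0 to 4) are handled this way.  */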
6716
6717static void
6718expand_omp_atomic (struct omp_region *region)
6719{
6720  basic_block load_bb = region->entry, store_bb = region->exit;
6721  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6722  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6723  tree loaded_val = gimple_omp_atomic_load_lhs (load);
6724  tree addr = gimple_omp_atomic_load_rhs (load);
6725  tree stored_val = gimple_omp_atomic_store_val (store);
6726  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6727  HOST_WIDE_INT index;
6728
6729  /* Make sure the type is one of the supported sizes.  */
6730  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6731  index = exact_log2 (index);
6732  if (index >= 0 && index <= 4)
6733    {
6734      unsigned int align = TYPE_ALIGN_UNIT (type);
6735
6736      /* __sync builtins require strict data alignment.  */
6737      if (exact_log2 (align) >= index)
6738	{
6739	  /* Atomic load.  */
6740	  if (loaded_val == stored_val
6741	      && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6742		  || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6743	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6744	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6745	    return;
6746
6747	  /* Atomic store.  */
6748	  if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6749	       || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6750	      && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6751	      && store_bb == single_succ (load_bb)
6752	      && first_stmt (store_bb) == store
6753	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
6754					  stored_val, index))
6755	    return;
6756
6757	  /* When possible, use specialized atomic update functions.  */
6758	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6759	      && store_bb == single_succ (load_bb)
6760	      && expand_omp_atomic_fetch_op (load_bb, addr,
6761					     loaded_val, stored_val, index))
6762	    return;
6763
6764	  /* If we don't have specialized __sync builtins, try to implement
6765	     it as a compare and swap loop.  */
6766	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6767					  loaded_val, stored_val, index))
6768	    return;
6769	}
6770    }
6771
6772  /* The ultimate fallback is wrapping the operation in a mutex.  */
6773  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6774}
6775
6776/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6777   at REGION_EXIT.  */
6778
6779static void
6780mark_loops_in_oacc_kernels_region (basic_block region_entry,
6781				   basic_block region_exit)
6782{
6783  struct loop *outer = region_entry->loop_father;
6784  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6785
6786  /* Don't parallelize the kernels region if it contains more than one outer
6787     loop.  */
6788  unsigned int nr_outer_loops = 0;
6789  struct loop *single_outer = NULL;
6790  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6791    {
6792      gcc_assert (loop_outer (loop) == outer);
6793
6794      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6795	continue;
6796
6797      if (region_exit != NULL
6798	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6799	continue;
6800
6801      nr_outer_loops++;
6802      single_outer = loop;
6803    }
6804  if (nr_outer_loops != 1)
6805    return;
6806
6807  for (struct loop *loop = single_outer->inner;
6808       loop != NULL;
6809       loop = loop->inner)
6810    if (loop->next)
6811      return;
6812
6813  /* Mark the loops in the region.  */
6814  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6815    loop->in_oacc_kernels_region = true;
6816}
6817
6818/* Types used to pass grid and workgroup sizes to kernel invocation.  */
6819
6820struct GTY(()) grid_launch_attributes_trees
6821{
6822  tree kernel_dim_array_type;
6823  tree kernel_lattrs_dimnum_decl;
6824  tree kernel_lattrs_grid_decl;
6825  tree kernel_lattrs_group_decl;
6826  tree kernel_launch_attributes_type;
6827};
6828
6829static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6830
6831/* Create types used to pass kernel launch attributes to target.  */
6832
6833static void
6834grid_create_kernel_launch_attr_types (void)
6835{
6836  if (grid_attr_trees)
6837    return;
6838  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6839
6840  tree dim_arr_index_type
6841    = build_index_type (build_int_cst (integer_type_node, 2));
6842  grid_attr_trees->kernel_dim_array_type
6843    = build_array_type (uint32_type_node, dim_arr_index_type);
6844
6845  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6846  grid_attr_trees->kernel_lattrs_dimnum_decl
6847    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6848		  uint32_type_node);
6849  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6850
6851  grid_attr_trees->kernel_lattrs_grid_decl
6852    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6853		  grid_attr_trees->kernel_dim_array_type);
6854  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6855    = grid_attr_trees->kernel_lattrs_dimnum_decl;
6856  grid_attr_trees->kernel_lattrs_group_decl
6857    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6858		  grid_attr_trees->kernel_dim_array_type);
6859  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6860    = grid_attr_trees->kernel_lattrs_grid_decl;
6861  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6862			 "__gomp_kernel_launch_attributes",
6863			 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6864}
6865
6866/* Insert before the current statement in GSI a store of VALUE to INDEX of
6867   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
6868   of type uint32_type_node.  */
6869
6870static void
6871grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6872			     tree fld_decl, int index, tree value)
6873{
6874  tree ref = build4 (ARRAY_REF, uint32_type_node,
6875		     build3 (COMPONENT_REF,
6876			     grid_attr_trees->kernel_dim_array_type,
6877			     range_var, fld_decl, NULL_TREE),
6878		     build_int_cst (integer_type_node, index),
6879		     NULL_TREE, NULL_TREE);
6880  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6881}
6882
6883/* Return a tree representation of a pointer to a structure with grid and
6884   work-group size information.  Statements filling that information will be
6885   inserted before GSI, TGT_STMT is the target statement which has the
6886   necessary information in it.  */
6887
6888static tree
6889grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6890				       gomp_target *tgt_stmt)
6891{
6892  grid_create_kernel_launch_attr_types ();
6893  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6894				"__kernel_launch_attrs");
6895
6896  unsigned max_dim = 0;
6897  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6898       clause;
6899       clause = OMP_CLAUSE_CHAIN (clause))
6900    {
6901      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6902	continue;
6903
6904      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6905      max_dim = MAX (dim, max_dim);
6906
6907      grid_insert_store_range_dim (gsi, lattrs,
6908				   grid_attr_trees->kernel_lattrs_grid_decl,
6909				   dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6910      grid_insert_store_range_dim (gsi, lattrs,
6911				   grid_attr_trees->kernel_lattrs_group_decl,
6912				   dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6913    }
6914
6915  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6916			grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6917  gcc_checking_assert (max_dim <= 2);
6918  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6919  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6920		     GSI_SAME_STMT);
6921  TREE_ADDRESSABLE (lattrs) = 1;
6922  return build_fold_addr_expr (lattrs);
6923}
6924
6925/* Build target argument identifier from the DEVICE identifier, value
6926   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
6927
6928static tree
6929get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6930{
6931  tree t = build_int_cst (integer_type_node, device);
6932  if (subsequent_param)
6933    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6934		     build_int_cst (integer_type_node,
6935				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6936  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6937		   build_int_cst (integer_type_node, id));
6938  return t;
6939}
6940
6941/* Like above but return it in a type that can be directly stored as an element
6942   of the argument array.  */
6943
6944static tree
6945get_target_argument_identifier (int device, bool subsequent_param, int id)
6946{
6947  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6948  return fold_convert (ptr_type_node, t);
6949}
6950
6951/* Return a target argument consisting of DEVICE identifier, value identifier
6952   ID, and the actual VALUE.  */
6953
6954static tree
6955get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6956			   tree value)
6957{
6958  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6959			fold_convert (integer_type_node, value),
6960			build_int_cst (unsigned_type_node,
6961				       GOMP_TARGET_ARG_VALUE_SHIFT));
6962  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6963		   get_target_argument_identifier_1 (device, false, id));
6964  t = fold_convert (ptr_type_node, t);
6965  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6966}
6967
6968/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6969   push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6970   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6971   arguments.  */
6972
6973static void
6974push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6975					 int id, tree value, vec <tree> *args)
6976{
6977  if (tree_fits_shwi_p (value)
6978      && tree_to_shwi (value) > -(1 << 15)
6979      && tree_to_shwi (value) < (1 << 15))
6980    args->quick_push (get_target_argument_value (gsi, device, id, value));
6981  else
6982    {
6983      args->quick_push (get_target_argument_identifier (device, true, id));
6984      value = fold_convert (ptr_type_node, value);
6985      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6986					GSI_SAME_STMT);
6987      args->quick_push (value);
6988    }
6989}
6990
6991/* Create an array of arguments that is then passed to GOMP_target.  */
6992
6993static tree
6994get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6995{
6996  auto_vec <tree, 6> args;
6997  tree clauses = gimple_omp_target_clauses (tgt_stmt);
6998  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6999  if (c)
7000    t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7001  else
7002    t = integer_minus_one_node;
7003  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7004					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7005
7006  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7007  if (c)
7008    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7009  else
7010    t = integer_minus_one_node;
7011  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7012					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
7013					   &args);
7014
7015  /* Add HSA-specific grid sizes, if available.  */
7016  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7017		       OMP_CLAUSE__GRIDDIM_))
7018    {
7019      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7020      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7021      args.quick_push (t);
7022      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7023    }
7024
7025  /* Produce more, perhaps device specific, arguments here.  */
7026
7027  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7028							  args.length () + 1),
7029				  ".omp_target_args");
7030  for (unsigned i = 0; i < args.length (); i++)
7031    {
7032      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7033			 build_int_cst (integer_type_node, i),
7034			 NULL_TREE, NULL_TREE);
7035      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7036			 GSI_SAME_STMT);
7037    }
7038  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7039		     build_int_cst (integer_type_node, args.length ()),
7040		     NULL_TREE, NULL_TREE);
7041  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7042		     GSI_SAME_STMT);
7043  TREE_ADDRESSABLE (argarray) = 1;
7044  return build_fold_addr_expr (argarray);
7045}
7046
7047/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
7048
7049static void
7050expand_omp_target (struct omp_region *region)
7051{
7052  basic_block entry_bb, exit_bb, new_bb;
7053  struct function *child_cfun;
7054  tree child_fn, block, t;
7055  gimple_stmt_iterator gsi;
7056  gomp_target *entry_stmt;
7057  gimple *stmt;
7058  edge e;
7059  bool offloaded, data_region;
7060
7061  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7062  new_bb = region->entry;
7063
7064  offloaded = is_gimple_omp_offloaded (entry_stmt);
7065  switch (gimple_omp_target_kind (entry_stmt))
7066    {
7067    case GF_OMP_TARGET_KIND_REGION:
7068    case GF_OMP_TARGET_KIND_UPDATE:
7069    case GF_OMP_TARGET_KIND_ENTER_DATA:
7070    case GF_OMP_TARGET_KIND_EXIT_DATA:
7071    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7072    case GF_OMP_TARGET_KIND_OACC_KERNELS:
7073    case GF_OMP_TARGET_KIND_OACC_UPDATE:
7074    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7075    case GF_OMP_TARGET_KIND_OACC_DECLARE:
7076      data_region = false;
7077      break;
7078    case GF_OMP_TARGET_KIND_DATA:
7079    case GF_OMP_TARGET_KIND_OACC_DATA:
7080    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7081      data_region = true;
7082      break;
7083    default:
7084      gcc_unreachable ();
7085    }
7086
7087  child_fn = NULL_TREE;
7088  child_cfun = NULL;
7089  if (offloaded)
7090    {
7091      child_fn = gimple_omp_target_child_fn (entry_stmt);
7092      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7093    }
7094
7095  /* Supported by expand_omp_taskreg, but not here.  */
7096  if (child_cfun != NULL)
7097    gcc_checking_assert (!child_cfun->cfg);
7098  gcc_checking_assert (!gimple_in_ssa_p (cfun));
7099
7100  entry_bb = region->entry;
7101  exit_bb = region->exit;
7102
7103  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7104    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7105
7106  if (offloaded)
7107    {
7108      unsigned srcidx, dstidx, num;
7109
7110      /* If the offloading region needs data sent from the parent
7111	 function, then the very first statement (except possible
7112	 tree profile counter updates) of the offloading body
7113	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
7114	 &.OMP_DATA_O is passed as an argument to the child function,
7115	 we need to replace it with the argument as seen by the child
7116	 function.
7117
7118	 In most cases, this will end up being the identity assignment
7119	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
7120	 a function call that has been inlined, the original PARM_DECL
7121	 .OMP_DATA_I may have been converted into a different local
7122	 variable.  In which case, we need to keep the assignment.  */
7123      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7124      if (data_arg)
7125	{
7126	  basic_block entry_succ_bb = single_succ (entry_bb);
7127	  gimple_stmt_iterator gsi;
7128	  tree arg;
7129	  gimple *tgtcopy_stmt = NULL;
7130	  tree sender = TREE_VEC_ELT (data_arg, 0);
7131
7132	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7133	    {
7134	      gcc_assert (!gsi_end_p (gsi));
7135	      stmt = gsi_stmt (gsi);
7136	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
7137		continue;
7138
7139	      if (gimple_num_ops (stmt) == 2)
7140		{
7141		  tree arg = gimple_assign_rhs1 (stmt);
7142
7143		  /* We're ignoring the subcode because we're
7144		     effectively doing a STRIP_NOPS.  */
7145
7146		  if (TREE_CODE (arg) == ADDR_EXPR
7147		      && TREE_OPERAND (arg, 0) == sender)
7148		    {
7149		      tgtcopy_stmt = stmt;
7150		      break;
7151		    }
7152		}
7153	    }
7154
7155	  gcc_assert (tgtcopy_stmt != NULL);
7156	  arg = DECL_ARGUMENTS (child_fn);
7157
7158	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7159	  gsi_remove (&gsi, true);
7160	}
7161
7162      /* Declare local variables needed in CHILD_CFUN.  */
7163      block = DECL_INITIAL (child_fn);
7164      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7165      /* The gimplifier could record temporaries in the offloading block
7166	 rather than in containing function's local_decls chain,
7167	 which would mean cgraph missed finalizing them.  Do it now.  */
7168      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7169	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7170	  varpool_node::finalize_decl (t);
7171      DECL_SAVED_TREE (child_fn) = NULL;
7172      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
7173      gimple_set_body (child_fn, NULL);
7174      TREE_USED (block) = 1;
7175
7176      /* Reset DECL_CONTEXT on function arguments.  */
7177      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7178	DECL_CONTEXT (t) = child_fn;
7179
7180      /* Split ENTRY_BB at GIMPLE_*,
7181	 so that it can be moved to the child function.  */
7182      gsi = gsi_last_bb (entry_bb);
7183      stmt = gsi_stmt (gsi);
7184      gcc_assert (stmt
7185		  && gimple_code (stmt) == gimple_code (entry_stmt));
7186      e = split_block (entry_bb, stmt);
7187      gsi_remove (&gsi, true);
7188      entry_bb = e->dest;
7189      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7190
7191      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
7192      if (exit_bb)
7193	{
7194	  gsi = gsi_last_bb (exit_bb);
7195	  gcc_assert (!gsi_end_p (gsi)
7196		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7197	  stmt = gimple_build_return (NULL);
7198	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7199	  gsi_remove (&gsi, true);
7200	}
7201
7202      /* Make sure to generate early debug for the function before
7203         outlining anything.  */
7204      if (! gimple_in_ssa_p (cfun))
7205	(*debug_hooks->early_global_decl) (cfun->decl);
7206
7207      /* Move the offloading region into CHILD_CFUN.  */
7208
7209      block = gimple_block (entry_stmt);
7210
7211      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7212      if (exit_bb)
7213	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7214      /* When the OMP expansion process cannot guarantee an up-to-date
7215	 loop tree, arrange for the child function to fixup loops.  */
7216      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7217	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7218
7219      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
7220      num = vec_safe_length (child_cfun->local_decls);
7221      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7222	{
7223	  t = (*child_cfun->local_decls)[srcidx];
7224	  if (DECL_CONTEXT (t) == cfun->decl)
7225	    continue;
7226	  if (srcidx != dstidx)
7227	    (*child_cfun->local_decls)[dstidx] = t;
7228	  dstidx++;
7229	}
7230      if (dstidx != num)
7231	vec_safe_truncate (child_cfun->local_decls, dstidx);
7232
7233      /* Inform the callgraph about the new function.  */
7234      child_cfun->curr_properties = cfun->curr_properties;
7235      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7236      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7237      cgraph_node *node = cgraph_node::get_create (child_fn);
7238      node->parallelized_function = 1;
7239      cgraph_node::add_new_function (child_fn, true);
7240
7241      /* Add the new function to the offload table.  */
7242      if (ENABLE_OFFLOADING)
7243	vec_safe_push (offload_funcs, child_fn);
7244
7245      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7246		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7247
7248      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
7249	 fixed in a following pass.  */
7250      push_cfun (child_cfun);
7251      if (need_asm)
7252	assign_assembler_name_if_needed (child_fn);
7253      cgraph_edge::rebuild_edges ();
7254
7255      /* Some EH regions might become dead, see PR34608.  If
7256	 pass_cleanup_cfg isn't the first pass to happen with the
7257	 new child, these dead EH edges might cause problems.
7258	 Clean them up now.  */
7259      if (flag_exceptions)
7260	{
7261	  basic_block bb;
7262	  bool changed = false;
7263
7264	  FOR_EACH_BB_FN (bb, cfun)
7265	    changed |= gimple_purge_dead_eh_edges (bb);
7266	  if (changed)
7267	    cleanup_tree_cfg ();
7268	}
7269      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7270	verify_loop_structure ();
7271      pop_cfun ();
7272
7273      if (dump_file && !gimple_in_ssa_p (cfun))
7274	{
7275	  omp_any_child_fn_dumped = true;
7276	  dump_function_header (dump_file, child_fn, dump_flags);
7277	  dump_function_to_file (child_fn, dump_file, dump_flags);
7278	}
7279    }
7280
7281  /* Emit a library call to launch the offloading region, or do data
7282     transfers.  */
7283  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7284  enum built_in_function start_ix;
7285  location_t clause_loc;
7286  unsigned int flags_i = 0;
7287  bool oacc_kernels_p = false;
7288
7289  switch (gimple_omp_target_kind (entry_stmt))
7290    {
7291    case GF_OMP_TARGET_KIND_REGION:
7292      start_ix = BUILT_IN_GOMP_TARGET;
7293      break;
7294    case GF_OMP_TARGET_KIND_DATA:
7295      start_ix = BUILT_IN_GOMP_TARGET_DATA;
7296      break;
7297    case GF_OMP_TARGET_KIND_UPDATE:
7298      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7299      break;
7300    case GF_OMP_TARGET_KIND_ENTER_DATA:
7301      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7302      break;
7303    case GF_OMP_TARGET_KIND_EXIT_DATA:
7304      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7305      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7306      break;
7307    case GF_OMP_TARGET_KIND_OACC_KERNELS:
7308      oacc_kernels_p = true;
7309      /* FALLTHROUGH */
7310    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7311      start_ix = BUILT_IN_GOACC_PARALLEL;
7312      break;
7313    case GF_OMP_TARGET_KIND_OACC_DATA:
7314    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7315      start_ix = BUILT_IN_GOACC_DATA_START;
7316      break;
7317    case GF_OMP_TARGET_KIND_OACC_UPDATE:
7318      start_ix = BUILT_IN_GOACC_UPDATE;
7319      break;
7320    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7321      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7322      break;
7323    case GF_OMP_TARGET_KIND_OACC_DECLARE:
7324      start_ix = BUILT_IN_GOACC_DECLARE;
7325      break;
7326    default:
7327      gcc_unreachable ();
7328    }
7329
7330  clauses = gimple_omp_target_clauses (entry_stmt);
7331
7332  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7333     library choose) and there is no conditional.  */
7334  cond = NULL_TREE;
7335  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7336
7337  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7338  if (c)
7339    cond = OMP_CLAUSE_IF_EXPR (c);
7340
7341  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7342  if (c)
7343    {
7344      /* Even if we pass it to all library function calls, it is currently only
7345	 defined/used for the OpenMP target ones.  */
7346      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7347			   || start_ix == BUILT_IN_GOMP_TARGET_DATA
7348			   || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7349			   || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7350
7351      device = OMP_CLAUSE_DEVICE_ID (c);
7352      clause_loc = OMP_CLAUSE_LOCATION (c);
7353    }
7354  else
7355    clause_loc = gimple_location (entry_stmt);
7356
7357  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7358  if (c)
7359    flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7360
7361  /* Ensure 'device' is of the correct type.  */
7362  device = fold_convert_loc (clause_loc, integer_type_node, device);
7363
7364  /* If we found the clause 'if (cond)', build
7365     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
7366  if (cond)
7367    {
7368      cond = gimple_boolify (cond);
7369
7370      basic_block cond_bb, then_bb, else_bb;
7371      edge e;
7372      tree tmp_var;
7373
7374      tmp_var = create_tmp_var (TREE_TYPE (device));
7375      if (offloaded)
7376	e = split_block_after_labels (new_bb);
7377      else
7378	{
7379	  gsi = gsi_last_bb (new_bb);
7380	  gsi_prev (&gsi);
7381	  e = split_block (new_bb, gsi_stmt (gsi));
7382	}
7383      cond_bb = e->src;
7384      new_bb = e->dest;
7385      remove_edge (e);
7386
7387      then_bb = create_empty_bb (cond_bb);
7388      else_bb = create_empty_bb (then_bb);
7389      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7390      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7391
7392      stmt = gimple_build_cond_empty (cond);
7393      gsi = gsi_last_bb (cond_bb);
7394      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7395
7396      gsi = gsi_start_bb (then_bb);
7397      stmt = gimple_build_assign (tmp_var, device);
7398      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7399
7400      gsi = gsi_start_bb (else_bb);
7401      stmt = gimple_build_assign (tmp_var,
7402				  build_int_cst (integer_type_node,
7403						 GOMP_DEVICE_HOST_FALLBACK));
7404      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7405
7406      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7407      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7408      add_bb_to_loop (then_bb, cond_bb->loop_father);
7409      add_bb_to_loop (else_bb, cond_bb->loop_father);
7410      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7411      make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7412
7413      device = tmp_var;
7414      gsi = gsi_last_bb (new_bb);
7415    }
7416  else
7417    {
7418      gsi = gsi_last_bb (new_bb);
7419      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7420					 true, GSI_SAME_STMT);
7421    }
7422
7423  t = gimple_omp_target_data_arg (entry_stmt);
7424  if (t == NULL)
7425    {
7426      t1 = size_zero_node;
7427      t2 = build_zero_cst (ptr_type_node);
7428      t3 = t2;
7429      t4 = t2;
7430    }
7431  else
7432    {
7433      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7434      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7435      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7436      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7437      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7438    }
7439
7440  gimple *g;
7441  bool tagging = false;
7442  /* The maximum number of arguments used by any start_ix, without varargs.  */
7443  auto_vec<tree, 11> args;
7444  args.quick_push (device);
7445  if (offloaded)
7446    args.quick_push (build_fold_addr_expr (child_fn));
7447  args.quick_push (t1);
7448  args.quick_push (t2);
7449  args.quick_push (t3);
7450  args.quick_push (t4);
7451  switch (start_ix)
7452    {
7453    case BUILT_IN_GOACC_DATA_START:
7454    case BUILT_IN_GOACC_DECLARE:
7455    case BUILT_IN_GOMP_TARGET_DATA:
7456      break;
7457    case BUILT_IN_GOMP_TARGET:
7458    case BUILT_IN_GOMP_TARGET_UPDATE:
7459    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7460      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7461      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7462      if (c)
7463	depend = OMP_CLAUSE_DECL (c);
7464      else
7465	depend = build_int_cst (ptr_type_node, 0);
7466      args.quick_push (depend);
7467      if (start_ix == BUILT_IN_GOMP_TARGET)
7468	args.quick_push (get_target_arguments (&gsi, entry_stmt));
7469      break;
7470    case BUILT_IN_GOACC_PARALLEL:
7471      {
7472	oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7473	tagging = true;
7474      }
7475      /* FALLTHRU */
7476    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7477    case BUILT_IN_GOACC_UPDATE:
7478      {
7479	tree t_async = NULL_TREE;
7480
7481	/* If present, use the value specified by the respective
7482	   clause, making sure that it is of the correct type.  */
7483	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7484	if (c)
7485	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7486				      integer_type_node,
7487				      OMP_CLAUSE_ASYNC_EXPR (c));
7488	else if (!tagging)
7489	  /* Default values for t_async.  */
7490	  t_async = fold_convert_loc (gimple_location (entry_stmt),
7491				      integer_type_node,
7492				      build_int_cst (integer_type_node,
7493						     GOMP_ASYNC_SYNC));
7494	if (tagging && t_async)
7495	  {
7496	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7497
7498	    if (TREE_CODE (t_async) == INTEGER_CST)
7499	      {
7500		/* See if we can pack the async arg into the tag's
7501		   operand.  */
7502		i_async = TREE_INT_CST_LOW (t_async);
7503		if (i_async < GOMP_LAUNCH_OP_MAX)
7504		  t_async = NULL_TREE;
7505		else
7506		  i_async = GOMP_LAUNCH_OP_MAX;
7507	      }
7508	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7509					      i_async));
7510	  }
7511	if (t_async)
7512	  args.safe_push (t_async);
7513
7514	/* Save the argument index, and ... */
7515	unsigned t_wait_idx = args.length ();
7516	unsigned num_waits = 0;
7517	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7518	if (!tagging || c)
7519	  /* ... push a placeholder.  */
7520	  args.safe_push (integer_zero_node);
7521
7522	for (; c; c = OMP_CLAUSE_CHAIN (c))
7523	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7524	    {
7525	      args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7526						integer_type_node,
7527						OMP_CLAUSE_WAIT_EXPR (c)));
7528	      num_waits++;
7529	    }
7530
7531	if (!tagging || num_waits)
7532	  {
7533	    tree len;
7534
7535	    /* Now that we know the number, update the placeholder.  */
7536	    if (tagging)
7537	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7538	    else
7539	      len = build_int_cst (integer_type_node, num_waits);
7540	    len = fold_convert_loc (gimple_location (entry_stmt),
7541				    unsigned_type_node, len);
7542	    args[t_wait_idx] = len;
7543	  }
7544      }
7545      break;
7546    default:
7547      gcc_unreachable ();
7548    }
7549  if (tagging)
7550    /*  Push terminal marker - zero.  */
7551    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7552
7553  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7554  gimple_set_location (g, gimple_location (entry_stmt));
7555  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7556  if (!offloaded)
7557    {
7558      g = gsi_stmt (gsi);
7559      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7560      gsi_remove (&gsi, true);
7561    }
7562  if (data_region && region->exit)
7563    {
7564      gsi = gsi_last_bb (region->exit);
7565      g = gsi_stmt (gsi);
7566      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7567      gsi_remove (&gsi, true);
7568    }
7569}
7570
7571/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7572   the iteration variable derived from the thread number.  INTRA_GROUP means this
7573   is an expansion of a loop iterating over work-items within a separate
7574   iteration over groups.  */
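
/* For illustration (a sketch): for each collapsed dimension DIM the loop
   control is replaced below by a straight-line computation, roughly

     v = n1 + (itype) <HSA work-item/group id for DIM> * step;

   using BUILT_IN_HSA_WORKGROUPID for the group iteration,
   BUILT_IN_HSA_WORKITEMID for the intra-group case and
   BUILT_IN_HSA_WORKITEMABSID otherwise, so the body runs once per
   work-item.  */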
7575
7576static void
7577grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7578{
7579  gimple_stmt_iterator gsi;
7580  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7581  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7582		       == GF_OMP_FOR_KIND_GRID_LOOP);
7583  size_t collapse = gimple_omp_for_collapse (for_stmt);
7584  struct omp_for_data_loop *loops
7585    = XALLOCAVEC (struct omp_for_data_loop,
7586		  gimple_omp_for_collapse (for_stmt));
7587  struct omp_for_data fd;
7588
7589  remove_edge (BRANCH_EDGE (kfor->entry));
7590  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7591
7592  gcc_assert (kfor->cont);
7593  omp_extract_for_data (for_stmt, &fd, loops);
7594
7595  gsi = gsi_start_bb (body_bb);
7596
7597  for (size_t dim = 0; dim < collapse; dim++)
7598    {
7599      tree type, itype;
7600      itype = type = TREE_TYPE (fd.loops[dim].v);
7601      if (POINTER_TYPE_P (type))
7602	itype = signed_type_for (type);
7603
7604      tree n1 = fd.loops[dim].n1;
7605      tree step = fd.loops[dim].step;
7606      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7607				     true, NULL_TREE, true, GSI_SAME_STMT);
7608      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7609				       true, NULL_TREE, true, GSI_SAME_STMT);
7610      tree threadid;
7611      if (gimple_omp_for_grid_group_iter (for_stmt))
7612	{
7613	  gcc_checking_assert (!intra_group);
7614	  threadid = build_call_expr (builtin_decl_explicit
7615				      (BUILT_IN_HSA_WORKGROUPID), 1,
7616				      build_int_cstu (unsigned_type_node, dim));
7617	}
7618      else if (intra_group)
7619	threadid = build_call_expr (builtin_decl_explicit
7620				    (BUILT_IN_HSA_WORKITEMID), 1,
7621				    build_int_cstu (unsigned_type_node, dim));
7622      else
7623	threadid = build_call_expr (builtin_decl_explicit
7624				    (BUILT_IN_HSA_WORKITEMABSID), 1,
7625				    build_int_cstu (unsigned_type_node, dim));
7626      threadid = fold_convert (itype, threadid);
7627      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7628					   true, GSI_SAME_STMT);
7629
7630      tree startvar = fd.loops[dim].v;
7631      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7632      if (POINTER_TYPE_P (type))
7633	t = fold_build_pointer_plus (n1, t);
7634      else
7635	t = fold_build2 (PLUS_EXPR, type, t, n1);
7636      t = fold_convert (type, t);
7637      t = force_gimple_operand_gsi (&gsi, t,
7638				    DECL_P (startvar)
7639				    && TREE_ADDRESSABLE (startvar),
7640				    NULL_TREE, true, GSI_SAME_STMT);
7641      gassign *assign_stmt = gimple_build_assign (startvar, t);
7642      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7643    }
7644  /* Remove the omp for statement.  */
7645  gsi = gsi_last_bb (kfor->entry);
7646  gsi_remove (&gsi, true);
7647
7648  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7649  gsi = gsi_last_bb (kfor->cont);
7650  gcc_assert (!gsi_end_p (gsi)
7651	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7652  gsi_remove (&gsi, true);
7653
7654  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
7655  gsi = gsi_last_bb (kfor->exit);
7656  gcc_assert (!gsi_end_p (gsi)
7657	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7658  if (intra_group)
7659    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7660  gsi_remove (&gsi, true);
7661
7662  /* Fixup the much simpler CFG.  */
7663  remove_edge (find_edge (kfor->cont, body_bb));
7664
7665  if (kfor->cont != body_bb)
7666    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7667  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7668}
7669
7670/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7671   argument_decls.  */
7672
7673struct grid_arg_decl_map
7674{
7675  tree old_arg;
7676  tree new_arg;
7677};
7678
7679/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7680   pertaining to kernel function.  */
7681
7682static tree
7683grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7684{
7685  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7686  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7687  tree t = *tp;
7688
7689  if (t == adm->old_arg)
7690    *tp = adm->new_arg;
7691  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7692  return NULL_TREE;
7693}
7694
7695/* If the TARGET region contains a gridified kernel body for-loop, remove its
7696   region from the TARGET and expand it in HSA gridified kernel fashion.  */
7697
7698static void
7699grid_expand_target_grid_body (struct omp_region *target)
7700{
7701  if (!hsa_gen_requested_p ())
7702    return;
7703
7704  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7705  struct omp_region **pp;
7706
7707  for (pp = &target->inner; *pp; pp = &(*pp)->next)
7708    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7709      break;
7710
7711  struct omp_region *gpukernel = *pp;
7712
7713  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7714  if (!gpukernel)
7715    {
7716      /* HSA cannot handle OACC stuff.  */
7717      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7718	return;
7719      gcc_checking_assert (orig_child_fndecl);
7720      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7721				    OMP_CLAUSE__GRIDDIM_));
7722      cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7723
7724      hsa_register_kernel (n);
7725      return;
7726    }
7727
7728  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7729			       OMP_CLAUSE__GRIDDIM_));
7730  tree inside_block
7731    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7732  *pp = gpukernel->next;
7733  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7734    if ((*pp)->type == GIMPLE_OMP_FOR)
7735      break;
7736
7737  struct omp_region *kfor = *pp;
7738  gcc_assert (kfor);
7739  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7740  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7741  *pp = kfor->next;
7742  if (kfor->inner)
7743    {
7744      if (gimple_omp_for_grid_group_iter (for_stmt))
7745	{
7746	  struct omp_region **next_pp;
7747	  for (pp = &kfor->inner; *pp; pp = next_pp)
7748	    {
7749	      next_pp = &(*pp)->next;
7750	      if ((*pp)->type != GIMPLE_OMP_FOR)
7751		continue;
7752	      gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7753	      gcc_assert (gimple_omp_for_kind (inner)
7754			  == GF_OMP_FOR_KIND_GRID_LOOP);
7755	      grid_expand_omp_for_loop (*pp, true);
7756	      *pp = (*pp)->next;
7757	      next_pp = pp;
7758	    }
7759	}
7760      expand_omp (kfor->inner);
7761    }
7762  if (gpukernel->inner)
7763    expand_omp (gpukernel->inner);
7764
7765  tree kern_fndecl = copy_node (orig_child_fndecl);
7766  DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7767  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7768  tree tgtblock = gimple_block (tgt_stmt);
7769  tree fniniblock = make_node (BLOCK);
7770  BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7771  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7772  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7773  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7774  DECL_INITIAL (kern_fndecl) = fniniblock;
7775  push_struct_function (kern_fndecl);
7776  cfun->function_end_locus = gimple_location (tgt_stmt);
7777  init_tree_ssa (cfun);
7778  pop_cfun ();
7779
7780  /* Make sure to generate early debug for the function before
7781     outlining anything.  */
7782  if (! gimple_in_ssa_p (cfun))
7783    (*debug_hooks->early_global_decl) (cfun->decl);
7784
7785  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7786  gcc_assert (!DECL_CHAIN (old_parm_decl));
7787  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7788  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7789  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7790  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7791  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7792  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7793  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7794  kern_cfun->curr_properties = cfun->curr_properties;
7795
7796  grid_expand_omp_for_loop (kfor, false);
7797
7798  /* Remove the omp for statement.  */
7799  gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7800  gsi_remove (&gsi, true);
7801  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7802     return.  */
7803  gsi = gsi_last_bb (gpukernel->exit);
7804  gcc_assert (!gsi_end_p (gsi)
7805	      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7806  gimple *ret_stmt = gimple_build_return (NULL);
7807  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7808  gsi_remove (&gsi, true);
7809
7810  /* Statements in the first BB of the target construct have been produced by
7811     target lowering and must be copied into the GPUKERNEL, with two
7812     exceptions: the first OMP statement and the assignment that takes the
7813     address of the OMP_DATA variable.  */
7814  gsi = gsi_start_bb (single_succ (gpukernel->entry));
7815  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7816  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7817  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7818       !gsi_end_p (tsi); gsi_next (&tsi))
7819    {
7820      gimple *stmt = gsi_stmt (tsi);
7821      if (is_gimple_omp (stmt))
7822	break;
7823      if (sender
7824	  && is_gimple_assign (stmt)
7825	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7826	  && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7827	continue;
7828      gimple *copy = gimple_copy (stmt);
7829      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7830      gimple_set_block (copy, fniniblock);
7831    }
7832
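  /* Outline the single-entry, single-exit region of the kernel body into the
     newly created function.  */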
7833  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7834			  gpukernel->exit, inside_block);
7835
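  /* Register the new function as the HSA kernel corresponding to the original
     child function and force it to be output even though nothing refers to it
     yet.  */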
7836  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7837  kcn->mark_force_output ();
7838  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7839
7840  hsa_register_kernel (kcn, orig_child);
7841
7842  cgraph_node::add_new_function (kern_fndecl, true);
7843  push_cfun (kern_cfun);
7844  cgraph_edge::rebuild_edges ();
7845
7846  /* Re-map any mention of the PARM_DECL of the original function to the
7847     PARM_DECL of the new one.
7848
7849     TODO: It would be great if lowering produced references into the GPU
7850     kernel decl straight away and we did not have to do this.  */
7851  struct grid_arg_decl_map adm;
7852  adm.old_arg = old_parm_decl;
7853  adm.new_arg = new_parm_decl;
7854  basic_block bb;
7855  FOR_EACH_BB_FN (bb, kern_cfun)
7856    {
7857      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7858	{
7859	  gimple *stmt = gsi_stmt (gsi);
7860	  struct walk_stmt_info wi;
7861	  memset (&wi, 0, sizeof (wi));
7862	  wi.info = &adm;
7863	  walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7864	}
7865    }
7866  pop_cfun ();
7867
7868  return;
7869}
7870
7871/* Expand the parallel region tree rooted at REGION.  Expansion
7872   proceeds in depth-first order.  Innermost regions are expanded
7873   first.  This way, parallel regions that require a new function to
7874   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7875   internal dependencies in their body.  */
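
/* For illustration only (a hypothetical input rather than a particular
   testcase):

	#pragma omp parallel
	#pragma omp for
	for (i = 0; i < n; i++)
	  body;

   gives a GIMPLE_OMP_PARALLEL region whose only child is a GIMPLE_OMP_FOR
   region.  determine_parallel_type may mark such a pair as a combined
   parallel+workshare region; the inner GIMPLE_OMP_FOR region is expanded
   before the GIMPLE_OMP_PARALLEL region that encloses it.  */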
7876
7877static void
7878expand_omp (struct omp_region *region)
7879{
7880  omp_any_child_fn_dumped = false;
7881  while (region)
7882    {
7883      location_t saved_location;
7884      gimple *inner_stmt = NULL;
7885
7886      /* First, determine whether this is a combined parallel+workshare
7887	 region.  */
7888      if (region->type == GIMPLE_OMP_PARALLEL)
7889	determine_parallel_type (region);
7890      else if (region->type == GIMPLE_OMP_TARGET)
7891	grid_expand_target_grid_body (region);
7892
7893      if (region->type == GIMPLE_OMP_FOR
7894	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
7895	inner_stmt = last_stmt (region->inner->entry);
7896
7897      if (region->inner)
7898	expand_omp (region->inner);
7899
7900      saved_location = input_location;
7901      if (gimple_has_location (last_stmt (region->entry)))
7902	input_location = gimple_location (last_stmt (region->entry));
7903
7904      switch (region->type)
7905	{
7906	case GIMPLE_OMP_PARALLEL:
7907	case GIMPLE_OMP_TASK:
7908	  expand_omp_taskreg (region);
7909	  break;
7910
7911	case GIMPLE_OMP_FOR:
7912	  expand_omp_for (region, inner_stmt);
7913	  break;
7914
7915	case GIMPLE_OMP_SECTIONS:
7916	  expand_omp_sections (region);
7917	  break;
7918
7919	case GIMPLE_OMP_SECTION:
7920	  /* Individual omp sections are handled together with their
7921	     parent GIMPLE_OMP_SECTIONS region.  */
7922	  break;
7923
7924	case GIMPLE_OMP_SINGLE:
7925	  expand_omp_single (region);
7926	  break;
7927
7928	case GIMPLE_OMP_ORDERED:
7929	  {
7930	    gomp_ordered *ord_stmt
7931	      = as_a <gomp_ordered *> (last_stmt (region->entry));
7932	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7933				 OMP_CLAUSE_DEPEND))
7934	      {
7935		/* We'll expand these when expanding the corresponding
7936		   worksharing region with an ordered(n) clause.  */
7937		gcc_assert (region->outer
7938			    && region->outer->type == GIMPLE_OMP_FOR);
7939		region->ord_stmt = ord_stmt;
7940		break;
7941	      }
7942	  }
7943	  /* FALLTHRU */
7944	case GIMPLE_OMP_MASTER:
7945	case GIMPLE_OMP_TASKGROUP:
7946	case GIMPLE_OMP_CRITICAL:
7947	case GIMPLE_OMP_TEAMS:
7948	  expand_omp_synch (region);
7949	  break;
7950
7951	case GIMPLE_OMP_ATOMIC_LOAD:
7952	  expand_omp_atomic (region);
7953	  break;
7954
7955	case GIMPLE_OMP_TARGET:
7956	  expand_omp_target (region);
7957	  break;
7958
7959	default:
7960	  gcc_unreachable ();
7961	}
7962
7963      input_location = saved_location;
7964      region = region->next;
7965    }
7966  if (omp_any_child_fn_dumped)
7967    {
7968      if (dump_file)
7969	dump_function_header (dump_file, current_function_decl, dump_flags);
7970      omp_any_child_fn_dumped = false;
7971    }
7972}
7973
7974/* Helper for build_omp_regions.  Scan the dominator tree starting at
7975   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
7976   true, the function stops once a single region tree has been built
7977   (otherwise, the whole forest of OMP constructs may be built).  */
7978
7979static void
7980build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7981		     bool single_tree)
7982{
7983  gimple_stmt_iterator gsi;
7984  gimple *stmt;
7985  basic_block son;
7986
7987  gsi = gsi_last_bb (bb);
7988  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7989    {
7990      struct omp_region *region;
7991      enum gimple_code code;
7992
7993      stmt = gsi_stmt (gsi);
7994      code = gimple_code (stmt);
7995      if (code == GIMPLE_OMP_RETURN)
7996	{
7997	  /* STMT is the return point out of region PARENT.  Mark it
7998	     as the exit point and make PARENT the immediately
7999	     enclosing region.  */
8000	  gcc_assert (parent);
8001	  region = parent;
8002	  region->exit = bb;
8003	  parent = parent->outer;
8004	}
8005      else if (code == GIMPLE_OMP_ATOMIC_STORE)
8006	{
8007	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8008	     GIMPLE_OMP_RETURN, but matches with
8009	     GIMPLE_OMP_ATOMIC_LOAD.  */
8010	  gcc_assert (parent);
8011	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8012	  region = parent;
8013	  region->exit = bb;
8014	  parent = parent->outer;
8015	}
8016      else if (code == GIMPLE_OMP_CONTINUE)
8017	{
8018	  gcc_assert (parent);
8019	  parent->cont = bb;
8020	}
8021      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8022	{
8023	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8024	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
8025	}
8026      else
8027	{
8028	  region = new_omp_region (bb, code, parent);
8029	  /* Otherwise...  */
8030	  if (code == GIMPLE_OMP_TARGET)
8031	    {
8032	      switch (gimple_omp_target_kind (stmt))
8033		{
8034		case GF_OMP_TARGET_KIND_REGION:
8035		case GF_OMP_TARGET_KIND_DATA:
8036		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8037		case GF_OMP_TARGET_KIND_OACC_KERNELS:
8038		case GF_OMP_TARGET_KIND_OACC_DATA:
8039		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8040		  break;
8041		case GF_OMP_TARGET_KIND_UPDATE:
8042		case GF_OMP_TARGET_KIND_ENTER_DATA:
8043		case GF_OMP_TARGET_KIND_EXIT_DATA:
8044		case GF_OMP_TARGET_KIND_OACC_UPDATE:
8045		case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8046		case GF_OMP_TARGET_KIND_OACC_DECLARE:
8047		  /* ..., other than for those stand-alone directives...  */
8048		  region = NULL;
8049		  break;
8050		default:
8051		  gcc_unreachable ();
8052		}
8053	    }
8054	  else if (code == GIMPLE_OMP_ORDERED
8055		   && omp_find_clause (gimple_omp_ordered_clauses
8056					 (as_a <gomp_ordered *> (stmt)),
8057				       OMP_CLAUSE_DEPEND))
8058	    /* #pragma omp ordered depend is also just a stand-alone
8059	       directive.  */
8060	    region = NULL;
8061	  /* ..., this directive becomes the parent for a new region.  */
8062	  if (region)
8063	    parent = region;
8064	}
8065    }
8066
8067  if (single_tree && !parent)
8068    return;
8069
8070  for (son = first_dom_son (CDI_DOMINATORS, bb);
8071       son;
8072       son = next_dom_son (CDI_DOMINATORS, son))
8073    build_omp_regions_1 (son, parent, single_tree);
8074}
8075
8076/* Build the tree of OMP regions rooted at ROOT, storing the result in
8077   root_omp_region.  */
8078
8079static void
8080build_omp_regions_root (basic_block root)
8081{
8082  gcc_assert (root_omp_region == NULL);
8083  build_omp_regions_1 (root, NULL, true);
8084  gcc_assert (root_omp_region != NULL);
8085}
8086
8087/* Expand the OMP construct (and its subconstructs) starting in HEAD.  */
8088
8089void
8090omp_expand_local (basic_block head)
8091{
8092  build_omp_regions_root (head);
8093  if (dump_file && (dump_flags & TDF_DETAILS))
8094    {
8095      fprintf (dump_file, "\nOMP region tree\n\n");
8096      dump_omp_region (dump_file, root_omp_region, 0);
8097      fprintf (dump_file, "\n");
8098    }
8099
8100  remove_exit_barriers (root_omp_region);
8101  expand_omp (root_omp_region);
8102
8103  omp_free_regions ();
8104}
8105
8106/* Scan the CFG and build a tree of OMP regions, storing the root of the
8107   tree in root_omp_region.  */
8108
8109static void
8110build_omp_regions (void)
8111{
8112  gcc_assert (root_omp_region == NULL);
8113  calculate_dominance_info (CDI_DOMINATORS);
8114  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8115}
8116
8117/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
8118
8119static unsigned int
8120execute_expand_omp (void)
8121{
8122  build_omp_regions ();
8123
8124  if (!root_omp_region)
8125    return 0;
8126
8127  if (dump_file)
8128    {
8129      fprintf (dump_file, "\nOMP region tree\n\n");
8130      dump_omp_region (dump_file, root_omp_region, 0);
8131      fprintf (dump_file, "\n");
8132    }
8133
8134  remove_exit_barriers (root_omp_region);
8135
8136  expand_omp (root_omp_region);
8137
8138  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8139    verify_loop_structure ();
8140  cleanup_tree_cfg ();
8141
8142  omp_free_regions ();
8143
8144  return 0;
8145}
8146
8147/* OMP expansion -- the default pass, run before creation of SSA form.  */
8148
8149namespace {
8150
8151const pass_data pass_data_expand_omp =
8152{
8153  GIMPLE_PASS, /* type */
8154  "ompexp", /* name */
8155  OPTGROUP_OMP, /* optinfo_flags */
8156  TV_NONE, /* tv_id */
8157  PROP_gimple_any, /* properties_required */
8158  PROP_gimple_eomp, /* properties_provided */
8159  0, /* properties_destroyed */
8160  0, /* todo_flags_start */
8161  0, /* todo_flags_finish */
8162};
8163
8164class pass_expand_omp : public gimple_opt_pass
8165{
8166public:
8167  pass_expand_omp (gcc::context *ctxt)
8168    : gimple_opt_pass (pass_data_expand_omp, ctxt)
8169  {}
8170
8171  /* opt_pass methods: */
8172  virtual unsigned int execute (function *)
8173    {
8174      bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8175		    || flag_openmp_simd != 0)
8176		   && !seen_error ());
8177
8178      /* This pass always runs, to provide PROP_gimple_eomp.
8179	 But often, there is nothing to do.  */
8180      if (!gate)
8181	return 0;
8182
8183      return execute_expand_omp ();
8184    }
8185
8186}; // class pass_expand_omp
8187
8188} // anon namespace
8189
8190gimple_opt_pass *
8191make_pass_expand_omp (gcc::context *ctxt)
8192{
8193  return new pass_expand_omp (ctxt);
8194}
8195
8196namespace {
8197
8198const pass_data pass_data_expand_omp_ssa =
8199{
8200  GIMPLE_PASS, /* type */
8201  "ompexpssa", /* name */
8202  OPTGROUP_OMP, /* optinfo_flags */
8203  TV_NONE, /* tv_id */
8204  PROP_cfg | PROP_ssa, /* properties_required */
8205  PROP_gimple_eomp, /* properties_provided */
8206  0, /* properties_destroyed */
8207  0, /* todo_flags_start */
8208  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8209};
8210
8211class pass_expand_omp_ssa : public gimple_opt_pass
8212{
8213public:
8214  pass_expand_omp_ssa (gcc::context *ctxt)
8215    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8216  {}
8217
8218  /* opt_pass methods: */
8219  virtual bool gate (function *fun)
8220    {
8221      return !(fun->curr_properties & PROP_gimple_eomp);
8222    }
8223  virtual unsigned int execute (function *) { return execute_expand_omp (); }
8224  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8225
8226}; // class pass_expand_omp_ssa
8227
8228} // anon namespace
8229
8230gimple_opt_pass *
8231make_pass_expand_omp_ssa (gcc::context *ctxt)
8232{
8233  return new pass_expand_omp_ssa (ctxt);
8234}
8235
8236/* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
8237   GIMPLE_OMP_* codes.  */
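
/* For illustration only: for a worksharing loop, the GIMPLE_OMP_CONTINUE
   handling below adds a loopback edge to the block following GIMPLE_OMP_FOR
   and an edge from GIMPLE_OMP_FOR past the loop body, so that a loop whose
   body is never executed still has a path to the exit.  */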
8238
8239bool
8240omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8241		       int *region_idx)
8242{
8243  gimple *last = last_stmt (bb);
8244  enum gimple_code code = gimple_code (last);
8245  struct omp_region *cur_region = *region;
8246  bool fallthru = false;
8247
8248  switch (code)
8249    {
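    /* These directives open a new region; control falls through into the
       body of the construct.  */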
8250    case GIMPLE_OMP_PARALLEL:
8251    case GIMPLE_OMP_TASK:
8252    case GIMPLE_OMP_FOR:
8253    case GIMPLE_OMP_SINGLE:
8254    case GIMPLE_OMP_TEAMS:
8255    case GIMPLE_OMP_MASTER:
8256    case GIMPLE_OMP_TASKGROUP:
8257    case GIMPLE_OMP_CRITICAL:
8258    case GIMPLE_OMP_SECTION:
8259    case GIMPLE_OMP_GRID_BODY:
8260      cur_region = new_omp_region (bb, code, cur_region);
8261      fallthru = true;
8262      break;
8263
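    /* ordered also opens a region, but ordered with a depend clause is a
       stand-alone directive without a body, so the region just created is
       popped again.  */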
8264    case GIMPLE_OMP_ORDERED:
8265      cur_region = new_omp_region (bb, code, cur_region);
8266      fallthru = true;
8267      if (omp_find_clause (gimple_omp_ordered_clauses
8268			     (as_a <gomp_ordered *> (last)),
8269			   OMP_CLAUSE_DEPEND))
8270	cur_region = cur_region->outer;
8271      break;
8272
8273    case GIMPLE_OMP_TARGET:
8274      cur_region = new_omp_region (bb, code, cur_region);
8275      fallthru = true;
8276      switch (gimple_omp_target_kind (last))
8277	{
8278	case GF_OMP_TARGET_KIND_REGION:
8279	case GF_OMP_TARGET_KIND_DATA:
8280	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8281	case GF_OMP_TARGET_KIND_OACC_KERNELS:
8282	case GF_OMP_TARGET_KIND_OACC_DATA:
8283	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8284	  break;
8285	case GF_OMP_TARGET_KIND_UPDATE:
8286	case GF_OMP_TARGET_KIND_ENTER_DATA:
8287	case GF_OMP_TARGET_KIND_EXIT_DATA:
8288	case GF_OMP_TARGET_KIND_OACC_UPDATE:
8289	case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8290	case GF_OMP_TARGET_KIND_OACC_DECLARE:
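	  /* These target kinds are stand-alone directives without a body;
	     pop the region that was just created for them.  */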
8291	  cur_region = cur_region->outer;
8292	  break;
8293	default:
8294	  gcc_unreachable ();
8295	}
8296      break;
8297
8298    case GIMPLE_OMP_SECTIONS:
8299      cur_region = new_omp_region (bb, code, cur_region);
8300      fallthru = true;
8301      break;
8302
8303    case GIMPLE_OMP_SECTIONS_SWITCH:
8304      fallthru = false;
8305      break;
8306
8307    case GIMPLE_OMP_ATOMIC_LOAD:
8308    case GIMPLE_OMP_ATOMIC_STORE:
8309      fallthru = true;
8310      break;
8311
8312    case GIMPLE_OMP_RETURN:
8313      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8314	 somewhere other than the next block.  This will be
8315	 created later.  */
8316      cur_region->exit = bb;
8317      if (cur_region->type == GIMPLE_OMP_TASK)
8318	/* Add an edge corresponding to not scheduling the task
8319	   immediately.  */
8320	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8321      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8322      cur_region = cur_region->outer;
8323      break;
8324
8325    case GIMPLE_OMP_CONTINUE:
8326      cur_region->cont = bb;
8327      switch (cur_region->type)
8328	{
8329	case GIMPLE_OMP_FOR:
8330	  /* Mark the successor edges of GIMPLE_OMP_FOR and
8331	     GIMPLE_OMP_CONTINUE as abnormal to prevent them
8332	     from being split.  */
8333	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8334	  /* Make the loopback edge.  */
8335	  make_edge (bb, single_succ (cur_region->entry),
8336		     EDGE_ABNORMAL);
8337
8338	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
8339	     corresponds to the case that the body of the loop
8340	     is not executed at all.  */
8341	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8342	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8343	  fallthru = false;
8344	  break;
8345
8346	case GIMPLE_OMP_SECTIONS:
8347	  /* Wire up the edges into and out of the nested sections.  */
8348	  {
8349	    basic_block switch_bb = single_succ (cur_region->entry);
8350
8351	    struct omp_region *i;
8352	    for (i = cur_region->inner; i ; i = i->next)
8353	      {
8354		gcc_assert (i->type == GIMPLE_OMP_SECTION);
8355		make_edge (switch_bb, i->entry, 0);
8356		make_edge (i->exit, bb, EDGE_FALLTHRU);
8357	      }
8358
8359	    /* Make the loopback edge to the block with
8360	       GIMPLE_OMP_SECTIONS_SWITCH.  */
8361	    make_edge (bb, switch_bb, 0);
8362
8363	    /* Make the edge from the switch to exit.  */
8364	    make_edge (switch_bb, bb->next_bb, 0);
8365	    fallthru = false;
8366	  }
8367	  break;
8368
8369	case GIMPLE_OMP_TASK:
8370	  fallthru = true;
8371	  break;
8372
8373	default:
8374	  gcc_unreachable ();
8375	}
8376      break;
8377
8378    default:
8379      gcc_unreachable ();
8380    }
8381
8382  if (*region != cur_region)
8383    {
8384      *region = cur_region;
8385      if (cur_region)
8386	*region_idx = cur_region->entry->index;
8387      else
8388	*region_idx = 0;
8389    }
8390
8391  return fallthru;
8392}
8393
8394#include "gt-omp-expand.h"
8395