1/* Expansion pass for OMP directives.  Outlines regions of certain OMP
2   directives to separate functions, converts others into explicit calls to the
3   runtime library (libgomp) and so forth
4
5Copyright (C) 2005-2022 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "alloc-pool.h"
56#include "symbol-summary.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
59#include "stringpool.h"
60#include "attribs.h"
61#include "tree-eh.h"
62#include "opts.h"
63
64/* OMP region information.  Every parallel and workshare
65   directive is enclosed between two markers, the OMP_* directive
66   and a corresponding GIMPLE_OMP_RETURN statement.  */
67
68struct omp_region
69{
70  /* The enclosing region.  */
71  struct omp_region *outer;
72
73  /* First child region.  */
74  struct omp_region *inner;
75
76  /* Next peer region.  */
77  struct omp_region *next;
78
79  /* Block containing the omp directive as its last stmt.  */
80  basic_block entry;
81
82  /* Block containing the GIMPLE_OMP_RETURN as its last stmt.  */
83  basic_block exit;
84
85  /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt.  */
86  basic_block cont;
87
88  /* If this is a combined parallel+workshare region, this is a list
89     of additional arguments needed by the combined parallel+workshare
90     library call.  */
91  vec<tree, va_gc> *ws_args;
92
93  /* The code for the omp directive of this region.  */
94  enum gimple_code type;
95
96  /* Schedule kind, only used for GIMPLE_OMP_FOR type regions.  */
97  enum omp_clause_schedule_kind sched_kind;
98
99  /* Schedule modifiers.  */
100  unsigned char sched_modifiers;
101
102  /* True if this is a combined parallel+workshare region.  */
103  bool is_combined_parallel;
104
105  /* Copy of fd.lastprivate_conditional != 0.  */
106  bool has_lastprivate_conditional;
107
108  /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
109     a depend clause.  */
110  gomp_ordered *ord_stmt;
111};
112
113static struct omp_region *root_omp_region;
114static bool omp_any_child_fn_dumped;
115
116static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
117				     bool = false);
118static gphi *find_phi_with_arg_on_edge (tree, edge);
119static void expand_omp (struct omp_region *region);
120
121/* Return true if REGION is a combined parallel+workshare region.  */
122
123static inline bool
124is_combined_parallel (struct omp_region *region)
125{
126  return region->is_combined_parallel;
127}
128
129/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
130   is the immediate dominator of PAR_ENTRY_BB, return true if there
131   are no data dependencies that would prevent expanding the parallel
132   directive at PAR_ENTRY_BB as a combined parallel+workshare region.
133
134   When expanding a combined parallel+workshare region, the call to
135   the child function may need additional arguments in the case of
136   GIMPLE_OMP_FOR regions.  In some cases, these arguments are
137   computed out of variables passed in from the parent to the child
138   via 'struct .omp_data_s'.  For instance:
139
140	#pragma omp parallel for schedule (guided, i * 4)
141	for (j ...)
142
143   Is lowered into:
144
145	# BLOCK 2 (PAR_ENTRY_BB)
146	.omp_data_o.i = i;
147	#pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
148
149	# BLOCK 3 (WS_ENTRY_BB)
150	.omp_data_i = &.omp_data_o;
151	D.1667 = .omp_data_i->i;
152	D.1598 = D.1667 * 4;
153	#pragma omp for schedule (guided, D.1598)
154
155   When we outline the parallel region, the call to the child function
156   'bar.omp_fn.0' will need the value D.1598 in its argument list, but
157   that value is computed *after* the call site.  So, in principle we
158   cannot do the transformation.
159
160   To see whether the code in WS_ENTRY_BB blocks the combined
161   parallel+workshare call, we collect all the variables used in the
162   GIMPLE_OMP_FOR header check whether they appear on the LHS of any
163   statement in WS_ENTRY_BB.  If so, then we cannot emit the combined
164   call.
165
166   FIXME.  If we had the SSA form built at this point, we could merely
167   hoist the code in block 3 into block 2 and be done with it.  But at
168   this point we don't have dataflow information and though we could
169   hack something up here, it is really not worth the aggravation.  */
170
171static bool
172workshare_safe_to_combine_p (basic_block ws_entry_bb)
173{
174  struct omp_for_data fd;
175  gimple *ws_stmt = last_stmt (ws_entry_bb);
176
177  if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
178    return true;
179
180  gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
181  if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
182    return false;
183
184  omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
185
186  if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
187    return false;
188  if (fd.iter_type != long_integer_type_node)
189    return false;
190
191  /* FIXME.  We give up too easily here.  If any of these arguments
192     are not constants, they will likely involve variables that have
193     been mapped into fields of .omp_data_s for sharing with the child
194     function.  With appropriate data flow, it would be possible to
195     see through this.  */
196  if (!is_gimple_min_invariant (fd.loop.n1)
197      || !is_gimple_min_invariant (fd.loop.n2)
198      || !is_gimple_min_invariant (fd.loop.step)
199      || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
200    return false;
201
202  return true;
203}
204
205/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
206   presence (SIMD_SCHEDULE).  */
207
208static tree
209omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
210{
211  if (!simd_schedule || integer_zerop (chunk_size))
212    return chunk_size;
213
214  poly_uint64 vf = omp_max_vf ();
215  if (known_eq (vf, 1U))
216    return chunk_size;
217
218  tree type = TREE_TYPE (chunk_size);
219  chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
220			    build_int_cst (type, vf - 1));
221  return fold_build2 (BIT_AND_EXPR, type, chunk_size,
222		      build_int_cst (type, -vf));
223}
224
225/* Collect additional arguments needed to emit a combined
226   parallel+workshare call.  WS_STMT is the workshare directive being
227   expanded.  */
228
229static vec<tree, va_gc> *
230get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
231{
232  tree t;
233  location_t loc = gimple_location (ws_stmt);
234  vec<tree, va_gc> *ws_args;
235
236  if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
237    {
238      struct omp_for_data fd;
239      tree n1, n2;
240
241      omp_extract_for_data (for_stmt, &fd, NULL);
242      n1 = fd.loop.n1;
243      n2 = fd.loop.n2;
244
245      if (gimple_omp_for_combined_into_p (for_stmt))
246	{
247	  tree innerc
248	    = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
249			       OMP_CLAUSE__LOOPTEMP_);
250	  gcc_assert (innerc);
251	  n1 = OMP_CLAUSE_DECL (innerc);
252	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
253				    OMP_CLAUSE__LOOPTEMP_);
254	  gcc_assert (innerc);
255	  n2 = OMP_CLAUSE_DECL (innerc);
256	}
257
258      vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
259
260      t = fold_convert_loc (loc, long_integer_type_node, n1);
261      ws_args->quick_push (t);
262
263      t = fold_convert_loc (loc, long_integer_type_node, n2);
264      ws_args->quick_push (t);
265
266      t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
267      ws_args->quick_push (t);
268
269      if (fd.chunk_size)
270	{
271	  t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
272	  t = omp_adjust_chunk_size (t, fd.simd_schedule);
273	  ws_args->quick_push (t);
274	}
275
276      return ws_args;
277    }
278  else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
279    {
280      /* Number of sections is equal to the number of edges from the
281	 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
282	 the exit of the sections region.  */
283      basic_block bb = single_succ (gimple_bb (ws_stmt));
284      t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
285      vec_alloc (ws_args, 1);
286      ws_args->quick_push (t);
287      return ws_args;
288    }
289
290  gcc_unreachable ();
291}
292
293/* Discover whether REGION is a combined parallel+workshare region.  */
294
295static void
296determine_parallel_type (struct omp_region *region)
297{
298  basic_block par_entry_bb, par_exit_bb;
299  basic_block ws_entry_bb, ws_exit_bb;
300
301  if (region == NULL || region->inner == NULL
302      || region->exit == NULL || region->inner->exit == NULL
303      || region->inner->cont == NULL)
304    return;
305
306  /* We only support parallel+for and parallel+sections.  */
307  if (region->type != GIMPLE_OMP_PARALLEL
308      || (region->inner->type != GIMPLE_OMP_FOR
309	  && region->inner->type != GIMPLE_OMP_SECTIONS))
310    return;
311
312  /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
313     WS_EXIT_BB -> PAR_EXIT_BB.  */
314  par_entry_bb = region->entry;
315  par_exit_bb = region->exit;
316  ws_entry_bb = region->inner->entry;
317  ws_exit_bb = region->inner->exit;
318
319  /* Give up for task reductions on the parallel, while it is implementable,
320     adding another big set of APIs or slowing down the normal paths is
321     not acceptable.  */
322  tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
323  if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
324    return;
325
326  if (single_succ (par_entry_bb) == ws_entry_bb
327      && single_succ (ws_exit_bb) == par_exit_bb
328      && workshare_safe_to_combine_p (ws_entry_bb)
329      && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
330	  || (last_and_only_stmt (ws_entry_bb)
331	      && last_and_only_stmt (par_exit_bb))))
332    {
333      gimple *par_stmt = last_stmt (par_entry_bb);
334      gimple *ws_stmt = last_stmt (ws_entry_bb);
335
336      if (region->inner->type == GIMPLE_OMP_FOR)
337	{
338	  /* If this is a combined parallel loop, we need to determine
339	     whether or not to use the combined library calls.  There
340	     are two cases where we do not apply the transformation:
341	     static loops and any kind of ordered loop.  In the first
342	     case, we already open code the loop so there is no need
343	     to do anything else.  In the latter case, the combined
344	     parallel loop call would still need extra synchronization
345	     to implement ordered semantics, so there would not be any
346	     gain in using the combined call.  */
347	  tree clauses = gimple_omp_for_clauses (ws_stmt);
348	  tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
349	  if (c == NULL
350	      || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
351		  == OMP_CLAUSE_SCHEDULE_STATIC)
352	      || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
353	      || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
354	      || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
355		  && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
356	    return;
357	}
358      else if (region->inner->type == GIMPLE_OMP_SECTIONS
359	       && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
360				    OMP_CLAUSE__REDUCTEMP_)
361		   || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
362				       OMP_CLAUSE__CONDTEMP_)))
363	return;
364
365      region->is_combined_parallel = true;
366      region->inner->is_combined_parallel = true;
367      region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
368    }
369}
370
371/* Debugging dumps for parallel regions.  */
372void dump_omp_region (FILE *, struct omp_region *, int);
373void debug_omp_region (struct omp_region *);
374void debug_all_omp_regions (void);
375
376/* Dump the parallel region tree rooted at REGION.  */
377
378void
379dump_omp_region (FILE *file, struct omp_region *region, int indent)
380{
381  fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
382	   gimple_code_name[region->type]);
383
384  if (region->inner)
385    dump_omp_region (file, region->inner, indent + 4);
386
387  if (region->cont)
388    {
389      fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
390	       region->cont->index);
391    }
392
393  if (region->exit)
394    fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
395	     region->exit->index);
396  else
397    fprintf (file, "%*s[no exit marker]\n", indent, "");
398
399  if (region->next)
400    dump_omp_region (file, region->next, indent);
401}
402
403DEBUG_FUNCTION void
404debug_omp_region (struct omp_region *region)
405{
406  dump_omp_region (stderr, region, 0);
407}
408
409DEBUG_FUNCTION void
410debug_all_omp_regions (void)
411{
412  dump_omp_region (stderr, root_omp_region, 0);
413}
414
415/* Create a new parallel region starting at STMT inside region PARENT.  */
416
417static struct omp_region *
418new_omp_region (basic_block bb, enum gimple_code type,
419		struct omp_region *parent)
420{
421  struct omp_region *region = XCNEW (struct omp_region);
422
423  region->outer = parent;
424  region->entry = bb;
425  region->type = type;
426
427  if (parent)
428    {
429      /* This is a nested region.  Add it to the list of inner
430	 regions in PARENT.  */
431      region->next = parent->inner;
432      parent->inner = region;
433    }
434  else
435    {
436      /* This is a toplevel region.  Add it to the list of toplevel
437	 regions in ROOT_OMP_REGION.  */
438      region->next = root_omp_region;
439      root_omp_region = region;
440    }
441
442  return region;
443}
444
445/* Release the memory associated with the region tree rooted at REGION.  */
446
447static void
448free_omp_region_1 (struct omp_region *region)
449{
450  struct omp_region *i, *n;
451
452  for (i = region->inner; i ; i = n)
453    {
454      n = i->next;
455      free_omp_region_1 (i);
456    }
457
458  free (region);
459}
460
461/* Release the memory for the entire omp region tree.  */
462
463void
464omp_free_regions (void)
465{
466  struct omp_region *r, *n;
467  for (r = root_omp_region; r ; r = n)
468    {
469      n = r->next;
470      free_omp_region_1 (r);
471    }
472  root_omp_region = NULL;
473}
474
475/* A convenience function to build an empty GIMPLE_COND with just the
476   condition.  */
477
478static gcond *
479gimple_build_cond_empty (tree cond)
480{
481  enum tree_code pred_code;
482  tree lhs, rhs;
483
484  gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
485  return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
486}
487
488/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
489   Add CHILD_FNDECL to decl chain of the supercontext of the block
490   ENTRY_BLOCK - this is the block which originally contained the
491   code from which CHILD_FNDECL was created.
492
493   Together, these actions ensure that the debug info for the outlined
494   function will be emitted with the correct lexical scope.  */
495
496static void
497adjust_context_and_scope (struct omp_region *region, tree entry_block,
498			  tree child_fndecl)
499{
500  tree parent_fndecl = NULL_TREE;
501  gimple *entry_stmt;
502  /* OMP expansion expands inner regions before outer ones, so if
503     we e.g. have explicit task region nested in parallel region, when
504     expanding the task region current_function_decl will be the original
505     source function, but we actually want to use as context the child
506     function of the parallel.  */
507  for (region = region->outer;
508       region && parent_fndecl == NULL_TREE; region = region->outer)
509    switch (region->type)
510      {
511      case GIMPLE_OMP_PARALLEL:
512      case GIMPLE_OMP_TASK:
513      case GIMPLE_OMP_TEAMS:
514	entry_stmt = last_stmt (region->entry);
515	parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
516	break;
517      case GIMPLE_OMP_TARGET:
518	entry_stmt = last_stmt (region->entry);
519	parent_fndecl
520	  = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
521	break;
522      default:
523	break;
524      }
525
526  if (parent_fndecl == NULL_TREE)
527    parent_fndecl = current_function_decl;
528  DECL_CONTEXT (child_fndecl) = parent_fndecl;
529
530  if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
531    {
532      tree b = BLOCK_SUPERCONTEXT (entry_block);
533      if (TREE_CODE (b) == BLOCK)
534        {
535	  DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
536	  BLOCK_VARS (b) = child_fndecl;
537	}
538    }
539}
540
541/* Build the function calls to GOMP_parallel etc to actually
542   generate the parallel operation.  REGION is the parallel region
543   being expanded.  BB is the block where to insert the code.  WS_ARGS
544   will be set if this is a call to a combined parallel+workshare
545   construct, it contains the list of additional arguments needed by
546   the workshare construct.  */
547
548static void
549expand_parallel_call (struct omp_region *region, basic_block bb,
550		      gomp_parallel *entry_stmt,
551		      vec<tree, va_gc> *ws_args)
552{
553  tree t, t1, t2, val, cond, c, clauses, flags;
554  gimple_stmt_iterator gsi;
555  gimple *stmt;
556  enum built_in_function start_ix;
557  int start_ix2;
558  location_t clause_loc;
559  vec<tree, va_gc> *args;
560
561  clauses = gimple_omp_parallel_clauses (entry_stmt);
562
563  /* Determine what flavor of GOMP_parallel we will be
564     emitting.  */
565  start_ix = BUILT_IN_GOMP_PARALLEL;
566  tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
567  if (rtmp)
568    start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
569  else if (is_combined_parallel (region))
570    {
571      switch (region->inner->type)
572	{
573	case GIMPLE_OMP_FOR:
574	  gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
575	  switch (region->inner->sched_kind)
576	    {
577	    case OMP_CLAUSE_SCHEDULE_RUNTIME:
578	      /* For lastprivate(conditional:), our implementation
579		 requires monotonic behavior.  */
580	      if (region->inner->has_lastprivate_conditional != 0)
581		start_ix2 = 3;
582	      else if ((region->inner->sched_modifiers
583		       & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
584		start_ix2 = 6;
585	      else if ((region->inner->sched_modifiers
586			& OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
587		start_ix2 = 7;
588	      else
589		start_ix2 = 3;
590	      break;
591	    case OMP_CLAUSE_SCHEDULE_DYNAMIC:
592	    case OMP_CLAUSE_SCHEDULE_GUIDED:
593	      if ((region->inner->sched_modifiers
594		   & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
595		  && !region->inner->has_lastprivate_conditional)
596		{
597		  start_ix2 = 3 + region->inner->sched_kind;
598		  break;
599		}
600	      /* FALLTHRU */
601	    default:
602	      start_ix2 = region->inner->sched_kind;
603	      break;
604	    }
605	  start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
606	  start_ix = (enum built_in_function) start_ix2;
607	  break;
608	case GIMPLE_OMP_SECTIONS:
609	  start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
610	  break;
611	default:
612	  gcc_unreachable ();
613	}
614    }
615
616  /* By default, the value of NUM_THREADS is zero (selected at run time)
617     and there is no conditional.  */
618  cond = NULL_TREE;
619  val = build_int_cst (unsigned_type_node, 0);
620  flags = build_int_cst (unsigned_type_node, 0);
621
622  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
623  if (c)
624    cond = OMP_CLAUSE_IF_EXPR (c);
625
626  c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
627  if (c)
628    {
629      val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
630      clause_loc = OMP_CLAUSE_LOCATION (c);
631    }
632  else
633    clause_loc = gimple_location (entry_stmt);
634
635  c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
636  if (c)
637    flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
638
639  /* Ensure 'val' is of the correct type.  */
640  val = fold_convert_loc (clause_loc, unsigned_type_node, val);
641
642  /* If we found the clause 'if (cond)', build either
643     (cond != 0) or (cond ? val : 1u).  */
644  if (cond)
645    {
646      cond = gimple_boolify (cond);
647
648      if (integer_zerop (val))
649	val = fold_build2_loc (clause_loc,
650			   EQ_EXPR, unsigned_type_node, cond,
651			   build_int_cst (TREE_TYPE (cond), 0));
652      else
653	{
654	  basic_block cond_bb, then_bb, else_bb;
655	  edge e, e_then, e_else;
656	  tree tmp_then, tmp_else, tmp_join, tmp_var;
657
658	  tmp_var = create_tmp_var (TREE_TYPE (val));
659	  if (gimple_in_ssa_p (cfun))
660	    {
661	      tmp_then = make_ssa_name (tmp_var);
662	      tmp_else = make_ssa_name (tmp_var);
663	      tmp_join = make_ssa_name (tmp_var);
664	    }
665	  else
666	    {
667	      tmp_then = tmp_var;
668	      tmp_else = tmp_var;
669	      tmp_join = tmp_var;
670	    }
671
672	  e = split_block_after_labels (bb);
673	  cond_bb = e->src;
674	  bb = e->dest;
675	  remove_edge (e);
676
677	  then_bb = create_empty_bb (cond_bb);
678	  else_bb = create_empty_bb (then_bb);
679	  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
680	  set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
681
682	  stmt = gimple_build_cond_empty (cond);
683	  gsi = gsi_start_bb (cond_bb);
684	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
685
686	  gsi = gsi_start_bb (then_bb);
687	  expand_omp_build_assign (&gsi, tmp_then, val, true);
688
689	  gsi = gsi_start_bb (else_bb);
690	  expand_omp_build_assign (&gsi, tmp_else,
691				   build_int_cst (unsigned_type_node, 1),
692				   true);
693
694	  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
695	  make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
696	  add_bb_to_loop (then_bb, cond_bb->loop_father);
697	  add_bb_to_loop (else_bb, cond_bb->loop_father);
698	  e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
699	  e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
700
701	  if (gimple_in_ssa_p (cfun))
702	    {
703	      gphi *phi = create_phi_node (tmp_join, bb);
704	      add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
705	      add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
706	    }
707
708	  val = tmp_join;
709	}
710
711      gsi = gsi_start_bb (bb);
712      val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
713				      false, GSI_CONTINUE_LINKING);
714    }
715
716  gsi = gsi_last_nondebug_bb (bb);
717  t = gimple_omp_parallel_data_arg (entry_stmt);
718  if (t == NULL)
719    t1 = null_pointer_node;
720  else
721    t1 = build_fold_addr_expr (t);
722  tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
723  t2 = build_fold_addr_expr (child_fndecl);
724
725  vec_alloc (args, 4 + vec_safe_length (ws_args));
726  args->quick_push (t2);
727  args->quick_push (t1);
728  args->quick_push (val);
729  if (ws_args)
730    args->splice (*ws_args);
731  args->quick_push (flags);
732
733  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
734			       builtin_decl_explicit (start_ix), args);
735
736  if (rtmp)
737    {
738      tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
739      t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
740		  fold_convert (type,
741				fold_convert (pointer_sized_int_node, t)));
742    }
743  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
744			    false, GSI_CONTINUE_LINKING);
745}
746
747/* Build the function call to GOMP_task to actually
748   generate the task operation.  BB is the block where to insert the code.  */
749
750static void
751expand_task_call (struct omp_region *region, basic_block bb,
752		  gomp_task *entry_stmt)
753{
754  tree t1, t2, t3;
755  gimple_stmt_iterator gsi;
756  location_t loc = gimple_location (entry_stmt);
757
758  tree clauses = gimple_omp_task_clauses (entry_stmt);
759
760  tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
761  tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
762  tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
763  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
764  tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
765  tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
766  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
767
768  unsigned int iflags
769    = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
770      | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
771      | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
772
773  bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
774  tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
775  tree num_tasks = NULL_TREE;
776  bool ull = false;
777  if (taskloop_p)
778    {
779      gimple *g = last_stmt (region->outer->entry);
780      gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
781		  && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
782      struct omp_for_data fd;
783      omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
784      startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
785      endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
786				OMP_CLAUSE__LOOPTEMP_);
787      startvar = OMP_CLAUSE_DECL (startvar);
788      endvar = OMP_CLAUSE_DECL (endvar);
789      step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
790      if (fd.loop.cond_code == LT_EXPR)
791	iflags |= GOMP_TASK_FLAG_UP;
792      tree tclauses = gimple_omp_for_clauses (g);
793      num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
794      if (num_tasks)
795	{
796	  if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
797	    iflags |= GOMP_TASK_FLAG_STRICT;
798	  num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
799	}
800      else
801	{
802	  num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
803	  if (num_tasks)
804	    {
805	      iflags |= GOMP_TASK_FLAG_GRAINSIZE;
806	      if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
807		iflags |= GOMP_TASK_FLAG_STRICT;
808	      num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
809	    }
810	  else
811	    num_tasks = integer_zero_node;
812	}
813      num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
814      if (ifc == NULL_TREE)
815	iflags |= GOMP_TASK_FLAG_IF;
816      if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
817	iflags |= GOMP_TASK_FLAG_NOGROUP;
818      ull = fd.iter_type == long_long_unsigned_type_node;
819      if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
820	iflags |= GOMP_TASK_FLAG_REDUCTION;
821    }
822  else
823    {
824      if (priority)
825	iflags |= GOMP_TASK_FLAG_PRIORITY;
826      if (detach)
827	iflags |= GOMP_TASK_FLAG_DETACH;
828    }
829
830  tree flags = build_int_cst (unsigned_type_node, iflags);
831
832  tree cond = boolean_true_node;
833  if (ifc)
834    {
835      if (taskloop_p)
836	{
837	  tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
838	  t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
839			       build_int_cst (unsigned_type_node,
840					      GOMP_TASK_FLAG_IF),
841			       build_int_cst (unsigned_type_node, 0));
842	  flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
843				   flags, t);
844	}
845      else
846	cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
847    }
848
849  if (finalc)
850    {
851      tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
852      t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
853			   build_int_cst (unsigned_type_node,
854					  GOMP_TASK_FLAG_FINAL),
855			   build_int_cst (unsigned_type_node, 0));
856      flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
857    }
858  if (depend)
859    depend = OMP_CLAUSE_DECL (depend);
860  else
861    depend = build_int_cst (ptr_type_node, 0);
862  if (priority)
863    priority = fold_convert (integer_type_node,
864			     OMP_CLAUSE_PRIORITY_EXPR (priority));
865  else
866    priority = integer_zero_node;
867
868  gsi = gsi_last_nondebug_bb (bb);
869
870  detach = (detach
871	    ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
872	    : null_pointer_node);
873
874  tree t = gimple_omp_task_data_arg (entry_stmt);
875  if (t == NULL)
876    t2 = null_pointer_node;
877  else
878    t2 = build_fold_addr_expr_loc (loc, t);
879  t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
880  t = gimple_omp_task_copy_fn (entry_stmt);
881  if (t == NULL)
882    t3 = null_pointer_node;
883  else
884    t3 = build_fold_addr_expr_loc (loc, t);
885
886  if (taskloop_p)
887    t = build_call_expr (ull
888			 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
889			 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
890			 11, t1, t2, t3,
891			 gimple_omp_task_arg_size (entry_stmt),
892			 gimple_omp_task_arg_align (entry_stmt), flags,
893			 num_tasks, priority, startvar, endvar, step);
894  else
895    t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
896			 10, t1, t2, t3,
897			 gimple_omp_task_arg_size (entry_stmt),
898			 gimple_omp_task_arg_align (entry_stmt), cond, flags,
899			 depend, priority, detach);
900
901  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
902			    false, GSI_CONTINUE_LINKING);
903}
904
905/* Build the function call to GOMP_taskwait_depend to actually
906   generate the taskwait operation.  BB is the block where to insert the
907   code.  */
908
909static void
910expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
911{
912  tree clauses = gimple_omp_task_clauses (entry_stmt);
913  tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
914  if (depend == NULL_TREE)
915    return;
916
917  depend = OMP_CLAUSE_DECL (depend);
918
919  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
920  tree t
921    = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
922		       1, depend);
923
924  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
925			    false, GSI_CONTINUE_LINKING);
926}
927
928/* Build the function call to GOMP_teams_reg to actually
929   generate the host teams operation.  REGION is the teams region
930   being expanded.  BB is the block where to insert the code.  */
931
932static void
933expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
934{
935  tree clauses = gimple_omp_teams_clauses (entry_stmt);
936  tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
937  if (num_teams == NULL_TREE)
938    num_teams = build_int_cst (unsigned_type_node, 0);
939  else
940    {
941      num_teams = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (num_teams);
942      num_teams = fold_convert (unsigned_type_node, num_teams);
943    }
944  tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
945  if (thread_limit == NULL_TREE)
946    thread_limit = build_int_cst (unsigned_type_node, 0);
947  else
948    {
949      thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
950      thread_limit = fold_convert (unsigned_type_node, thread_limit);
951    }
952
953  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
954  tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
955  if (t == NULL)
956    t1 = null_pointer_node;
957  else
958    t1 = build_fold_addr_expr (t);
959  tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
960  tree t2 = build_fold_addr_expr (child_fndecl);
961
962  vec<tree, va_gc> *args;
963  vec_alloc (args, 5);
964  args->quick_push (t2);
965  args->quick_push (t1);
966  args->quick_push (num_teams);
967  args->quick_push (thread_limit);
968  /* For future extensibility.  */
969  args->quick_push (build_zero_cst (unsigned_type_node));
970
971  t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
972			       builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
973			       args);
974
975  force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
976			    false, GSI_CONTINUE_LINKING);
977}
978
979/* Chain all the DECLs in LIST by their TREE_CHAIN fields.  */
980
981static tree
982vec2chain (vec<tree, va_gc> *v)
983{
984  tree chain = NULL_TREE, t;
985  unsigned ix;
986
987  FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
988    {
989      DECL_CHAIN (t) = chain;
990      chain = t;
991    }
992
993  return chain;
994}
995
996/* Remove barriers in REGION->EXIT's block.  Note that this is only
997   valid for GIMPLE_OMP_PARALLEL regions.  Since the end of a parallel region
998   is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
999   left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1000   removed.  */
1001
1002static void
1003remove_exit_barrier (struct omp_region *region)
1004{
1005  gimple_stmt_iterator gsi;
1006  basic_block exit_bb;
1007  edge_iterator ei;
1008  edge e;
1009  gimple *stmt;
1010  int any_addressable_vars = -1;
1011
1012  exit_bb = region->exit;
1013
1014  /* If the parallel region doesn't return, we don't have REGION->EXIT
1015     block at all.  */
1016  if (! exit_bb)
1017    return;
1018
1019  /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN.  The
1020     workshare's GIMPLE_OMP_RETURN will be in a preceding block.  The kinds of
1021     statements that can appear in between are extremely limited -- no
1022     memory operations at all.  Here, we allow nothing at all, so the
1023     only thing we allow to precede this GIMPLE_OMP_RETURN is a label.  */
1024  gsi = gsi_last_nondebug_bb (exit_bb);
1025  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1026  gsi_prev_nondebug (&gsi);
1027  if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1028    return;
1029
1030  FOR_EACH_EDGE (e, ei, exit_bb->preds)
1031    {
1032      gsi = gsi_last_nondebug_bb (e->src);
1033      if (gsi_end_p (gsi))
1034	continue;
1035      stmt = gsi_stmt (gsi);
1036      if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1037	  && !gimple_omp_return_nowait_p (stmt))
1038	{
1039	  /* OpenMP 3.0 tasks unfortunately prevent this optimization
1040	     in many cases.  If there could be tasks queued, the barrier
1041	     might be needed to let the tasks run before some local
1042	     variable of the parallel that the task uses as shared
1043	     runs out of scope.  The task can be spawned either
1044	     from within current function (this would be easy to check)
1045	     or from some function it calls and gets passed an address
1046	     of such a variable.  */
1047	  if (any_addressable_vars < 0)
1048	    {
1049	      gomp_parallel *parallel_stmt
1050		= as_a <gomp_parallel *> (last_stmt (region->entry));
1051	      tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1052	      tree local_decls, block, decl;
1053	      unsigned ix;
1054
1055	      any_addressable_vars = 0;
1056	      FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1057		if (TREE_ADDRESSABLE (decl))
1058		  {
1059		    any_addressable_vars = 1;
1060		    break;
1061		  }
1062	      for (block = gimple_block (stmt);
1063		   !any_addressable_vars
1064		   && block
1065		   && TREE_CODE (block) == BLOCK;
1066		   block = BLOCK_SUPERCONTEXT (block))
1067		{
1068		  for (local_decls = BLOCK_VARS (block);
1069		       local_decls;
1070		       local_decls = DECL_CHAIN (local_decls))
1071		    if (TREE_ADDRESSABLE (local_decls))
1072		      {
1073			any_addressable_vars = 1;
1074			break;
1075		      }
1076		  if (block == gimple_block (parallel_stmt))
1077		    break;
1078		}
1079	    }
1080	  if (!any_addressable_vars)
1081	    gimple_omp_return_set_nowait (stmt);
1082	}
1083    }
1084}
1085
1086static void
1087remove_exit_barriers (struct omp_region *region)
1088{
1089  if (region->type == GIMPLE_OMP_PARALLEL)
1090    remove_exit_barrier (region);
1091
1092  if (region->inner)
1093    {
1094      region = region->inner;
1095      remove_exit_barriers (region);
1096      while (region->next)
1097	{
1098	  region = region->next;
1099	  remove_exit_barriers (region);
1100	}
1101    }
1102}
1103
1104/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1105   calls.  These can't be declared as const functions, but
1106   within one parallel body they are constant, so they can be
1107   transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1108   which are declared const.  Similarly for task body, except
1109   that in untied task omp_get_thread_num () can change at any task
1110   scheduling point.  */
1111
1112static void
1113optimize_omp_library_calls (gimple *entry_stmt)
1114{
1115  basic_block bb;
1116  gimple_stmt_iterator gsi;
1117  tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1118  tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1119  tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1120  tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1121  bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1122		      && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1123					  OMP_CLAUSE_UNTIED) != NULL);
1124
1125  FOR_EACH_BB_FN (bb, cfun)
1126    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1127      {
1128	gimple *call = gsi_stmt (gsi);
1129	tree decl;
1130
1131	if (is_gimple_call (call)
1132	    && (decl = gimple_call_fndecl (call))
1133	    && DECL_EXTERNAL (decl)
1134	    && TREE_PUBLIC (decl)
1135	    && DECL_INITIAL (decl) == NULL)
1136	  {
1137	    tree built_in;
1138
1139	    if (DECL_NAME (decl) == thr_num_id)
1140	      {
1141		/* In #pragma omp task untied omp_get_thread_num () can change
1142		   during the execution of the task region.  */
1143		if (untied_task)
1144		  continue;
1145		built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1146	      }
1147	    else if (DECL_NAME (decl) == num_thr_id)
1148	      built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1149	    else
1150	      continue;
1151
1152	    if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1153		|| gimple_call_num_args (call) != 0)
1154	      continue;
1155
1156	    if (flag_exceptions && !TREE_NOTHROW (decl))
1157	      continue;
1158
1159	    if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1160		|| !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1161					TREE_TYPE (TREE_TYPE (built_in))))
1162	      continue;
1163
1164	    gimple_call_set_fndecl (call, built_in);
1165	  }
1166      }
1167}
1168
1169/* Callback for expand_omp_build_assign.  Return non-NULL if *tp needs to be
1170   regimplified.  */
1171
1172static tree
1173expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1174{
1175  tree t = *tp;
1176
1177  /* Any variable with DECL_VALUE_EXPR needs to be regimplified.  */
1178  if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1179    return t;
1180
1181  if (TREE_CODE (t) == ADDR_EXPR)
1182    recompute_tree_invariant_for_addr_expr (t);
1183
1184  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1185  return NULL_TREE;
1186}
1187
1188/* Prepend or append TO = FROM assignment before or after *GSI_P.  */
1189
1190static void
1191expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1192			 bool after)
1193{
1194  bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1195  from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1196				   !after, after ? GSI_CONTINUE_LINKING
1197						 : GSI_SAME_STMT);
1198  gimple *stmt = gimple_build_assign (to, from);
1199  if (after)
1200    gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1201  else
1202    gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1203  if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1204      || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1205    {
1206      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1207      gimple_regimplify_operands (stmt, &gsi);
1208    }
1209}
1210
1211/* Prepend or append LHS CODE RHS condition before or after *GSI_P.  */
1212
1213static gcond *
1214expand_omp_build_cond (gimple_stmt_iterator *gsi_p, enum tree_code code,
1215		       tree lhs, tree rhs, bool after = false)
1216{
1217  gcond *cond_stmt = gimple_build_cond (code, lhs, rhs, NULL_TREE, NULL_TREE);
1218  if (after)
1219    gsi_insert_after (gsi_p, cond_stmt, GSI_CONTINUE_LINKING);
1220  else
1221    gsi_insert_before (gsi_p, cond_stmt, GSI_SAME_STMT);
1222  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
1223		 NULL, NULL)
1224      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
1225		    NULL, NULL))
1226    {
1227      gimple_stmt_iterator gsi = gsi_for_stmt (cond_stmt);
1228      gimple_regimplify_operands (cond_stmt, &gsi);
1229    }
1230  return cond_stmt;
1231}
1232
1233/* Expand the OpenMP parallel or task directive starting at REGION.  */
1234
1235static void
1236expand_omp_taskreg (struct omp_region *region)
1237{
1238  basic_block entry_bb, exit_bb, new_bb;
1239  struct function *child_cfun;
1240  tree child_fn, block, t;
1241  gimple_stmt_iterator gsi;
1242  gimple *entry_stmt, *stmt;
1243  edge e;
1244  vec<tree, va_gc> *ws_args;
1245
1246  entry_stmt = last_stmt (region->entry);
1247  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1248      && gimple_omp_task_taskwait_p (entry_stmt))
1249    {
1250      new_bb = region->entry;
1251      gsi = gsi_last_nondebug_bb (region->entry);
1252      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1253      gsi_remove (&gsi, true);
1254      expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1255      return;
1256    }
1257
1258  child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1259  child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1260
1261  entry_bb = region->entry;
1262  if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1263    exit_bb = region->cont;
1264  else
1265    exit_bb = region->exit;
1266
1267  if (is_combined_parallel (region))
1268    ws_args = region->ws_args;
1269  else
1270    ws_args = NULL;
1271
1272  if (child_cfun->cfg)
1273    {
1274      /* Due to inlining, it may happen that we have already outlined
1275	 the region, in which case all we need to do is make the
1276	 sub-graph unreachable and emit the parallel call.  */
1277      edge entry_succ_e, exit_succ_e;
1278
1279      entry_succ_e = single_succ_edge (entry_bb);
1280
1281      gsi = gsi_last_nondebug_bb (entry_bb);
1282      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1283		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1284		  || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1285      gsi_remove (&gsi, true);
1286
1287      new_bb = entry_bb;
1288      if (exit_bb)
1289	{
1290	  exit_succ_e = single_succ_edge (exit_bb);
1291	  make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1292	}
1293      remove_edge_and_dominated_blocks (entry_succ_e);
1294    }
1295  else
1296    {
1297      unsigned srcidx, dstidx, num;
1298
1299      /* If the parallel region needs data sent from the parent
1300	 function, then the very first statement (except possible
1301	 tree profile counter updates) of the parallel body
1302	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
1303	 &.OMP_DATA_O is passed as an argument to the child function,
1304	 we need to replace it with the argument as seen by the child
1305	 function.
1306
1307	 In most cases, this will end up being the identity assignment
1308	 .OMP_DATA_I = .OMP_DATA_I.  However, if the parallel body had
1309	 a function call that has been inlined, the original PARM_DECL
1310	 .OMP_DATA_I may have been converted into a different local
1311	 variable.  In which case, we need to keep the assignment.  */
1312      if (gimple_omp_taskreg_data_arg (entry_stmt))
1313	{
1314	  basic_block entry_succ_bb
1315	    = single_succ_p (entry_bb) ? single_succ (entry_bb)
1316				       : FALLTHRU_EDGE (entry_bb)->dest;
1317	  tree arg;
1318	  gimple *parcopy_stmt = NULL;
1319
1320	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1321	    {
1322	      gimple *stmt;
1323
1324	      gcc_assert (!gsi_end_p (gsi));
1325	      stmt = gsi_stmt (gsi);
1326	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
1327		continue;
1328
1329	      if (gimple_num_ops (stmt) == 2)
1330		{
1331		  tree arg = gimple_assign_rhs1 (stmt);
1332
1333		  /* We're ignore the subcode because we're
1334		     effectively doing a STRIP_NOPS.  */
1335
1336		  if (TREE_CODE (arg) == ADDR_EXPR
1337		      && (TREE_OPERAND (arg, 0)
1338			  == gimple_omp_taskreg_data_arg (entry_stmt)))
1339		    {
1340		      parcopy_stmt = stmt;
1341		      break;
1342		    }
1343		}
1344	    }
1345
1346	  gcc_assert (parcopy_stmt != NULL);
1347	  arg = DECL_ARGUMENTS (child_fn);
1348
1349	  if (!gimple_in_ssa_p (cfun))
1350	    {
1351	      if (gimple_assign_lhs (parcopy_stmt) == arg)
1352		gsi_remove (&gsi, true);
1353	      else
1354		{
1355		  /* ?? Is setting the subcode really necessary ??  */
1356		  gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1357		  gimple_assign_set_rhs1 (parcopy_stmt, arg);
1358		}
1359	    }
1360	  else
1361	    {
1362	      tree lhs = gimple_assign_lhs (parcopy_stmt);
1363	      gcc_assert (SSA_NAME_VAR (lhs) == arg);
1364	      /* We'd like to set the rhs to the default def in the child_fn,
1365		 but it's too early to create ssa names in the child_fn.
1366		 Instead, we set the rhs to the parm.  In
1367		 move_sese_region_to_fn, we introduce a default def for the
1368		 parm, map the parm to it's default def, and once we encounter
1369		 this stmt, replace the parm with the default def.  */
1370	      gimple_assign_set_rhs1 (parcopy_stmt, arg);
1371	      update_stmt (parcopy_stmt);
1372	    }
1373	}
1374
1375      /* Declare local variables needed in CHILD_CFUN.  */
1376      block = DECL_INITIAL (child_fn);
1377      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1378      /* The gimplifier could record temporaries in parallel/task block
1379	 rather than in containing function's local_decls chain,
1380	 which would mean cgraph missed finalizing them.  Do it now.  */
1381      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1382	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1383	  varpool_node::finalize_decl (t);
1384      DECL_SAVED_TREE (child_fn) = NULL;
1385      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
1386      gimple_set_body (child_fn, NULL);
1387      TREE_USED (block) = 1;
1388
1389      /* Reset DECL_CONTEXT on function arguments.  */
1390      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1391	DECL_CONTEXT (t) = child_fn;
1392
1393      /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1394	 so that it can be moved to the child function.  */
1395      gsi = gsi_last_nondebug_bb (entry_bb);
1396      stmt = gsi_stmt (gsi);
1397      gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1398			   || gimple_code (stmt) == GIMPLE_OMP_TASK
1399			   || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1400      e = split_block (entry_bb, stmt);
1401      gsi_remove (&gsi, true);
1402      entry_bb = e->dest;
1403      edge e2 = NULL;
1404      if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1405	single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1406      else
1407	{
1408	  e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1409	  gcc_assert (e2->dest == region->exit);
1410	  remove_edge (BRANCH_EDGE (entry_bb));
1411	  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1412	  gsi = gsi_last_nondebug_bb (region->exit);
1413	  gcc_assert (!gsi_end_p (gsi)
1414		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1415	  gsi_remove (&gsi, true);
1416	}
1417
1418      /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR.  */
1419      if (exit_bb)
1420	{
1421	  gsi = gsi_last_nondebug_bb (exit_bb);
1422	  gcc_assert (!gsi_end_p (gsi)
1423		      && (gimple_code (gsi_stmt (gsi))
1424			  == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1425	  stmt = gimple_build_return (NULL);
1426	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1427	  gsi_remove (&gsi, true);
1428	}
1429
1430      /* Move the parallel region into CHILD_CFUN.  */
1431
1432      if (gimple_in_ssa_p (cfun))
1433	{
1434	  init_tree_ssa (child_cfun);
1435	  init_ssa_operands (child_cfun);
1436	  child_cfun->gimple_df->in_ssa_p = true;
1437	  block = NULL_TREE;
1438	}
1439      else
1440	block = gimple_block (entry_stmt);
1441
1442      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1443      if (exit_bb)
1444	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1445      if (e2)
1446	{
1447	  basic_block dest_bb = e2->dest;
1448	  if (!exit_bb)
1449	    make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1450	  remove_edge (e2);
1451	  set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1452	}
1453      /* When the OMP expansion process cannot guarantee an up-to-date
1454	 loop tree arrange for the child function to fixup loops.  */
1455      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1456	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1457
1458      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
1459      num = vec_safe_length (child_cfun->local_decls);
1460      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1461	{
1462	  t = (*child_cfun->local_decls)[srcidx];
1463	  if (DECL_CONTEXT (t) == cfun->decl)
1464	    continue;
1465	  if (srcidx != dstidx)
1466	    (*child_cfun->local_decls)[dstidx] = t;
1467	  dstidx++;
1468	}
1469      if (dstidx != num)
1470	vec_safe_truncate (child_cfun->local_decls, dstidx);
1471
1472      /* Inform the callgraph about the new function.  */
1473      child_cfun->curr_properties = cfun->curr_properties;
1474      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1475      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1476      cgraph_node *node = cgraph_node::get_create (child_fn);
1477      node->parallelized_function = 1;
1478      cgraph_node::add_new_function (child_fn, true);
1479
1480      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1481		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1482
1483      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
1484	 fixed in a following pass.  */
1485      push_cfun (child_cfun);
1486      if (need_asm)
1487	assign_assembler_name_if_needed (child_fn);
1488
1489      if (optimize)
1490	optimize_omp_library_calls (entry_stmt);
1491      update_max_bb_count ();
1492      cgraph_edge::rebuild_edges ();
1493
1494      /* Some EH regions might become dead, see PR34608.  If
1495	 pass_cleanup_cfg isn't the first pass to happen with the
1496	 new child, these dead EH edges might cause problems.
1497	 Clean them up now.  */
1498      if (flag_exceptions)
1499	{
1500	  basic_block bb;
1501	  bool changed = false;
1502
1503	  FOR_EACH_BB_FN (bb, cfun)
1504	    changed |= gimple_purge_dead_eh_edges (bb);
1505	  if (changed)
1506	    cleanup_tree_cfg ();
1507	}
1508      if (gimple_in_ssa_p (cfun))
1509	update_ssa (TODO_update_ssa);
1510      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1511	verify_loop_structure ();
1512      pop_cfun ();
1513
1514      if (dump_file && !gimple_in_ssa_p (cfun))
1515	{
1516	  omp_any_child_fn_dumped = true;
1517	  dump_function_header (dump_file, child_fn, dump_flags);
1518	  dump_function_to_file (child_fn, dump_file, dump_flags);
1519	}
1520    }
1521
1522  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1523
1524  if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1525    expand_parallel_call (region, new_bb,
1526			  as_a <gomp_parallel *> (entry_stmt), ws_args);
1527  else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1528    expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1529  else
1530    expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1531  if (gimple_in_ssa_p (cfun))
1532    update_ssa (TODO_update_ssa_only_virtuals);
1533}
1534
1535/* Information about members of an OpenACC collapsed loop nest.  */
1536
1537struct oacc_collapse
1538{
1539  tree base;  /* Base value.  */
1540  tree iters; /* Number of steps.  */
1541  tree step;  /* Step size.  */
1542  tree tile;  /* Tile increment (if tiled).  */
1543  tree outer; /* Tile iterator var. */
1544};
1545
1546/* Helper for expand_oacc_for.  Determine collapsed loop information.
1547   Fill in COUNTS array.  Emit any initialization code before GSI.
1548   Return the calculated outer loop bound of BOUND_TYPE.  */
1549
1550static tree
1551expand_oacc_collapse_init (const struct omp_for_data *fd,
1552			   gimple_stmt_iterator *gsi,
1553			   oacc_collapse *counts, tree diff_type,
1554			   tree bound_type, location_t loc)
1555{
1556  tree tiling = fd->tiling;
1557  tree total = build_int_cst (bound_type, 1);
1558  int ix;
1559
1560  gcc_assert (integer_onep (fd->loop.step));
1561  gcc_assert (integer_zerop (fd->loop.n1));
1562
1563  /* When tiling, the first operand of the tile clause applies to the
1564     innermost loop, and we work outwards from there.  Seems
1565     backwards, but whatever.  */
1566  for (ix = fd->collapse; ix--;)
1567    {
1568      const omp_for_data_loop *loop = &fd->loops[ix];
1569
1570      tree iter_type = TREE_TYPE (loop->v);
1571      tree plus_type = iter_type;
1572
1573      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
1574
1575      if (POINTER_TYPE_P (iter_type))
1576	plus_type = sizetype;
1577
1578      if (tiling)
1579	{
1580	  tree num = build_int_cst (integer_type_node, fd->collapse);
1581	  tree loop_no = build_int_cst (integer_type_node, ix);
1582	  tree tile = TREE_VALUE (tiling);
1583	  gcall *call
1584	    = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1585					  /* gwv-outer=*/integer_zero_node,
1586					  /* gwv-inner=*/integer_zero_node);
1587
1588	  counts[ix].outer = create_tmp_var (iter_type, ".outer");
1589	  counts[ix].tile = create_tmp_var (diff_type, ".tile");
1590	  gimple_call_set_lhs (call, counts[ix].tile);
1591	  gimple_set_location (call, loc);
1592	  gsi_insert_before (gsi, call, GSI_SAME_STMT);
1593
1594	  tiling = TREE_CHAIN (tiling);
1595	}
1596      else
1597	{
1598	  counts[ix].tile = NULL;
1599	  counts[ix].outer = loop->v;
1600	}
1601
1602      tree b = loop->n1;
1603      tree e = loop->n2;
1604      tree s = loop->step;
1605      bool up = loop->cond_code == LT_EXPR;
1606      tree dir = build_int_cst (diff_type, up ? +1 : -1);
1607      bool negating;
1608      tree expr;
1609
1610      b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1611				    true, GSI_SAME_STMT);
1612      e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1613				    true, GSI_SAME_STMT);
1614
1615      /* Convert the step, avoiding possible unsigned->signed overflow.  */
1616      negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1617      if (negating)
1618	s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1619      s = fold_convert (diff_type, s);
1620      if (negating)
1621	s = fold_build1 (NEGATE_EXPR, diff_type, s);
1622      s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1623				    true, GSI_SAME_STMT);
1624
1625      /* Determine the range, avoiding possible unsigned->signed overflow.  */
1626      negating = !up && TYPE_UNSIGNED (iter_type);
1627      expr = fold_build2 (MINUS_EXPR, plus_type,
1628			  fold_convert (plus_type, negating ? b : e),
1629			  fold_convert (plus_type, negating ? e : b));
1630      expr = fold_convert (diff_type, expr);
1631      if (negating)
1632	expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1633      tree range = force_gimple_operand_gsi
1634	(gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1635
1636      /* Determine number of iterations.  */
1637      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1638      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1639      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1640
1641      tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1642					     true, GSI_SAME_STMT);
1643
1644      counts[ix].base = b;
1645      counts[ix].iters = iters;
1646      counts[ix].step = s;
1647
1648      total = fold_build2 (MULT_EXPR, bound_type, total,
1649			   fold_convert (bound_type, iters));
1650    }
1651
1652  return total;
1653}
1654
1655/* Emit initializers for collapsed loop members.  INNER is true if
1656   this is for the element loop of a TILE.  IVAR is the outer
1657   loop iteration variable, from which collapsed loop iteration values
1658   are  calculated.  COUNTS array has been initialized by
1659   expand_oacc_collapse_inits.  */
1660
1661static void
1662expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1663			   gimple_stmt_iterator *gsi,
1664			   const oacc_collapse *counts, tree ivar,
1665			   tree diff_type)
1666{
1667  tree ivar_type = TREE_TYPE (ivar);
1668
1669  /*  The most rapidly changing iteration variable is the innermost
1670      one.  */
1671  for (int ix = fd->collapse; ix--;)
1672    {
1673      const omp_for_data_loop *loop = &fd->loops[ix];
1674      const oacc_collapse *collapse = &counts[ix];
1675      tree v = inner ? loop->v : collapse->outer;
1676      tree iter_type = TREE_TYPE (v);
1677      tree plus_type = iter_type;
1678      enum tree_code plus_code = PLUS_EXPR;
1679      tree expr;
1680
1681      if (POINTER_TYPE_P (iter_type))
1682	{
1683	  plus_code = POINTER_PLUS_EXPR;
1684	  plus_type = sizetype;
1685	}
1686
1687      expr = ivar;
1688      if (ix)
1689	{
1690	  tree mod = fold_convert (ivar_type, collapse->iters);
1691	  ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1692	  expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1693	  ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1694					   true, GSI_SAME_STMT);
1695	}
1696
1697      expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1698			  fold_convert (diff_type, collapse->step));
1699      expr = fold_build2 (plus_code, iter_type,
1700			  inner ? collapse->outer : collapse->base,
1701			  fold_convert (plus_type, expr));
1702      expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1703				       true, GSI_SAME_STMT);
1704      gassign *ass = gimple_build_assign (v, expr);
1705      gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1706    }
1707}
1708
1709/* Helper function for expand_omp_{for_*,simd}.  If this is the outermost
1710   of the combined collapse > 1 loop constructs, generate code like:
1711	if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1712	if (cond3 is <)
1713	  adj = STEP3 - 1;
1714	else
1715	  adj = STEP3 + 1;
1716	count3 = (adj + N32 - N31) / STEP3;
1717	if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1718	if (cond2 is <)
1719	  adj = STEP2 - 1;
1720	else
1721	  adj = STEP2 + 1;
1722	count2 = (adj + N22 - N21) / STEP2;
1723	if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1724	if (cond1 is <)
1725	  adj = STEP1 - 1;
1726	else
1727	  adj = STEP1 + 1;
1728	count1 = (adj + N12 - N11) / STEP1;
1729	count = count1 * count2 * count3;
1730   Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1731	count = 0;
1732   and set ZERO_ITER_BB to that bb.  If this isn't the outermost
1733   of the combined loop constructs, just initialize COUNTS array
1734   from the _looptemp_ clauses.  For loop nests with non-rectangular
1735   loops, do this only for the rectangular loops.  Then pick
1736   the loops which reference outer vars in their bound expressions
1737   and the loops which they refer to and for this sub-nest compute
1738   number of iterations.  For triangular loops use Faulhaber's formula,
1739   otherwise as a fallback, compute by iterating the loops.
1740   If e.g. the sub-nest is
1741	for (I = N11; I COND1 N12; I += STEP1)
1742	for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1743	for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1744   do:
1745	COUNT = 0;
1746	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1747	for (tmpj = M21 * tmpi + N21;
1748	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1749	  {
1750	    int tmpk1 = M31 * tmpj + N31;
1751	    int tmpk2 = M32 * tmpj + N32;
1752	    if (tmpk1 COND3 tmpk2)
1753	      {
1754		if (COND3 is <)
1755		  adj = STEP3 - 1;
1756		else
1757		  adj = STEP3 + 1;
1758		COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1759	      }
1760	  }
1761   and finally multiply the counts of the rectangular loops not
1762   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
1763   store number of iterations of the loops from fd->first_nonrect
1764   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1765   by the counts of rectangular loops not referenced in any non-rectangular
1766   loops sandwitched in between those.  */
1767
1768/* NOTE: It *could* be better to moosh all of the BBs together,
1769   creating one larger BB with all the computation and the unexpected
1770   jump at the end.  I.e.
1771
1772   bool zero3, zero2, zero1, zero;
1773
1774   zero3 = N32 c3 N31;
1775   count3 = (N32 - N31) /[cl] STEP3;
1776   zero2 = N22 c2 N21;
1777   count2 = (N22 - N21) /[cl] STEP2;
1778   zero1 = N12 c1 N11;
1779   count1 = (N12 - N11) /[cl] STEP1;
1780   zero = zero3 || zero2 || zero1;
1781   count = count1 * count2 * count3;
1782   if (__builtin_expect(zero, false)) goto zero_iter_bb;
1783
1784   After all, we expect the zero=false, and thus we expect to have to
1785   evaluate all of the comparison expressions, so short-circuiting
1786   oughtn't be a win.  Since the condition isn't protecting a
1787   denominator, we're not concerned about divide-by-zero, so we can
1788   fully evaluate count even if a numerator turned out to be wrong.
1789
1790   It seems like putting this all together would create much better
1791   scheduling opportunities, and less pressure on the chip's branch
1792   predictor.  */
1793
1794static void
1795expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1796			    basic_block &entry_bb, tree *counts,
1797			    basic_block &zero_iter1_bb, int &first_zero_iter1,
1798			    basic_block &zero_iter2_bb, int &first_zero_iter2,
1799			    basic_block &l2_dom_bb)
1800{
1801  tree t, type = TREE_TYPE (fd->loop.v);
1802  edge e, ne;
1803  int i;
1804
1805  /* Collapsed loops need work for expansion into SSA form.  */
1806  gcc_assert (!gimple_in_ssa_p (cfun));
1807
1808  if (gimple_omp_for_combined_into_p (fd->for_stmt)
1809      && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1810    {
1811      gcc_assert (fd->ordered == 0);
1812      /* First two _looptemp_ clauses are for istart/iend, counts[0]
1813	 isn't supposed to be handled, as the inner loop doesn't
1814	 use it.  */
1815      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1816				     OMP_CLAUSE__LOOPTEMP_);
1817      gcc_assert (innerc);
1818      for (i = 0; i < fd->collapse; i++)
1819	{
1820	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1821				    OMP_CLAUSE__LOOPTEMP_);
1822	  gcc_assert (innerc);
1823	  if (i)
1824	    counts[i] = OMP_CLAUSE_DECL (innerc);
1825	  else
1826	    counts[0] = NULL_TREE;
1827	}
1828      if (fd->non_rect
1829	  && fd->last_nonrect == fd->first_nonrect + 1
1830	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1831	{
1832	  tree c[4];
1833	  for (i = 0; i < 4; i++)
1834	    {
1835	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1836					OMP_CLAUSE__LOOPTEMP_);
1837	      gcc_assert (innerc);
1838	      c[i] = OMP_CLAUSE_DECL (innerc);
1839	    }
1840	  counts[0] = c[0];
1841	  fd->first_inner_iterations = c[1];
1842	  fd->factor = c[2];
1843	  fd->adjn1 = c[3];
1844	}
1845      return;
1846    }
1847
1848  for (i = fd->collapse; i < fd->ordered; i++)
1849    {
1850      tree itype = TREE_TYPE (fd->loops[i].v);
1851      counts[i] = NULL_TREE;
1852      t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1853		       fold_convert (itype, fd->loops[i].n1),
1854		       fold_convert (itype, fd->loops[i].n2));
1855      if (t && integer_zerop (t))
1856	{
1857	  for (i = fd->collapse; i < fd->ordered; i++)
1858	    counts[i] = build_int_cst (type, 0);
1859	  break;
1860	}
1861    }
1862  bool rect_count_seen = false;
1863  for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1864    {
1865      tree itype = TREE_TYPE (fd->loops[i].v);
1866
1867      if (i >= fd->collapse && counts[i])
1868	continue;
1869      if (fd->non_rect)
1870	{
1871	  /* Skip loops that use outer iterators in their expressions
1872	     during this phase.  */
1873	  if (fd->loops[i].m1 || fd->loops[i].m2)
1874	    {
1875	      counts[i] = build_zero_cst (type);
1876	      continue;
1877	    }
1878	}
1879      if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1880	  && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1881				fold_convert (itype, fd->loops[i].n1),
1882				fold_convert (itype, fd->loops[i].n2)))
1883	      == NULL_TREE || !integer_onep (t)))
1884	{
1885	  gcond *cond_stmt;
1886	  tree n1, n2;
1887	  n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1888	  n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1889					 true, GSI_SAME_STMT);
1890	  n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1891	  n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1892					 true, GSI_SAME_STMT);
1893	  cond_stmt = expand_omp_build_cond (gsi, fd->loops[i].cond_code,
1894					     n1, n2);
1895	  e = split_block (entry_bb, cond_stmt);
1896	  basic_block &zero_iter_bb
1897	    = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1898	  int &first_zero_iter
1899	    = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1900	  if (zero_iter_bb == NULL)
1901	    {
1902	      gassign *assign_stmt;
1903	      first_zero_iter = i;
1904	      zero_iter_bb = create_empty_bb (entry_bb);
1905	      add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1906	      *gsi = gsi_after_labels (zero_iter_bb);
1907	      if (i < fd->collapse)
1908		assign_stmt = gimple_build_assign (fd->loop.n2,
1909						   build_zero_cst (type));
1910	      else
1911		{
1912		  counts[i] = create_tmp_reg (type, ".count");
1913		  assign_stmt
1914		    = gimple_build_assign (counts[i], build_zero_cst (type));
1915		}
1916	      gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1917	      set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1918				       entry_bb);
1919	    }
1920	  ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1921	  ne->probability = profile_probability::very_unlikely ();
1922	  e->flags = EDGE_TRUE_VALUE;
1923	  e->probability = ne->probability.invert ();
1924	  if (l2_dom_bb == NULL)
1925	    l2_dom_bb = entry_bb;
1926	  entry_bb = e->dest;
1927	  *gsi = gsi_last_nondebug_bb (entry_bb);
1928	}
1929
1930      if (POINTER_TYPE_P (itype))
1931	itype = signed_type_for (itype);
1932      t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1933				 ? -1 : 1));
1934      t = fold_build2 (PLUS_EXPR, itype,
1935		       fold_convert (itype, fd->loops[i].step), t);
1936      t = fold_build2 (PLUS_EXPR, itype, t,
1937		       fold_convert (itype, fd->loops[i].n2));
1938      t = fold_build2 (MINUS_EXPR, itype, t,
1939		       fold_convert (itype, fd->loops[i].n1));
1940      /* ?? We could probably use CEIL_DIV_EXPR instead of
1941	 TRUNC_DIV_EXPR and adjusting by hand.  Unless we can't
1942	 generate the same code in the end because generically we
1943	 don't know that the values involved must be negative for
1944	 GT??  */
1945      if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1946	t = fold_build2 (TRUNC_DIV_EXPR, itype,
1947			 fold_build1 (NEGATE_EXPR, itype, t),
1948			 fold_build1 (NEGATE_EXPR, itype,
1949				      fold_convert (itype,
1950						    fd->loops[i].step)));
1951      else
1952	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1953			 fold_convert (itype, fd->loops[i].step));
1954      t = fold_convert (type, t);
1955      if (TREE_CODE (t) == INTEGER_CST)
1956	counts[i] = t;
1957      else
1958	{
1959	  if (i < fd->collapse || i != first_zero_iter2)
1960	    counts[i] = create_tmp_reg (type, ".count");
1961	  expand_omp_build_assign (gsi, counts[i], t);
1962	}
1963      if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1964	{
1965	  if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1966	    continue;
1967	  if (!rect_count_seen)
1968	    {
1969	      t = counts[i];
1970	      rect_count_seen = true;
1971	    }
1972	  else
1973	    t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1974	  expand_omp_build_assign (gsi, fd->loop.n2, t);
1975	}
1976    }
1977  if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1978    {
1979      gcc_assert (fd->last_nonrect != -1);
1980
1981      counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1982      expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1983			       build_zero_cst (type));
1984      for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1985	if (fd->loops[i].m1
1986	    || fd->loops[i].m2
1987	    || fd->loops[i].non_rect_referenced)
1988	  break;
1989      if (i == fd->last_nonrect
1990	  && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1991	  && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
1992	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1993	{
1994	  int o = fd->first_nonrect;
1995	  tree itype = TREE_TYPE (fd->loops[o].v);
1996	  tree n1o = create_tmp_reg (itype, ".n1o");
1997	  t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1998	  expand_omp_build_assign (gsi, n1o, t);
1999	  tree n2o = create_tmp_reg (itype, ".n2o");
2000	  t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
2001	  expand_omp_build_assign (gsi, n2o, t);
2002	  if (fd->loops[i].m1 && fd->loops[i].m2)
2003	    t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
2004			     unshare_expr (fd->loops[i].m1));
2005	  else if (fd->loops[i].m1)
2006	    t = fold_build1 (NEGATE_EXPR, itype,
2007			     unshare_expr (fd->loops[i].m1));
2008	  else
2009	    t = unshare_expr (fd->loops[i].m2);
2010	  tree m2minusm1
2011	    = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2012					true, GSI_SAME_STMT);
2013
2014	  gimple_stmt_iterator gsi2 = *gsi;
2015	  gsi_prev (&gsi2);
2016	  e = split_block (entry_bb, gsi_stmt (gsi2));
2017	  e = split_block (e->dest, (gimple *) NULL);
2018	  basic_block bb1 = e->src;
2019	  entry_bb = e->dest;
2020	  *gsi = gsi_after_labels (entry_bb);
2021
2022	  gsi2 = gsi_after_labels (bb1);
2023	  tree ostep = fold_convert (itype, fd->loops[o].step);
2024	  t = build_int_cst (itype, (fd->loops[o].cond_code
2025				     == LT_EXPR ? -1 : 1));
2026	  t = fold_build2 (PLUS_EXPR, itype, ostep, t);
2027	  t = fold_build2 (PLUS_EXPR, itype, t, n2o);
2028	  t = fold_build2 (MINUS_EXPR, itype, t, n1o);
2029	  if (TYPE_UNSIGNED (itype)
2030	      && fd->loops[o].cond_code == GT_EXPR)
2031	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2032			     fold_build1 (NEGATE_EXPR, itype, t),
2033			     fold_build1 (NEGATE_EXPR, itype, ostep));
2034	  else
2035	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2036	  tree outer_niters
2037	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2038					true, GSI_SAME_STMT);
2039	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2040			   build_one_cst (itype));
2041	  t = fold_build2 (MULT_EXPR, itype, t, ostep);
2042	  t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2043	  tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2044						true, GSI_SAME_STMT);
2045	  tree n1, n2, n1e, n2e;
2046	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2047	  if (fd->loops[i].m1)
2048	    {
2049	      n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2050	      n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2051	      n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2052	    }
2053	  else
2054	    n1 = t;
2055	  n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2056					 true, GSI_SAME_STMT);
2057	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2058	  if (fd->loops[i].m2)
2059	    {
2060	      n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2061	      n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2062	      n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2063	    }
2064	  else
2065	    n2 = t;
2066	  n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2067					 true, GSI_SAME_STMT);
2068	  t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2069	  if (fd->loops[i].m1)
2070	    {
2071	      n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2072	      n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2073	      n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2074	    }
2075	  else
2076	    n1e = t;
2077	  n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2078					  true, GSI_SAME_STMT);
2079	  t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2080	  if (fd->loops[i].m2)
2081	    {
2082	      n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2083	      n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2084	      n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2085	    }
2086	  else
2087	    n2e = t;
2088	  n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2089					  true, GSI_SAME_STMT);
2090	  gcond *cond_stmt
2091	    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2092				     n1, n2);
2093	  e = split_block (bb1, cond_stmt);
2094	  e->flags = EDGE_TRUE_VALUE;
2095	  e->probability = profile_probability::likely ().guessed ();
2096	  basic_block bb2 = e->dest;
2097	  gsi2 = gsi_after_labels (bb2);
2098
2099	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2100					     n1e, n2e);
2101	  e = split_block (bb2, cond_stmt);
2102	  e->flags = EDGE_TRUE_VALUE;
2103	  e->probability = profile_probability::likely ().guessed ();
2104	  gsi2 = gsi_after_labels (e->dest);
2105
2106	  tree step = fold_convert (itype, fd->loops[i].step);
2107	  t = build_int_cst (itype, (fd->loops[i].cond_code
2108				     == LT_EXPR ? -1 : 1));
2109	  t = fold_build2 (PLUS_EXPR, itype, step, t);
2110	  t = fold_build2 (PLUS_EXPR, itype, t, n2);
2111	  t = fold_build2 (MINUS_EXPR, itype, t, n1);
2112	  if (TYPE_UNSIGNED (itype)
2113	      && fd->loops[i].cond_code == GT_EXPR)
2114	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2115			     fold_build1 (NEGATE_EXPR, itype, t),
2116			     fold_build1 (NEGATE_EXPR, itype, step));
2117	  else
2118	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2119	  tree first_inner_iterations
2120	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2121					true, GSI_SAME_STMT);
2122	  t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2123	  if (TYPE_UNSIGNED (itype)
2124	      && fd->loops[i].cond_code == GT_EXPR)
2125	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2126			     fold_build1 (NEGATE_EXPR, itype, t),
2127			     fold_build1 (NEGATE_EXPR, itype, step));
2128	  else
2129	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2130	  tree factor
2131	    = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2132					true, GSI_SAME_STMT);
2133	  t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2134			   build_one_cst (itype));
2135	  t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2136	  t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2137	  t = fold_build2 (MULT_EXPR, itype, factor, t);
2138	  t = fold_build2 (PLUS_EXPR, itype,
2139			   fold_build2 (MULT_EXPR, itype, outer_niters,
2140					first_inner_iterations), t);
2141	  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2142				   fold_convert (type, t));
2143
2144	  basic_block bb3 = create_empty_bb (bb1);
2145	  add_bb_to_loop (bb3, bb1->loop_father);
2146
2147	  e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2148	  e->probability = profile_probability::unlikely ().guessed ();
2149
2150	  gsi2 = gsi_after_labels (bb3);
2151	  cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2152					     n1e, n2e);
2153	  e = split_block (bb3, cond_stmt);
2154	  e->flags = EDGE_TRUE_VALUE;
2155	  e->probability = profile_probability::likely ().guessed ();
2156	  basic_block bb4 = e->dest;
2157
2158	  ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2159	  ne->probability = e->probability.invert ();
2160
2161	  basic_block bb5 = create_empty_bb (bb2);
2162	  add_bb_to_loop (bb5, bb2->loop_father);
2163
2164	  ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2165	  ne->probability = profile_probability::unlikely ().guessed ();
2166
2167	  for (int j = 0; j < 2; j++)
2168	    {
2169	      gsi2 = gsi_after_labels (j ? bb5 : bb4);
2170	      t = fold_build2 (MINUS_EXPR, itype,
2171			       unshare_expr (fd->loops[i].n1),
2172			       unshare_expr (fd->loops[i].n2));
2173	      t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2174	      tree tem
2175		= force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176					    true, GSI_SAME_STMT);
2177	      t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2178	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2179	      t = fold_build2 (MINUS_EXPR, itype, tem, t);
2180	      tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2181					      true, GSI_SAME_STMT);
2182	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2183	      if (fd->loops[i].m1)
2184		{
2185		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2186		  n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2187		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2188		}
2189	      else
2190		n1 = t;
2191	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2192					     true, GSI_SAME_STMT);
2193	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2194	      if (fd->loops[i].m2)
2195		{
2196		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2197		  n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2198		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2199		}
2200	      else
2201		n2 = t;
2202	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2203					     true, GSI_SAME_STMT);
2204	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2205
2206	      cond_stmt = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2207						 n1, n2);
2208	      e = split_block (gsi_bb (gsi2), cond_stmt);
2209	      e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2210	      e->probability = profile_probability::unlikely ().guessed ();
2211	      ne = make_edge (e->src, bb1,
2212			      j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2213	      ne->probability = e->probability.invert ();
2214	      gsi2 = gsi_after_labels (e->dest);
2215
2216	      t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2217	      expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2218
2219	      make_edge (e->dest, bb1, EDGE_FALLTHRU);
2220	    }
2221
2222	  set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2223	  set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2224	  set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2225
2226	  if (fd->first_nonrect + 1 == fd->last_nonrect)
2227	    {
2228	      fd->first_inner_iterations = first_inner_iterations;
2229	      fd->factor = factor;
2230	      fd->adjn1 = n1o;
2231	    }
2232	}
2233      else
2234	{
2235	  /* Fallback implementation.  Evaluate the loops with m1/m2
2236	     non-NULL as well as their outer loops at runtime using temporaries
2237	     instead of the original iteration variables, and in the
2238	     body just bump the counter.  */
2239	  gimple_stmt_iterator gsi2 = *gsi;
2240	  gsi_prev (&gsi2);
2241	  e = split_block (entry_bb, gsi_stmt (gsi2));
2242	  e = split_block (e->dest, (gimple *) NULL);
2243	  basic_block cur_bb = e->src;
2244	  basic_block next_bb = e->dest;
2245	  entry_bb = e->dest;
2246	  *gsi = gsi_after_labels (entry_bb);
2247
2248	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2249	  memset (vs, 0, fd->last_nonrect * sizeof (tree));
2250
2251	  for (i = 0; i <= fd->last_nonrect; i++)
2252	    {
2253	      if (fd->loops[i].m1 == NULL_TREE
2254		  && fd->loops[i].m2 == NULL_TREE
2255		  && !fd->loops[i].non_rect_referenced)
2256		continue;
2257
2258	      tree itype = TREE_TYPE (fd->loops[i].v);
2259
2260	      gsi2 = gsi_after_labels (cur_bb);
2261	      tree n1, n2;
2262	      t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2263	      if (fd->loops[i].m1 == NULL_TREE)
2264		n1 = t;
2265	      else if (POINTER_TYPE_P (itype))
2266		{
2267		  gcc_assert (integer_onep (fd->loops[i].m1));
2268		  t = fold_convert (sizetype,
2269				    unshare_expr (fd->loops[i].n1));
2270		  n1 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2271		}
2272	      else
2273		{
2274		  n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2275		  n1 = fold_build2 (MULT_EXPR, itype,
2276				    vs[i - fd->loops[i].outer], n1);
2277		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2278		}
2279	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2280					     true, GSI_SAME_STMT);
2281	      if (i < fd->last_nonrect)
2282		{
2283		  vs[i] = create_tmp_reg (itype, ".it");
2284		  expand_omp_build_assign (&gsi2, vs[i], n1);
2285		}
2286	      t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2287	      if (fd->loops[i].m2 == NULL_TREE)
2288		n2 = t;
2289	      else if (POINTER_TYPE_P (itype))
2290		{
2291		  gcc_assert (integer_onep (fd->loops[i].m2));
2292		  t = fold_convert (sizetype,
2293				    unshare_expr (fd->loops[i].n2));
2294		  n2 = fold_build_pointer_plus (vs[i - fd->loops[i].outer], t);
2295		}
2296	      else
2297		{
2298		  n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2299		  n2 = fold_build2 (MULT_EXPR, itype,
2300				    vs[i - fd->loops[i].outer], n2);
2301		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2302		}
2303	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2304					     true, GSI_SAME_STMT);
2305	      if (POINTER_TYPE_P (itype))
2306		itype = signed_type_for (itype);
2307	      if (i == fd->last_nonrect)
2308		{
2309		  gcond *cond_stmt
2310		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2311					     n1, n2);
2312		  e = split_block (cur_bb, cond_stmt);
2313		  e->flags = EDGE_TRUE_VALUE;
2314		  ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2315		  e->probability = profile_probability::likely ().guessed ();
2316		  ne->probability = e->probability.invert ();
2317		  gsi2 = gsi_after_labels (e->dest);
2318
2319		  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2320					     ? -1 : 1));
2321		  t = fold_build2 (PLUS_EXPR, itype,
2322				   fold_convert (itype, fd->loops[i].step), t);
2323		  t = fold_build2 (PLUS_EXPR, itype, t,
2324				   fold_convert (itype, n2));
2325		  t = fold_build2 (MINUS_EXPR, itype, t,
2326				   fold_convert (itype, n1));
2327		  tree step = fold_convert (itype, fd->loops[i].step);
2328		  if (TYPE_UNSIGNED (itype)
2329		      && fd->loops[i].cond_code == GT_EXPR)
2330		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2331				     fold_build1 (NEGATE_EXPR, itype, t),
2332				     fold_build1 (NEGATE_EXPR, itype, step));
2333		  else
2334		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2335		  t = fold_convert (type, t);
2336		  t = fold_build2 (PLUS_EXPR, type,
2337				   counts[fd->last_nonrect], t);
2338		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2339						true, GSI_SAME_STMT);
2340		  expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2341		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2342		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2343		  break;
2344		}
2345	      e = split_block (cur_bb, last_stmt (cur_bb));
2346
2347	      basic_block new_cur_bb = create_empty_bb (cur_bb);
2348	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2349
2350	      gsi2 = gsi_after_labels (e->dest);
2351	      tree step = fold_convert (itype,
2352					unshare_expr (fd->loops[i].step));
2353	      if (POINTER_TYPE_P (TREE_TYPE (vs[i])))
2354		t = fold_build_pointer_plus (vs[i],
2355					     fold_convert (sizetype, step));
2356	      else
2357		t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2358	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2359					    true, GSI_SAME_STMT);
2360	      expand_omp_build_assign (&gsi2, vs[i], t);
2361
2362	      ne = split_block (e->dest, last_stmt (e->dest));
2363	      gsi2 = gsi_after_labels (ne->dest);
2364
2365	      expand_omp_build_cond (&gsi2, fd->loops[i].cond_code, vs[i], n2);
2366	      edge e3, e4;
2367	      if (next_bb == entry_bb)
2368		{
2369		  e3 = find_edge (ne->dest, next_bb);
2370		  e3->flags = EDGE_FALSE_VALUE;
2371		}
2372	      else
2373		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2374	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2375	      e4->probability = profile_probability::likely ().guessed ();
2376	      e3->probability = e4->probability.invert ();
2377	      basic_block esrc = e->src;
2378	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2379	      cur_bb = new_cur_bb;
2380	      basic_block latch_bb = next_bb;
2381	      next_bb = e->dest;
2382	      remove_edge (e);
2383	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2384	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2385	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2386	    }
2387	}
2388      t = NULL_TREE;
2389      for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2390	if (!fd->loops[i].non_rect_referenced
2391	    && fd->loops[i].m1 == NULL_TREE
2392	    && fd->loops[i].m2 == NULL_TREE)
2393	  {
2394	    if (t == NULL_TREE)
2395	      t = counts[i];
2396	    else
2397	      t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2398	  }
2399      if (t)
2400	{
2401	  t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2402	  expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2403	}
2404      if (!rect_count_seen)
2405	t = counts[fd->last_nonrect];
2406      else
2407	t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2408			 counts[fd->last_nonrect]);
2409      expand_omp_build_assign (gsi, fd->loop.n2, t);
2410    }
2411  else if (fd->non_rect)
2412    {
2413      tree t = fd->loop.n2;
2414      gcc_assert (TREE_CODE (t) == INTEGER_CST);
2415      int non_rect_referenced = 0, non_rect = 0;
2416      for (i = 0; i < fd->collapse; i++)
2417	{
2418	  if ((i < fd->first_nonrect || i > fd->last_nonrect)
2419	      && !integer_zerop (counts[i]))
2420	    t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2421	  if (fd->loops[i].non_rect_referenced)
2422	    non_rect_referenced++;
2423	  if (fd->loops[i].m1 || fd->loops[i].m2)
2424	    non_rect++;
2425	}
2426      gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2427      counts[fd->last_nonrect] = t;
2428    }
2429}
2430
2431/* Helper function for expand_omp_{for_*,simd}.  Generate code like:
2432	T = V;
2433	V3 = N31 + (T % count3) * STEP3;
2434	T = T / count3;
2435	V2 = N21 + (T % count2) * STEP2;
2436	T = T / count2;
2437	V1 = N11 + T * STEP1;
2438   if this loop doesn't have an inner loop construct combined with it.
2439   If it does have an inner loop construct combined with it and the
2440   iteration count isn't known constant, store values from counts array
2441   into its _looptemp_ temporaries instead.
2442   For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2443   inclusive), use the count of all those loops together, and either
2444   find quadratic etc. equation roots, or as a fallback, do:
2445	COUNT = 0;
2446	for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2447	for (tmpj = M21 * tmpi + N21;
2448	     tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2449	  {
2450	    int tmpk1 = M31 * tmpj + N31;
2451	    int tmpk2 = M32 * tmpj + N32;
2452	    if (tmpk1 COND3 tmpk2)
2453	      {
2454		if (COND3 is <)
2455		  adj = STEP3 - 1;
2456		else
2457		  adj = STEP3 + 1;
2458		int temp = (adj + tmpk2 - tmpk1) / STEP3;
2459		if (COUNT + temp > T)
2460		  {
2461		    V1 = tmpi;
2462		    V2 = tmpj;
2463		    V3 = tmpk1 + (T - COUNT) * STEP3;
2464		    goto done;
2465		  }
2466		else
2467		  COUNT += temp;
2468	      }
2469	  }
2470	done:;
2471   but for optional innermost or outermost rectangular loops that aren't
2472   referenced by other loop expressions keep doing the division/modulo.  */
2473
2474static void
2475expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2476			  tree *counts, tree *nonrect_bounds,
2477			  gimple *inner_stmt, tree startvar)
2478{
2479  int i;
2480  if (gimple_omp_for_combined_p (fd->for_stmt))
2481    {
2482      /* If fd->loop.n2 is constant, then no propagation of the counts
2483	 is needed, they are constant.  */
2484      if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2485	return;
2486
2487      tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2488		     ? gimple_omp_taskreg_clauses (inner_stmt)
2489		     : gimple_omp_for_clauses (inner_stmt);
2490      /* First two _looptemp_ clauses are for istart/iend, counts[0]
2491	 isn't supposed to be handled, as the inner loop doesn't
2492	 use it.  */
2493      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2494      gcc_assert (innerc);
2495      int count = 0;
2496      if (fd->non_rect
2497	  && fd->last_nonrect == fd->first_nonrect + 1
2498	  && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2499	count = 4;
2500      for (i = 0; i < fd->collapse + count; i++)
2501	{
2502	  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2503				    OMP_CLAUSE__LOOPTEMP_);
2504	  gcc_assert (innerc);
2505	  if (i)
2506	    {
2507	      tree tem = OMP_CLAUSE_DECL (innerc);
2508	      tree t;
2509	      if (i < fd->collapse)
2510		t = counts[i];
2511	      else
2512		switch (i - fd->collapse)
2513		  {
2514		  case 0: t = counts[0]; break;
2515		  case 1: t = fd->first_inner_iterations; break;
2516		  case 2: t = fd->factor; break;
2517		  case 3: t = fd->adjn1; break;
2518		  default: gcc_unreachable ();
2519		  }
2520	      t = fold_convert (TREE_TYPE (tem), t);
2521	      t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2522					    false, GSI_CONTINUE_LINKING);
2523	      gassign *stmt = gimple_build_assign (tem, t);
2524	      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2525	    }
2526	}
2527      return;
2528    }
2529
2530  tree type = TREE_TYPE (fd->loop.v);
2531  tree tem = create_tmp_reg (type, ".tem");
2532  gassign *stmt = gimple_build_assign (tem, startvar);
2533  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2534
2535  for (i = fd->collapse - 1; i >= 0; i--)
2536    {
2537      tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2538      itype = vtype;
2539      if (POINTER_TYPE_P (vtype))
2540	itype = signed_type_for (vtype);
2541      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2542	t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2543      else
2544	t = tem;
2545      if (i == fd->last_nonrect)
2546	{
2547	  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2548					false, GSI_CONTINUE_LINKING);
2549	  tree stopval = t;
2550	  tree idx = create_tmp_reg (type, ".count");
2551	  expand_omp_build_assign (gsi, idx,
2552				   build_zero_cst (type), true);
2553	  basic_block bb_triang = NULL, bb_triang_dom = NULL;
2554	  if (fd->first_nonrect + 1 == fd->last_nonrect
2555	      && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2556		  || fd->first_inner_iterations)
2557	      && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2558		  != CODE_FOR_nothing)
2559	      && !integer_zerop (fd->loop.n2))
2560	    {
2561	      tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2562	      tree itype = TREE_TYPE (fd->loops[i].v);
2563	      tree first_inner_iterations = fd->first_inner_iterations;
2564	      tree factor = fd->factor;
2565	      gcond *cond_stmt
2566		= expand_omp_build_cond (gsi, NE_EXPR, factor,
2567					 build_zero_cst (TREE_TYPE (factor)));
2568	      edge e = split_block (gsi_bb (*gsi), cond_stmt);
2569	      basic_block bb0 = e->src;
2570	      e->flags = EDGE_TRUE_VALUE;
2571	      e->probability = profile_probability::likely ();
2572	      bb_triang_dom = bb0;
2573	      *gsi = gsi_after_labels (e->dest);
2574	      tree slltype = long_long_integer_type_node;
2575	      tree ulltype = long_long_unsigned_type_node;
2576	      tree stopvalull = fold_convert (ulltype, stopval);
2577	      stopvalull
2578		= force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2579					    false, GSI_CONTINUE_LINKING);
2580	      first_inner_iterations
2581		= fold_convert (slltype, first_inner_iterations);
2582	      first_inner_iterations
2583		= force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2584					    NULL_TREE, false,
2585					    GSI_CONTINUE_LINKING);
2586	      factor = fold_convert (slltype, factor);
2587	      factor
2588		= force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2589					    false, GSI_CONTINUE_LINKING);
2590	      tree first_inner_iterationsd
2591		= fold_build1 (FLOAT_EXPR, double_type_node,
2592			       first_inner_iterations);
2593	      first_inner_iterationsd
2594		= force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2595					    NULL_TREE, false,
2596					    GSI_CONTINUE_LINKING);
2597	      tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2598					  factor);
2599	      factord = force_gimple_operand_gsi (gsi, factord, true,
2600						  NULL_TREE, false,
2601						  GSI_CONTINUE_LINKING);
2602	      tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2603					   stopvalull);
2604	      stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2605						   NULL_TREE, false,
2606						   GSI_CONTINUE_LINKING);
2607	      /* Temporarily disable flag_rounding_math, values will be
2608		 decimal numbers divided by 2 and worst case imprecisions
2609		 due to too large values ought to be caught later by the
2610		 checks for fallback.  */
2611	      int save_flag_rounding_math = flag_rounding_math;
2612	      flag_rounding_math = 0;
2613	      t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2614			       build_real (double_type_node, dconst2));
2615	      tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2616				     first_inner_iterationsd, t);
2617	      t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2618					     GSI_CONTINUE_LINKING);
2619	      t = fold_build2 (MULT_EXPR, double_type_node, factord,
2620			       build_real (double_type_node, dconst2));
2621	      t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2622	      t = fold_build2 (PLUS_EXPR, double_type_node, t,
2623			       fold_build2 (MULT_EXPR, double_type_node,
2624					    t3, t3));
2625	      flag_rounding_math = save_flag_rounding_math;
2626	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2627					    GSI_CONTINUE_LINKING);
2628	      if (flag_exceptions
2629		  && cfun->can_throw_non_call_exceptions
2630		  && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2631		{
2632		  tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2633					  build_zero_cst (double_type_node));
2634		  tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2635						  false, GSI_CONTINUE_LINKING);
2636		  cond_stmt = gimple_build_cond (NE_EXPR, tem,
2637						 boolean_false_node,
2638						 NULL_TREE, NULL_TREE);
2639		}
2640	      else
2641		cond_stmt
2642		  = gimple_build_cond (LT_EXPR, t,
2643				       build_zero_cst (double_type_node),
2644				       NULL_TREE, NULL_TREE);
2645	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2646	      e = split_block (gsi_bb (*gsi), cond_stmt);
2647	      basic_block bb1 = e->src;
2648	      e->flags = EDGE_FALSE_VALUE;
2649	      e->probability = profile_probability::very_likely ();
2650	      *gsi = gsi_after_labels (e->dest);
2651	      gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2652	      tree sqrtr = create_tmp_var (double_type_node);
2653	      gimple_call_set_lhs (call, sqrtr);
2654	      gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2655	      t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2656	      t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2657	      t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2658	      tree c = create_tmp_var (ulltype);
2659	      tree d = create_tmp_var (ulltype);
2660	      expand_omp_build_assign (gsi, c, t, true);
2661	      t = fold_build2 (MINUS_EXPR, ulltype, c,
2662			       build_one_cst (ulltype));
2663	      t = fold_build2 (MULT_EXPR, ulltype, c, t);
2664	      t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2665	      t = fold_build2 (MULT_EXPR, ulltype,
2666			       fold_convert (ulltype, fd->factor), t);
2667	      tree t2
2668		= fold_build2 (MULT_EXPR, ulltype, c,
2669			       fold_convert (ulltype,
2670					     fd->first_inner_iterations));
2671	      t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2672	      expand_omp_build_assign (gsi, d, t, true);
2673	      t = fold_build2 (MULT_EXPR, ulltype,
2674			       fold_convert (ulltype, fd->factor), c);
2675	      t = fold_build2 (PLUS_EXPR, ulltype,
2676			       t, fold_convert (ulltype,
2677						fd->first_inner_iterations));
2678	      t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2679					     GSI_CONTINUE_LINKING);
2680	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2681					     NULL_TREE, NULL_TREE);
2682	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2683	      e = split_block (gsi_bb (*gsi), cond_stmt);
2684	      basic_block bb2 = e->src;
2685	      e->flags = EDGE_TRUE_VALUE;
2686	      e->probability = profile_probability::very_likely ();
2687	      *gsi = gsi_after_labels (e->dest);
2688	      t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2689	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2690					    GSI_CONTINUE_LINKING);
2691	      cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2692					     NULL_TREE, NULL_TREE);
2693	      gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2694	      e = split_block (gsi_bb (*gsi), cond_stmt);
2695	      basic_block bb3 = e->src;
2696	      e->flags = EDGE_FALSE_VALUE;
2697	      e->probability = profile_probability::very_likely ();
2698	      *gsi = gsi_after_labels (e->dest);
2699	      t = fold_convert (itype, c);
2700	      t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2701	      t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2702	      t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2703					    GSI_CONTINUE_LINKING);
2704	      expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2705	      t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2706	      t2 = fold_convert (itype, t2);
2707	      t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2708	      t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2709	      if (fd->loops[i].m1)
2710		{
2711		  t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2712		  t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2713		}
2714	      expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2715	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2716	      bb_triang = e->src;
2717	      *gsi = gsi_after_labels (e->dest);
2718	      remove_edge (e);
2719	      e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2720	      e->probability = profile_probability::very_unlikely ();
2721	      e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2722	      e->probability = profile_probability::very_unlikely ();
2723	      e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2724	      e->probability = profile_probability::very_unlikely ();
2725
2726	      basic_block bb4 = create_empty_bb (bb0);
2727	      add_bb_to_loop (bb4, bb0->loop_father);
2728	      e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2729	      e->probability = profile_probability::unlikely ();
2730	      make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2731	      set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2732	      set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2733	      gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2734	      t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2735				counts[i], counts[i - 1]);
2736	      t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2737					     GSI_CONTINUE_LINKING);
2738	      t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2739	      t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2740	      t = fold_convert (itype, t);
2741	      t2 = fold_convert (itype, t2);
2742	      t = fold_build2 (MULT_EXPR, itype, t,
2743			       fold_convert (itype, fd->loops[i].step));
2744	      t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2745	      t2 = fold_build2 (MULT_EXPR, itype, t2,
2746				fold_convert (itype, fd->loops[i - 1].step));
2747	      t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2748	      t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2749					     false, GSI_CONTINUE_LINKING);
2750	      stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2751	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2752	      if (fd->loops[i].m1)
2753		{
2754		  t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2755				    fd->loops[i - 1].v);
2756		  t = fold_build2 (PLUS_EXPR, itype, t, t2);
2757		}
2758	      t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2759					    false, GSI_CONTINUE_LINKING);
2760	      stmt = gimple_build_assign (fd->loops[i].v, t);
2761	      gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2762	    }
2763	  /* Fallback implementation.  Evaluate the loops in between
2764	     (inclusive) fd->first_nonrect and fd->last_nonrect at
2765	     runtime unsing temporaries instead of the original iteration
2766	     variables, in the body just bump the counter and compare
2767	     with the desired value.  */
2768	  gimple_stmt_iterator gsi2 = *gsi;
2769	  basic_block entry_bb = gsi_bb (gsi2);
2770	  edge e = split_block (entry_bb, gsi_stmt (gsi2));
2771	  e = split_block (e->dest, (gimple *) NULL);
2772	  basic_block dom_bb = NULL;
2773	  basic_block cur_bb = e->src;
2774	  basic_block next_bb = e->dest;
2775	  entry_bb = e->dest;
2776	  *gsi = gsi_after_labels (entry_bb);
2777
2778	  tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2779	  tree n1 = NULL_TREE, n2 = NULL_TREE;
2780	  memset (vs, 0, fd->last_nonrect * sizeof (tree));
2781
2782	  for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2783	    {
2784	      tree itype = TREE_TYPE (fd->loops[j].v);
2785	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
2786			     && fd->loops[j].m2 == NULL_TREE
2787			     && !fd->loops[j].non_rect_referenced);
2788	      gsi2 = gsi_after_labels (cur_bb);
2789	      t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2790	      if (fd->loops[j].m1 == NULL_TREE)
2791		n1 = rect_p ? build_zero_cst (type) : t;
2792	      else if (POINTER_TYPE_P (itype))
2793		{
2794		  gcc_assert (integer_onep (fd->loops[j].m1));
2795		  t = fold_convert (sizetype,
2796				    unshare_expr (fd->loops[j].n1));
2797		  n1 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2798		}
2799	      else
2800		{
2801		  n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2802		  n1 = fold_build2 (MULT_EXPR, itype,
2803				    vs[j - fd->loops[j].outer], n1);
2804		  n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2805		}
2806	      n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2807					     true, GSI_SAME_STMT);
2808	      if (j < fd->last_nonrect)
2809		{
2810		  vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2811		  expand_omp_build_assign (&gsi2, vs[j], n1);
2812		}
2813	      t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2814	      if (fd->loops[j].m2 == NULL_TREE)
2815		n2 = rect_p ? counts[j] : t;
2816	      else if (POINTER_TYPE_P (itype))
2817		{
2818		  gcc_assert (integer_onep (fd->loops[j].m2));
2819		  t = fold_convert (sizetype,
2820				    unshare_expr (fd->loops[j].n2));
2821		  n2 = fold_build_pointer_plus (vs[j - fd->loops[j].outer], t);
2822		}
2823	      else
2824		{
2825		  n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2826		  n2 = fold_build2 (MULT_EXPR, itype,
2827				    vs[j - fd->loops[j].outer], n2);
2828		  n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2829		}
2830	      n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2831					     true, GSI_SAME_STMT);
2832	      if (POINTER_TYPE_P (itype))
2833		itype = signed_type_for (itype);
2834	      if (j == fd->last_nonrect)
2835		{
2836		  gcond *cond_stmt
2837		    = expand_omp_build_cond (&gsi2, fd->loops[i].cond_code,
2838					     n1, n2);
2839		  e = split_block (cur_bb, cond_stmt);
2840		  e->flags = EDGE_TRUE_VALUE;
2841		  edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2842		  e->probability = profile_probability::likely ().guessed ();
2843		  ne->probability = e->probability.invert ();
2844		  gsi2 = gsi_after_labels (e->dest);
2845
2846		  t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2847					     ? -1 : 1));
2848		  t = fold_build2 (PLUS_EXPR, itype,
2849				   fold_convert (itype, fd->loops[j].step), t);
2850		  t = fold_build2 (PLUS_EXPR, itype, t,
2851				   fold_convert (itype, n2));
2852		  t = fold_build2 (MINUS_EXPR, itype, t,
2853				   fold_convert (itype, n1));
2854		  tree step = fold_convert (itype, fd->loops[j].step);
2855		  if (TYPE_UNSIGNED (itype)
2856		      && fd->loops[j].cond_code == GT_EXPR)
2857		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
2858				     fold_build1 (NEGATE_EXPR, itype, t),
2859				     fold_build1 (NEGATE_EXPR, itype, step));
2860		  else
2861		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2862		  t = fold_convert (type, t);
2863		  t = fold_build2 (PLUS_EXPR, type, idx, t);
2864		  t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2865						true, GSI_SAME_STMT);
2866		  e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2867		  set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2868		  cond_stmt
2869		    = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2870					 NULL_TREE);
2871		  gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2872		  e = split_block (gsi_bb (gsi2), cond_stmt);
2873		  e->flags = EDGE_TRUE_VALUE;
2874		  e->probability = profile_probability::likely ().guessed ();
2875		  ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2876		  ne->probability = e->probability.invert ();
2877		  gsi2 = gsi_after_labels (e->dest);
2878		  expand_omp_build_assign (&gsi2, idx, t);
2879		  set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2880		  break;
2881		}
2882	      e = split_block (cur_bb, last_stmt (cur_bb));
2883
2884	      basic_block new_cur_bb = create_empty_bb (cur_bb);
2885	      add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2886
2887	      gsi2 = gsi_after_labels (e->dest);
2888	      if (rect_p)
2889		t = fold_build2 (PLUS_EXPR, type, vs[j],
2890				 build_one_cst (type));
2891	      else
2892		{
2893		  tree step
2894		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
2895		  if (POINTER_TYPE_P (vtype))
2896		    t = fold_build_pointer_plus (vs[j], fold_convert (sizetype,
2897								      step));
2898		  else
2899		    t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2900		}
2901	      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2902					    true, GSI_SAME_STMT);
2903	      expand_omp_build_assign (&gsi2, vs[j], t);
2904
2905	      edge ne = split_block (e->dest, last_stmt (e->dest));
2906	      gsi2 = gsi_after_labels (ne->dest);
2907
2908	      gcond *cond_stmt;
2909	      if (next_bb == entry_bb)
2910		/* No need to actually check the outermost condition.  */
2911		cond_stmt
2912		  = gimple_build_cond (EQ_EXPR, boolean_true_node,
2913				       boolean_true_node,
2914				       NULL_TREE, NULL_TREE);
2915	      else
2916		cond_stmt
2917		  = gimple_build_cond (rect_p ? LT_EXPR
2918					      : fd->loops[j].cond_code,
2919				       vs[j], n2, NULL_TREE, NULL_TREE);
2920	      gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2921	      edge e3, e4;
2922	      if (next_bb == entry_bb)
2923		{
2924		  e3 = find_edge (ne->dest, next_bb);
2925		  e3->flags = EDGE_FALSE_VALUE;
2926		  dom_bb = ne->dest;
2927		}
2928	      else
2929		e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2930	      e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2931	      e4->probability = profile_probability::likely ().guessed ();
2932	      e3->probability = e4->probability.invert ();
2933	      basic_block esrc = e->src;
2934	      make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2935	      cur_bb = new_cur_bb;
2936	      basic_block latch_bb = next_bb;
2937	      next_bb = e->dest;
2938	      remove_edge (e);
2939	      set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2940	      set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2941	      set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2942	    }
2943	  for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2944	    {
2945	      tree vtype = TREE_TYPE (fd->loops[j].v);
2946	      tree itype = vtype;
2947	      if (POINTER_TYPE_P (itype))
2948		itype = signed_type_for (itype);
2949	      bool rect_p = (fd->loops[j].m1 == NULL_TREE
2950			     && fd->loops[j].m2 == NULL_TREE
2951			     && !fd->loops[j].non_rect_referenced);
2952	      if (j == fd->last_nonrect)
2953		{
2954		  t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2955		  t = fold_convert (itype, t);
2956		  tree t2
2957		    = fold_convert (itype, unshare_expr (fd->loops[j].step));
2958		  t = fold_build2 (MULT_EXPR, itype, t, t2);
2959		  if (POINTER_TYPE_P (vtype))
2960		    t = fold_build_pointer_plus (n1,
2961						 fold_convert (sizetype, t));
2962		  else
2963		    t = fold_build2 (PLUS_EXPR, itype, n1, t);
2964		}
2965	      else if (rect_p)
2966		{
2967		  t = fold_convert (itype, vs[j]);
2968		  t = fold_build2 (MULT_EXPR, itype, t,
2969				   fold_convert (itype, fd->loops[j].step));
2970		  if (POINTER_TYPE_P (vtype))
2971		    t = fold_build_pointer_plus (fd->loops[j].n1,
2972						 fold_convert (sizetype, t));
2973		  else
2974		    t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2975		}
2976	      else
2977		t = vs[j];
2978	      t = force_gimple_operand_gsi (gsi, t, false,
2979					    NULL_TREE, true,
2980					    GSI_SAME_STMT);
2981	      stmt = gimple_build_assign (fd->loops[j].v, t);
2982	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2983	    }
2984	  if (gsi_end_p (*gsi))
2985	    *gsi = gsi_last_bb (gsi_bb (*gsi));
2986	  else
2987	    gsi_prev (gsi);
2988	  if (bb_triang)
2989	    {
2990	      e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2991	      make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2992	      *gsi = gsi_after_labels (e->dest);
2993	      if (!gsi_end_p (*gsi))
2994		gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2995	      set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2996	    }
2997	}
2998      else
2999	{
3000	  t = fold_convert (itype, t);
3001	  t = fold_build2 (MULT_EXPR, itype, t,
3002			   fold_convert (itype, fd->loops[i].step));
3003	  if (POINTER_TYPE_P (vtype))
3004	    t = fold_build_pointer_plus (fd->loops[i].n1, t);
3005	  else
3006	    t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
3007	  t = force_gimple_operand_gsi (gsi, t,
3008					DECL_P (fd->loops[i].v)
3009					&& TREE_ADDRESSABLE (fd->loops[i].v),
3010					NULL_TREE, false,
3011					GSI_CONTINUE_LINKING);
3012	  stmt = gimple_build_assign (fd->loops[i].v, t);
3013	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3014	}
3015      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
3016	{
3017	  t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
3018	  t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
3019					false, GSI_CONTINUE_LINKING);
3020	  stmt = gimple_build_assign (tem, t);
3021	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3022	}
3023      if (i == fd->last_nonrect)
3024	i = fd->first_nonrect;
3025    }
3026  if (fd->non_rect)
3027    for (i = 0; i <= fd->last_nonrect; i++)
3028      if (fd->loops[i].m2)
3029	{
3030	  tree itype = TREE_TYPE (fd->loops[i].v);
3031
3032	  tree t;
3033	  if (POINTER_TYPE_P (itype))
3034	    {
3035	      gcc_assert (integer_onep (fd->loops[i].m2));
3036	      t = fold_convert (sizetype, unshare_expr (fd->loops[i].n2));
3037	      t = fold_build_pointer_plus (fd->loops[i - fd->loops[i].outer].v,
3038					   t);
3039	    }
3040	  else
3041	    {
3042	      t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
3043	      t = fold_build2 (MULT_EXPR, itype,
3044			       fd->loops[i - fd->loops[i].outer].v, t);
3045	      t = fold_build2 (PLUS_EXPR, itype, t,
3046			       fold_convert (itype,
3047					     unshare_expr (fd->loops[i].n2)));
3048	    }
3049	  nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
3050	  t = force_gimple_operand_gsi (gsi, t, false,
3051					NULL_TREE, false,
3052					GSI_CONTINUE_LINKING);
3053	  stmt = gimple_build_assign (nonrect_bounds[i], t);
3054	  gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
3055	}
3056}
3057
3058/* Helper function for expand_omp_for_*.  Generate code like:
3059    L10:
3060	V3 += STEP3;
3061	if (V3 cond3 N32) goto BODY_BB; else goto L11;
3062    L11:
3063	V3 = N31;
3064	V2 += STEP2;
3065	if (V2 cond2 N22) goto BODY_BB; else goto L12;
3066    L12:
3067	V2 = N21;
3068	V1 += STEP1;
3069	goto BODY_BB;
3070   For non-rectangular loops, use temporaries stored in nonrect_bounds
3071   for the upper bounds if M?2 multiplier is present.  Given e.g.
3072   for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3073   for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3074   for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3075   for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
3076   do:
3077    L10:
3078	V4 += STEP4;
3079	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3080    L11:
3081	V4 = N41 + M41 * V2; // This can be left out if the loop
3082			     // refers to the immediate parent loop
3083	V3 += STEP3;
3084	if (V3 cond3 N32) goto BODY_BB; else goto L12;
3085    L12:
3086	V3 = N31;
3087	V2 += STEP2;
3088	if (V2 cond2 N22) goto L120; else goto L13;
3089    L120:
3090	V4 = N41 + M41 * V2;
3091	NONRECT_BOUND4 = N42 + M42 * V2;
3092	if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3093    L13:
3094	V2 = N21;
3095	V1 += STEP1;
3096	goto L120;  */
3097
3098static basic_block
3099extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3100			     basic_block cont_bb, basic_block body_bb)
3101{
3102  basic_block last_bb, bb, collapse_bb = NULL;
3103  int i;
3104  gimple_stmt_iterator gsi;
3105  edge e;
3106  tree t;
3107  gimple *stmt;
3108
3109  last_bb = cont_bb;
3110  for (i = fd->collapse - 1; i >= 0; i--)
3111    {
3112      tree vtype = TREE_TYPE (fd->loops[i].v);
3113
3114      bb = create_empty_bb (last_bb);
3115      add_bb_to_loop (bb, last_bb->loop_father);
3116      gsi = gsi_start_bb (bb);
3117
3118      if (i < fd->collapse - 1)
3119	{
3120	  e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3121	  e->probability
3122	    = profile_probability::guessed_always ().apply_scale (1, 8);
3123
3124	  struct omp_for_data_loop *l = &fd->loops[i + 1];
3125	  if (l->m1 == NULL_TREE || l->outer != 1)
3126	    {
3127	      t = l->n1;
3128	      if (l->m1)
3129		{
3130		  if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3131		    t = fold_build_pointer_plus (fd->loops[i + 1 - l->outer].v,
3132						 fold_convert (sizetype, t));
3133		  else
3134		    {
3135		      tree t2
3136			= fold_build2 (MULT_EXPR, TREE_TYPE (t),
3137				       fd->loops[i + 1 - l->outer].v, l->m1);
3138		      t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3139		    }
3140		}
3141	      t = force_gimple_operand_gsi (&gsi, t,
3142					    DECL_P (l->v)
3143					    && TREE_ADDRESSABLE (l->v),
3144					    NULL_TREE, false,
3145					    GSI_CONTINUE_LINKING);
3146	      stmt = gimple_build_assign (l->v, t);
3147	      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3148	    }
3149	}
3150      else
3151	collapse_bb = bb;
3152
3153      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3154
3155      if (POINTER_TYPE_P (vtype))
3156	t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3157      else
3158	t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3159      t = force_gimple_operand_gsi (&gsi, t,
3160				    DECL_P (fd->loops[i].v)
3161				    && TREE_ADDRESSABLE (fd->loops[i].v),
3162				    NULL_TREE, false, GSI_CONTINUE_LINKING);
3163      stmt = gimple_build_assign (fd->loops[i].v, t);
3164      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3165
3166      if (fd->loops[i].non_rect_referenced)
3167	{
3168	  basic_block update_bb = NULL, prev_bb = NULL;
3169	  for (int j = i + 1; j <= fd->last_nonrect; j++)
3170	    if (j - fd->loops[j].outer == i)
3171	      {
3172		tree n1, n2;
3173		struct omp_for_data_loop *l = &fd->loops[j];
3174		basic_block this_bb = create_empty_bb (last_bb);
3175		add_bb_to_loop (this_bb, last_bb->loop_father);
3176		gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3177		if (prev_bb)
3178		  {
3179		    e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3180		    e->probability
3181		      = profile_probability::guessed_always ().apply_scale (7,
3182									    8);
3183		    set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3184		  }
3185		if (l->m1)
3186		  {
3187		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3188		      t = fold_build_pointer_plus (fd->loops[i].v,
3189						   fold_convert (sizetype,
3190								 l->n1));
3191		    else
3192		      {
3193			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3194					 fd->loops[i].v);
3195			t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v),
3196					 t, l->n1);
3197		      }
3198		    n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3199						   false,
3200						   GSI_CONTINUE_LINKING);
3201		    stmt = gimple_build_assign (l->v, n1);
3202		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3203		    n1 = l->v;
3204		  }
3205		else
3206		  n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3207						 NULL_TREE, false,
3208						 GSI_CONTINUE_LINKING);
3209		if (l->m2)
3210		  {
3211		    if (POINTER_TYPE_P (TREE_TYPE (l->v)))
3212		      t = fold_build_pointer_plus (fd->loops[i].v,
3213						   fold_convert (sizetype,
3214								 l->n2));
3215		    else
3216		      {
3217			t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3218					 fd->loops[i].v);
3219			t = fold_build2 (PLUS_EXPR,
3220					 TREE_TYPE (nonrect_bounds[j]),
3221					 t, unshare_expr (l->n2));
3222		      }
3223		    n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3224						   false,
3225						   GSI_CONTINUE_LINKING);
3226		    stmt = gimple_build_assign (nonrect_bounds[j], n2);
3227		    gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3228		    n2 = nonrect_bounds[j];
3229		  }
3230		else
3231		  n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3232						 true, NULL_TREE, false,
3233						 GSI_CONTINUE_LINKING);
3234		gcond *cond_stmt
3235		  = gimple_build_cond (l->cond_code, n1, n2,
3236				       NULL_TREE, NULL_TREE);
3237		gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3238		if (update_bb == NULL)
3239		  update_bb = this_bb;
3240		e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3241		e->probability
3242		  = profile_probability::guessed_always ().apply_scale (1, 8);
3243		if (prev_bb == NULL)
3244		  set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3245		prev_bb = this_bb;
3246	      }
3247	  e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3248	  e->probability
3249	    = profile_probability::guessed_always ().apply_scale (7, 8);
3250	  body_bb = update_bb;
3251	}
3252
3253      if (i > 0)
3254	{
3255	  if (fd->loops[i].m2)
3256	    t = nonrect_bounds[i];
3257	  else
3258	    t = unshare_expr (fd->loops[i].n2);
3259	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3260					false, GSI_CONTINUE_LINKING);
3261	  tree v = fd->loops[i].v;
3262	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
3263	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3264					  false, GSI_CONTINUE_LINKING);
3265	  t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3266	  stmt = gimple_build_cond_empty (t);
3267	  gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3268	  if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3269			 expand_omp_regimplify_p, NULL, NULL)
3270	      || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3271			    expand_omp_regimplify_p, NULL, NULL))
3272	    gimple_regimplify_operands (stmt, &gsi);
3273	  e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3274	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3275	}
3276      else
3277	make_edge (bb, body_bb, EDGE_FALLTHRU);
3278      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3279      last_bb = bb;
3280    }
3281
3282  return collapse_bb;
3283}
3284
3285/* Expand #pragma omp ordered depend(source).  */
3286
3287static void
3288expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3289			   tree *counts, location_t loc)
3290{
3291  enum built_in_function source_ix
3292    = fd->iter_type == long_integer_type_node
3293      ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3294  gimple *g
3295    = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3296			 build_fold_addr_expr (counts[fd->ordered]));
3297  gimple_set_location (g, loc);
3298  gsi_insert_before (gsi, g, GSI_SAME_STMT);
3299}
3300
3301/* Expand a single depend from #pragma omp ordered depend(sink:...).  */
3302
3303static void
3304expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3305			 tree *counts, tree c, location_t loc)
3306{
3307  auto_vec<tree, 10> args;
3308  enum built_in_function sink_ix
3309    = fd->iter_type == long_integer_type_node
3310      ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3311  tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3312  int i;
3313  gimple_stmt_iterator gsi2 = *gsi;
3314  bool warned_step = false;
3315
3316  for (i = 0; i < fd->ordered; i++)
3317    {
3318      tree step = NULL_TREE;
3319      off = TREE_PURPOSE (deps);
3320      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3321	{
3322	  step = TREE_OPERAND (off, 1);
3323	  off = TREE_OPERAND (off, 0);
3324	}
3325      if (!integer_zerop (off))
3326	{
3327	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3328		      || fd->loops[i].cond_code == GT_EXPR);
3329	  bool forward = fd->loops[i].cond_code == LT_EXPR;
3330	  if (step)
3331	    {
3332	      /* Non-simple Fortran DO loops.  If step is variable,
3333		 we don't know at compile even the direction, so can't
3334		 warn.  */
3335	      if (TREE_CODE (step) != INTEGER_CST)
3336		break;
3337	      forward = tree_int_cst_sgn (step) != -1;
3338	    }
3339	  if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3340	    warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3341				"waiting for lexically later iteration");
3342	  break;
3343	}
3344      deps = TREE_CHAIN (deps);
3345    }
3346  /* If all offsets corresponding to the collapsed loops are zero,
3347     this depend clause can be ignored.  FIXME: but there is still a
3348     flush needed.  We need to emit one __sync_synchronize () for it
3349     though (perhaps conditionally)?  Solve this together with the
3350     conservative dependence folding optimization.
3351  if (i >= fd->collapse)
3352    return;  */
3353
3354  deps = OMP_CLAUSE_DECL (c);
3355  gsi_prev (&gsi2);
3356  edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3357  edge e2 = split_block_after_labels (e1->dest);
3358
3359  gsi2 = gsi_after_labels (e1->dest);
3360  *gsi = gsi_last_bb (e1->src);
3361  for (i = 0; i < fd->ordered; i++)
3362    {
3363      tree itype = TREE_TYPE (fd->loops[i].v);
3364      tree step = NULL_TREE;
3365      tree orig_off = NULL_TREE;
3366      if (POINTER_TYPE_P (itype))
3367	itype = sizetype;
3368      if (i)
3369	deps = TREE_CHAIN (deps);
3370      off = TREE_PURPOSE (deps);
3371      if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3372	{
3373	  step = TREE_OPERAND (off, 1);
3374	  off = TREE_OPERAND (off, 0);
3375	  gcc_assert (fd->loops[i].cond_code == LT_EXPR
3376		      && integer_onep (fd->loops[i].step)
3377		      && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3378	}
3379      tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3380      if (step)
3381	{
3382	  off = fold_convert_loc (loc, itype, off);
3383	  orig_off = off;
3384	  off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3385	}
3386
3387      if (integer_zerop (off))
3388	t = boolean_true_node;
3389      else
3390	{
3391	  tree a;
3392	  tree co = fold_convert_loc (loc, itype, off);
3393	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3394	    {
3395	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3396		co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3397	      a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3398				   TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3399				   co);
3400	    }
3401	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3402	    a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3403				 fd->loops[i].v, co);
3404	  else
3405	    a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3406				 fd->loops[i].v, co);
3407	  if (step)
3408	    {
3409	      tree t1, t2;
3410	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3411		t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3412				      fd->loops[i].n1);
3413	      else
3414		t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3415				      fd->loops[i].n2);
3416	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3417		t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3418				      fd->loops[i].n2);
3419	      else
3420		t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3421				      fd->loops[i].n1);
3422	      t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3423				   step, build_int_cst (TREE_TYPE (step), 0));
3424	      if (TREE_CODE (step) != INTEGER_CST)
3425		{
3426		  t1 = unshare_expr (t1);
3427		  t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3428						 false, GSI_CONTINUE_LINKING);
3429		  t2 = unshare_expr (t2);
3430		  t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3431						 false, GSI_CONTINUE_LINKING);
3432		}
3433	      t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3434				   t, t2, t1);
3435	    }
3436	  else if (fd->loops[i].cond_code == LT_EXPR)
3437	    {
3438	      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3439		t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3440				     fd->loops[i].n1);
3441	      else
3442		t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3443				     fd->loops[i].n2);
3444	    }
3445	  else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3446	    t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3447				 fd->loops[i].n2);
3448	  else
3449	    t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3450				 fd->loops[i].n1);
3451	}
3452      if (cond)
3453	cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3454      else
3455	cond = t;
3456
3457      off = fold_convert_loc (loc, itype, off);
3458
3459      if (step
3460	  || (fd->loops[i].cond_code == LT_EXPR
3461	      ? !integer_onep (fd->loops[i].step)
3462	      : !integer_minus_onep (fd->loops[i].step)))
3463	{
3464	  if (step == NULL_TREE
3465	      && TYPE_UNSIGNED (itype)
3466	      && fd->loops[i].cond_code == GT_EXPR)
3467	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3468				 fold_build1_loc (loc, NEGATE_EXPR, itype,
3469						  s));
3470	  else
3471	    t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3472				 orig_off ? orig_off : off, s);
3473	  t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3474			       build_int_cst (itype, 0));
3475	  if (integer_zerop (t) && !warned_step)
3476	    {
3477	      warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3478				  "refers to iteration never in the iteration "
3479				  "space");
3480	      warned_step = true;
3481	    }
3482	  cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3483				  cond, t);
3484	}
3485
3486      if (i <= fd->collapse - 1 && fd->collapse > 1)
3487	t = fd->loop.v;
3488      else if (counts[i])
3489	t = counts[i];
3490      else
3491	{
3492	  t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3493			       fd->loops[i].v, fd->loops[i].n1);
3494	  t = fold_convert_loc (loc, fd->iter_type, t);
3495	}
3496      if (step)
3497	/* We have divided off by step already earlier.  */;
3498      else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3499	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3500			       fold_build1_loc (loc, NEGATE_EXPR, itype,
3501						s));
3502      else
3503	off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3504      if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3505	off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3506      off = fold_convert_loc (loc, fd->iter_type, off);
3507      if (i <= fd->collapse - 1 && fd->collapse > 1)
3508	{
3509	  if (i)
3510	    off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3511				   off);
3512	  if (i < fd->collapse - 1)
3513	    {
3514	      coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3515				      counts[i]);
3516	      continue;
3517	    }
3518	}
3519      off = unshare_expr (off);
3520      t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3521      t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3522				    true, GSI_SAME_STMT);
3523      args.safe_push (t);
3524    }
3525  gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3526  gimple_set_location (g, loc);
3527  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3528
3529  cond = unshare_expr (cond);
3530  cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3531				   GSI_CONTINUE_LINKING);
3532  gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3533  edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3534  e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3535  e1->probability = e3->probability.invert ();
3536  e1->flags = EDGE_TRUE_VALUE;
3537  set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3538
3539  *gsi = gsi_after_labels (e2->dest);
3540}
3541
3542/* Expand all #pragma omp ordered depend(source) and
3543   #pragma omp ordered depend(sink:...) constructs in the current
3544   #pragma omp for ordered(n) region.  */
3545
3546static void
3547expand_omp_ordered_source_sink (struct omp_region *region,
3548				struct omp_for_data *fd, tree *counts,
3549				basic_block cont_bb)
3550{
3551  struct omp_region *inner;
3552  int i;
3553  for (i = fd->collapse - 1; i < fd->ordered; i++)
3554    if (i == fd->collapse - 1 && fd->collapse > 1)
3555      counts[i] = NULL_TREE;
3556    else if (i >= fd->collapse && !cont_bb)
3557      counts[i] = build_zero_cst (fd->iter_type);
3558    else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3559	     && integer_onep (fd->loops[i].step))
3560      counts[i] = NULL_TREE;
3561    else
3562      counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3563  tree atype
3564    = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3565  counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3566  TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3567
3568  for (inner = region->inner; inner; inner = inner->next)
3569    if (inner->type == GIMPLE_OMP_ORDERED)
3570      {
3571	gomp_ordered *ord_stmt = inner->ord_stmt;
3572	gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3573	location_t loc = gimple_location (ord_stmt);
3574	tree c;
3575	for (c = gimple_omp_ordered_clauses (ord_stmt);
3576	     c; c = OMP_CLAUSE_CHAIN (c))
3577	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3578	    break;
3579	if (c)
3580	  expand_omp_ordered_source (&gsi, fd, counts, loc);
3581	for (c = gimple_omp_ordered_clauses (ord_stmt);
3582	     c; c = OMP_CLAUSE_CHAIN (c))
3583	  if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3584	    expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3585	gsi_remove (&gsi, true);
3586      }
3587}
3588
3589/* Wrap the body into fd->ordered - fd->collapse loops that aren't
3590   collapsed.  */
3591
3592static basic_block
3593expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3594			      basic_block cont_bb, basic_block body_bb,
3595			      basic_block l0_bb, bool ordered_lastprivate)
3596{
3597  if (fd->ordered == fd->collapse)
3598    return cont_bb;
3599
3600  if (!cont_bb)
3601    {
3602      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3603      for (int i = fd->collapse; i < fd->ordered; i++)
3604	{
3605	  tree type = TREE_TYPE (fd->loops[i].v);
3606	  tree n1 = fold_convert (type, fd->loops[i].n1);
3607	  expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3608	  tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3609			      size_int (i - fd->collapse + 1),
3610			      NULL_TREE, NULL_TREE);
3611	  expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3612	}
3613      return NULL;
3614    }
3615
3616  for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3617    {
3618      tree t, type = TREE_TYPE (fd->loops[i].v);
3619      gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3620      expand_omp_build_assign (&gsi, fd->loops[i].v,
3621			       fold_convert (type, fd->loops[i].n1));
3622      if (counts[i])
3623	expand_omp_build_assign (&gsi, counts[i],
3624				 build_zero_cst (fd->iter_type));
3625      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3626			  size_int (i - fd->collapse + 1),
3627			  NULL_TREE, NULL_TREE);
3628      expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3629      if (!gsi_end_p (gsi))
3630	gsi_prev (&gsi);
3631      else
3632	gsi = gsi_last_bb (body_bb);
3633      edge e1 = split_block (body_bb, gsi_stmt (gsi));
3634      basic_block new_body = e1->dest;
3635      if (body_bb == cont_bb)
3636	cont_bb = new_body;
3637      edge e2 = NULL;
3638      basic_block new_header;
3639      if (EDGE_COUNT (cont_bb->preds) > 0)
3640	{
3641	  gsi = gsi_last_bb (cont_bb);
3642	  if (POINTER_TYPE_P (type))
3643	    t = fold_build_pointer_plus (fd->loops[i].v,
3644					 fold_convert (sizetype,
3645						       fd->loops[i].step));
3646	  else
3647	    t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3648			     fold_convert (type, fd->loops[i].step));
3649	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3650	  if (counts[i])
3651	    {
3652	      t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3653			       build_int_cst (fd->iter_type, 1));
3654	      expand_omp_build_assign (&gsi, counts[i], t);
3655	      t = counts[i];
3656	    }
3657	  else
3658	    {
3659	      t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3660			       fd->loops[i].v, fd->loops[i].n1);
3661	      t = fold_convert (fd->iter_type, t);
3662	      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3663					    true, GSI_SAME_STMT);
3664	    }
3665	  aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3666			 size_int (i - fd->collapse + 1),
3667			 NULL_TREE, NULL_TREE);
3668	  expand_omp_build_assign (&gsi, aref, t);
3669	  gsi_prev (&gsi);
3670	  e2 = split_block (cont_bb, gsi_stmt (gsi));
3671	  new_header = e2->dest;
3672	}
3673      else
3674	new_header = cont_bb;
3675      gsi = gsi_after_labels (new_header);
3676      tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3677					 true, GSI_SAME_STMT);
3678      tree n2
3679	= force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3680				    true, NULL_TREE, true, GSI_SAME_STMT);
3681      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3682      gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3683      edge e3 = split_block (new_header, gsi_stmt (gsi));
3684      cont_bb = e3->dest;
3685      remove_edge (e1);
3686      make_edge (body_bb, new_header, EDGE_FALLTHRU);
3687      e3->flags = EDGE_FALSE_VALUE;
3688      e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3689      e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3690      e1->probability = e3->probability.invert ();
3691
3692      set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3693      set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3694
3695      if (e2)
3696	{
3697	  class loop *loop = alloc_loop ();
3698	  loop->header = new_header;
3699	  loop->latch = e2->src;
3700	  add_loop (loop, l0_bb->loop_father);
3701	}
3702    }
3703
3704  /* If there are any lastprivate clauses and it is possible some loops
3705     might have zero iterations, ensure all the decls are initialized,
3706     otherwise we could crash evaluating C++ class iterators with lastprivate
3707     clauses.  */
3708  bool need_inits = false;
3709  for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3710    if (need_inits)
3711      {
3712	tree type = TREE_TYPE (fd->loops[i].v);
3713	gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3714	expand_omp_build_assign (&gsi, fd->loops[i].v,
3715				 fold_convert (type, fd->loops[i].n1));
3716      }
3717    else
3718      {
3719	tree type = TREE_TYPE (fd->loops[i].v);
3720	tree this_cond = fold_build2 (fd->loops[i].cond_code,
3721				      boolean_type_node,
3722				      fold_convert (type, fd->loops[i].n1),
3723				      fold_convert (type, fd->loops[i].n2));
3724	if (!integer_onep (this_cond))
3725	  need_inits = true;
3726      }
3727
3728  return cont_bb;
3729}
3730
3731/* A subroutine of expand_omp_for.  Generate code for a parallel
3732   loop with any schedule.  Given parameters:
3733
3734	for (V = N1; V cond N2; V += STEP) BODY;
3735
3736   where COND is "<" or ">", we generate pseudocode
3737
3738	more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3739	if (more) goto L0; else goto L3;
3740    L0:
3741	V = istart0;
3742	iend = iend0;
3743    L1:
3744	BODY;
3745	V += STEP;
3746	if (V cond iend) goto L1; else goto L2;
3747    L2:
3748	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3749    L3:
3750
3751    If this is a combined omp parallel loop, instead of the call to
3752    GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3753    If this is gimple_omp_for_combined_p loop, then instead of assigning
3754    V and iend in L0 we assign the first two _looptemp_ clause decls of the
3755    inner GIMPLE_OMP_FOR and V += STEP; and
3756    if (V cond iend) goto L1; else goto L2; are removed.
3757
3758    For collapsed loops, given parameters:
3759      collapse(3)
3760      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3761	for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3762	  for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3763	    BODY;
3764
3765    we generate pseudocode
3766
3767	if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3768	if (cond3 is <)
3769	  adj = STEP3 - 1;
3770	else
3771	  adj = STEP3 + 1;
3772	count3 = (adj + N32 - N31) / STEP3;
3773	if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3774	if (cond2 is <)
3775	  adj = STEP2 - 1;
3776	else
3777	  adj = STEP2 + 1;
3778	count2 = (adj + N22 - N21) / STEP2;
3779	if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3780	if (cond1 is <)
3781	  adj = STEP1 - 1;
3782	else
3783	  adj = STEP1 + 1;
3784	count1 = (adj + N12 - N11) / STEP1;
3785	count = count1 * count2 * count3;
3786	goto Z1;
3787    Z0:
3788	count = 0;
3789    Z1:
3790	more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3791	if (more) goto L0; else goto L3;
3792    L0:
3793	V = istart0;
3794	T = V;
3795	V3 = N31 + (T % count3) * STEP3;
3796	T = T / count3;
3797	V2 = N21 + (T % count2) * STEP2;
3798	T = T / count2;
3799	V1 = N11 + T * STEP1;
3800	iend = iend0;
3801    L1:
3802	BODY;
3803	V += 1;
3804	if (V < iend) goto L10; else goto L2;
3805    L10:
3806	V3 += STEP3;
3807	if (V3 cond3 N32) goto L1; else goto L11;
3808    L11:
3809	V3 = N31;
3810	V2 += STEP2;
3811	if (V2 cond2 N22) goto L1; else goto L12;
3812    L12:
3813	V2 = N21;
3814	V1 += STEP1;
3815	goto L1;
3816    L2:
3817	if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3818    L3:
3819
3820      */
3821
3822static void
3823expand_omp_for_generic (struct omp_region *region,
3824			struct omp_for_data *fd,
3825			enum built_in_function start_fn,
3826			enum built_in_function next_fn,
3827			tree sched_arg,
3828			gimple *inner_stmt)
3829{
3830  tree type, istart0, iend0, iend;
3831  tree t, vmain, vback, bias = NULL_TREE;
3832  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3833  basic_block l2_bb = NULL, l3_bb = NULL;
3834  gimple_stmt_iterator gsi;
3835  gassign *assign_stmt;
3836  bool in_combined_parallel = is_combined_parallel (region);
3837  bool broken_loop = region->cont == NULL;
3838  edge e, ne;
3839  tree *counts = NULL;
3840  int i;
3841  bool ordered_lastprivate = false;
3842
3843  gcc_assert (!broken_loop || !in_combined_parallel);
3844  gcc_assert (fd->iter_type == long_integer_type_node
3845	      || !in_combined_parallel);
3846
3847  entry_bb = region->entry;
3848  cont_bb = region->cont;
3849  collapse_bb = NULL;
3850  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3851  gcc_assert (broken_loop
3852	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3853  l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3854  l1_bb = single_succ (l0_bb);
3855  if (!broken_loop)
3856    {
3857      l2_bb = create_empty_bb (cont_bb);
3858      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3859		  || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3860		      == l1_bb));
3861      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3862    }
3863  else
3864    l2_bb = NULL;
3865  l3_bb = BRANCH_EDGE (entry_bb)->dest;
3866  exit_bb = region->exit;
3867
3868  gsi = gsi_last_nondebug_bb (entry_bb);
3869
3870  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3871  if (fd->ordered
3872      && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3873			  OMP_CLAUSE_LASTPRIVATE))
3874    ordered_lastprivate = false;
3875  tree reductions = NULL_TREE;
3876  tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3877  tree memv = NULL_TREE;
3878  if (fd->lastprivate_conditional)
3879    {
3880      tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3881				OMP_CLAUSE__CONDTEMP_);
3882      if (fd->have_pointer_condtemp)
3883	condtemp = OMP_CLAUSE_DECL (c);
3884      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3885      cond_var = OMP_CLAUSE_DECL (c);
3886    }
3887  if (sched_arg)
3888    {
3889      if (fd->have_reductemp)
3890	{
3891	  tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3892				    OMP_CLAUSE__REDUCTEMP_);
3893	  reductions = OMP_CLAUSE_DECL (c);
3894	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3895	  gimple *g = SSA_NAME_DEF_STMT (reductions);
3896	  reductions = gimple_assign_rhs1 (g);
3897	  OMP_CLAUSE_DECL (c) = reductions;
3898	  entry_bb = gimple_bb (g);
3899	  edge e = split_block (entry_bb, g);
3900	  if (region->entry == entry_bb)
3901	    region->entry = e->dest;
3902	  gsi = gsi_last_bb (entry_bb);
3903	}
3904      else
3905	reductions = null_pointer_node;
3906      if (fd->have_pointer_condtemp)
3907	{
3908	  tree type = TREE_TYPE (condtemp);
3909	  memv = create_tmp_var (type);
3910	  TREE_ADDRESSABLE (memv) = 1;
3911	  unsigned HOST_WIDE_INT sz
3912	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3913	  sz *= fd->lastprivate_conditional;
3914	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3915				   false);
3916	  mem = build_fold_addr_expr (memv);
3917	}
3918      else
3919	mem = null_pointer_node;
3920    }
3921  if (fd->collapse > 1 || fd->ordered)
3922    {
3923      int first_zero_iter1 = -1, first_zero_iter2 = -1;
3924      basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3925
3926      counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3927      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3928				  zero_iter1_bb, first_zero_iter1,
3929				  zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3930
3931      if (zero_iter1_bb)
3932	{
3933	  /* Some counts[i] vars might be uninitialized if
3934	     some loop has zero iterations.  But the body shouldn't
3935	     be executed in that case, so just avoid uninit warnings.  */
3936	  for (i = first_zero_iter1;
3937	       i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3938	    if (SSA_VAR_P (counts[i]))
3939	      suppress_warning (counts[i], OPT_Wuninitialized);
3940	  gsi_prev (&gsi);
3941	  e = split_block (entry_bb, gsi_stmt (gsi));
3942	  entry_bb = e->dest;
3943	  make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3944	  gsi = gsi_last_nondebug_bb (entry_bb);
3945	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3946				   get_immediate_dominator (CDI_DOMINATORS,
3947							    zero_iter1_bb));
3948	}
3949      if (zero_iter2_bb)
3950	{
3951	  /* Some counts[i] vars might be uninitialized if
3952	     some loop has zero iterations.  But the body shouldn't
3953	     be executed in that case, so just avoid uninit warnings.  */
3954	  for (i = first_zero_iter2; i < fd->ordered; i++)
3955	    if (SSA_VAR_P (counts[i]))
3956	      suppress_warning (counts[i], OPT_Wuninitialized);
3957	  if (zero_iter1_bb)
3958	    make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3959	  else
3960	    {
3961	      gsi_prev (&gsi);
3962	      e = split_block (entry_bb, gsi_stmt (gsi));
3963	      entry_bb = e->dest;
3964	      make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3965	      gsi = gsi_last_nondebug_bb (entry_bb);
3966	      set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3967				       get_immediate_dominator
3968					 (CDI_DOMINATORS, zero_iter2_bb));
3969	    }
3970	}
3971      if (fd->collapse == 1)
3972	{
3973	  counts[0] = fd->loop.n2;
3974	  fd->loop = fd->loops[0];
3975	}
3976    }
3977
3978  type = TREE_TYPE (fd->loop.v);
3979  istart0 = create_tmp_var (fd->iter_type, ".istart0");
3980  iend0 = create_tmp_var (fd->iter_type, ".iend0");
3981  TREE_ADDRESSABLE (istart0) = 1;
3982  TREE_ADDRESSABLE (iend0) = 1;
3983
3984  /* See if we need to bias by LLONG_MIN.  */
3985  if (fd->iter_type == long_long_unsigned_type_node
3986      && TREE_CODE (type) == INTEGER_TYPE
3987      && !TYPE_UNSIGNED (type)
3988      && fd->ordered == 0)
3989    {
3990      tree n1, n2;
3991
3992      if (fd->loop.cond_code == LT_EXPR)
3993	{
3994	  n1 = fd->loop.n1;
3995	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3996	}
3997      else
3998	{
3999	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4000	  n2 = fd->loop.n1;
4001	}
4002      if (TREE_CODE (n1) != INTEGER_CST
4003	  || TREE_CODE (n2) != INTEGER_CST
4004	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4005	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4006    }
4007
4008  gimple_stmt_iterator gsif = gsi;
4009  gsi_prev (&gsif);
4010
4011  tree arr = NULL_TREE;
4012  if (in_combined_parallel)
4013    {
4014      gcc_assert (fd->ordered == 0);
4015      /* In a combined parallel loop, emit a call to
4016	 GOMP_loop_foo_next.  */
4017      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4018			   build_fold_addr_expr (istart0),
4019			   build_fold_addr_expr (iend0));
4020    }
4021  else
4022    {
4023      tree t0, t1, t2, t3, t4;
4024      /* If this is not a combined parallel loop, emit a call to
4025	 GOMP_loop_foo_start in ENTRY_BB.  */
4026      t4 = build_fold_addr_expr (iend0);
4027      t3 = build_fold_addr_expr (istart0);
4028      if (fd->ordered)
4029	{
4030	  t0 = build_int_cst (unsigned_type_node,
4031			      fd->ordered - fd->collapse + 1);
4032	  arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
4033							fd->ordered
4034							- fd->collapse + 1),
4035				".omp_counts");
4036	  DECL_NAMELESS (arr) = 1;
4037	  TREE_ADDRESSABLE (arr) = 1;
4038	  TREE_STATIC (arr) = 1;
4039	  vec<constructor_elt, va_gc> *v;
4040	  vec_alloc (v, fd->ordered - fd->collapse + 1);
4041	  int idx;
4042
4043	  for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
4044	    {
4045	      tree c;
4046	      if (idx == 0 && fd->collapse > 1)
4047		c = fd->loop.n2;
4048	      else
4049		c = counts[idx + fd->collapse - 1];
4050	      tree purpose = size_int (idx);
4051	      CONSTRUCTOR_APPEND_ELT (v, purpose, c);
4052	      if (TREE_CODE (c) != INTEGER_CST)
4053		TREE_STATIC (arr) = 0;
4054	    }
4055
4056	  DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
4057	  if (!TREE_STATIC (arr))
4058	    force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
4059						    void_type_node, arr),
4060				      true, NULL_TREE, true, GSI_SAME_STMT);
4061	  t1 = build_fold_addr_expr (arr);
4062	  t2 = NULL_TREE;
4063	}
4064      else
4065	{
4066	  t2 = fold_convert (fd->iter_type, fd->loop.step);
4067	  t1 = fd->loop.n2;
4068	  t0 = fd->loop.n1;
4069	  if (gimple_omp_for_combined_into_p (fd->for_stmt))
4070	    {
4071	      tree innerc
4072		= omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4073				   OMP_CLAUSE__LOOPTEMP_);
4074	      gcc_assert (innerc);
4075	      t0 = OMP_CLAUSE_DECL (innerc);
4076	      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4077					OMP_CLAUSE__LOOPTEMP_);
4078	      gcc_assert (innerc);
4079	      t1 = OMP_CLAUSE_DECL (innerc);
4080	    }
4081	  if (POINTER_TYPE_P (TREE_TYPE (t0))
4082	      && TYPE_PRECISION (TREE_TYPE (t0))
4083		 != TYPE_PRECISION (fd->iter_type))
4084	    {
4085	      /* Avoid casting pointers to integer of a different size.  */
4086	      tree itype = signed_type_for (type);
4087	      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4088	      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4089	    }
4090	  else
4091	    {
4092	      t1 = fold_convert (fd->iter_type, t1);
4093	      t0 = fold_convert (fd->iter_type, t0);
4094	    }
4095	  if (bias)
4096	    {
4097	      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4098	      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4099	    }
4100	}
4101      if (fd->iter_type == long_integer_type_node || fd->ordered)
4102	{
4103	  if (fd->chunk_size)
4104	    {
4105	      t = fold_convert (fd->iter_type, fd->chunk_size);
4106	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4107	      if (sched_arg)
4108		{
4109		  if (fd->ordered)
4110		    t = build_call_expr (builtin_decl_explicit (start_fn),
4111					 8, t0, t1, sched_arg, t, t3, t4,
4112					 reductions, mem);
4113		  else
4114		    t = build_call_expr (builtin_decl_explicit (start_fn),
4115					 9, t0, t1, t2, sched_arg, t, t3, t4,
4116					 reductions, mem);
4117		}
4118	      else if (fd->ordered)
4119		t = build_call_expr (builtin_decl_explicit (start_fn),
4120				     5, t0, t1, t, t3, t4);
4121	      else
4122		t = build_call_expr (builtin_decl_explicit (start_fn),
4123				     6, t0, t1, t2, t, t3, t4);
4124	    }
4125	  else if (fd->ordered)
4126	    t = build_call_expr (builtin_decl_explicit (start_fn),
4127				 4, t0, t1, t3, t4);
4128	  else
4129	    t = build_call_expr (builtin_decl_explicit (start_fn),
4130				 5, t0, t1, t2, t3, t4);
4131	}
4132      else
4133	{
4134	  tree t5;
4135	  tree c_bool_type;
4136	  tree bfn_decl;
4137
4138	  /* The GOMP_loop_ull_*start functions have additional boolean
4139	     argument, true for < loops and false for > loops.
4140	     In Fortran, the C bool type can be different from
4141	     boolean_type_node.  */
4142	  bfn_decl = builtin_decl_explicit (start_fn);
4143	  c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4144	  t5 = build_int_cst (c_bool_type,
4145			      fd->loop.cond_code == LT_EXPR ? 1 : 0);
4146	  if (fd->chunk_size)
4147	    {
4148	      tree bfn_decl = builtin_decl_explicit (start_fn);
4149	      t = fold_convert (fd->iter_type, fd->chunk_size);
4150	      t = omp_adjust_chunk_size (t, fd->simd_schedule);
4151	      if (sched_arg)
4152		t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4153				     t, t3, t4, reductions, mem);
4154	      else
4155		t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4156	    }
4157	  else
4158	    t = build_call_expr (builtin_decl_explicit (start_fn),
4159				 6, t5, t0, t1, t2, t3, t4);
4160	}
4161    }
4162  if (TREE_TYPE (t) != boolean_type_node)
4163    t = fold_build2 (NE_EXPR, boolean_type_node,
4164		     t, build_int_cst (TREE_TYPE (t), 0));
4165  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4166				true, GSI_SAME_STMT);
4167  if (arr && !TREE_STATIC (arr))
4168    {
4169      tree clobber = build_clobber (TREE_TYPE (arr));
4170      gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4171			 GSI_SAME_STMT);
4172    }
4173  if (fd->have_pointer_condtemp)
4174    expand_omp_build_assign (&gsi, condtemp, memv, false);
4175  if (fd->have_reductemp)
4176    {
4177      gimple *g = gsi_stmt (gsi);
4178      gsi_remove (&gsi, true);
4179      release_ssa_name (gimple_assign_lhs (g));
4180
4181      entry_bb = region->entry;
4182      gsi = gsi_last_nondebug_bb (entry_bb);
4183
4184      gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4185    }
4186  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4187
4188  /* Remove the GIMPLE_OMP_FOR statement.  */
4189  gsi_remove (&gsi, true);
4190
4191  if (gsi_end_p (gsif))
4192    gsif = gsi_after_labels (gsi_bb (gsif));
4193  gsi_next (&gsif);
4194
4195  /* Iteration setup for sequential loop goes in L0_BB.  */
4196  tree startvar = fd->loop.v;
4197  tree endvar = NULL_TREE;
4198
4199  if (gimple_omp_for_combined_p (fd->for_stmt))
4200    {
4201      gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4202		  && gimple_omp_for_kind (inner_stmt)
4203		     == GF_OMP_FOR_KIND_SIMD);
4204      tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4205				     OMP_CLAUSE__LOOPTEMP_);
4206      gcc_assert (innerc);
4207      startvar = OMP_CLAUSE_DECL (innerc);
4208      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4209				OMP_CLAUSE__LOOPTEMP_);
4210      gcc_assert (innerc);
4211      endvar = OMP_CLAUSE_DECL (innerc);
4212    }
4213
4214  gsi = gsi_start_bb (l0_bb);
4215  t = istart0;
4216  if (fd->ordered && fd->collapse == 1)
4217    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4218		     fold_convert (fd->iter_type, fd->loop.step));
4219  else if (bias)
4220    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4221  if (fd->ordered && fd->collapse == 1)
4222    {
4223      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4224	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4225			 fd->loop.n1, fold_convert (sizetype, t));
4226      else
4227	{
4228	  t = fold_convert (TREE_TYPE (startvar), t);
4229	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4230			   fd->loop.n1, t);
4231	}
4232    }
4233  else
4234    {
4235      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4236	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4237      t = fold_convert (TREE_TYPE (startvar), t);
4238    }
4239  t = force_gimple_operand_gsi (&gsi, t,
4240				DECL_P (startvar)
4241				&& TREE_ADDRESSABLE (startvar),
4242				NULL_TREE, false, GSI_CONTINUE_LINKING);
4243  assign_stmt = gimple_build_assign (startvar, t);
4244  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4245  if (cond_var)
4246    {
4247      tree itype = TREE_TYPE (cond_var);
4248      /* For lastprivate(conditional:) itervar, we need some iteration
4249	 counter that starts at unsigned non-zero and increases.
4250	 Prefer as few IVs as possible, so if we can use startvar
4251	 itself, use that, or startvar + constant (those would be
4252	 incremented with step), and as last resort use the s0 + 1
4253	 incremented by 1.  */
4254      if ((fd->ordered && fd->collapse == 1)
4255	  || bias
4256	  || POINTER_TYPE_P (type)
4257	  || TREE_CODE (fd->loop.n1) != INTEGER_CST
4258	  || fd->loop.cond_code != LT_EXPR)
4259	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4260			 build_int_cst (itype, 1));
4261      else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4262	t = fold_convert (itype, t);
4263      else
4264	{
4265	  tree c = fold_convert (itype, fd->loop.n1);
4266	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4267	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4268	}
4269      t = force_gimple_operand_gsi (&gsi, t, false,
4270				    NULL_TREE, false, GSI_CONTINUE_LINKING);
4271      assign_stmt = gimple_build_assign (cond_var, t);
4272      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4273    }
4274
4275  t = iend0;
4276  if (fd->ordered && fd->collapse == 1)
4277    t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4278		     fold_convert (fd->iter_type, fd->loop.step));
4279  else if (bias)
4280    t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4281  if (fd->ordered && fd->collapse == 1)
4282    {
4283      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4284	t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4285			 fd->loop.n1, fold_convert (sizetype, t));
4286      else
4287	{
4288	  t = fold_convert (TREE_TYPE (startvar), t);
4289	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4290			   fd->loop.n1, t);
4291	}
4292    }
4293  else
4294    {
4295      if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4296	t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4297      t = fold_convert (TREE_TYPE (startvar), t);
4298    }
4299  iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4300				   false, GSI_CONTINUE_LINKING);
4301  if (endvar)
4302    {
4303      assign_stmt = gimple_build_assign (endvar, iend);
4304      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4305      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4306	assign_stmt = gimple_build_assign (fd->loop.v, iend);
4307      else
4308	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4309      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4310    }
4311  /* Handle linear clause adjustments.  */
4312  tree itercnt = NULL_TREE;
4313  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4314    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4315	 c; c = OMP_CLAUSE_CHAIN (c))
4316      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4317	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4318	{
4319	  tree d = OMP_CLAUSE_DECL (c);
4320	  tree t = d, a, dest;
4321	  if (omp_privatize_by_reference (t))
4322	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4323	  tree type = TREE_TYPE (t);
4324	  if (POINTER_TYPE_P (type))
4325	    type = sizetype;
4326	  dest = unshare_expr (t);
4327	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
4328	  expand_omp_build_assign (&gsif, v, t);
4329	  if (itercnt == NULL_TREE)
4330	    {
4331	      itercnt = startvar;
4332	      tree n1 = fd->loop.n1;
4333	      if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4334		{
4335		  itercnt
4336		    = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4337				    itercnt);
4338		  n1 = fold_convert (TREE_TYPE (itercnt), n1);
4339		}
4340	      itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4341				     itercnt, n1);
4342	      itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4343				     itercnt, fd->loop.step);
4344	      itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4345						  NULL_TREE, false,
4346						  GSI_CONTINUE_LINKING);
4347	    }
4348	  a = fold_build2 (MULT_EXPR, type,
4349			   fold_convert (type, itercnt),
4350			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4351	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4352			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4353	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4354					false, GSI_CONTINUE_LINKING);
4355	  expand_omp_build_assign (&gsi, dest, t, true);
4356	}
4357  if (fd->collapse > 1)
4358    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4359
4360  if (fd->ordered)
4361    {
4362      /* Until now, counts array contained number of iterations or
4363	 variable containing it for ith loop.  From now on, we need
4364	 those counts only for collapsed loops, and only for the 2nd
4365	 till the last collapsed one.  Move those one element earlier,
4366	 we'll use counts[fd->collapse - 1] for the first source/sink
4367	 iteration counter and so on and counts[fd->ordered]
4368	 as the array holding the current counter values for
4369	 depend(source).  */
4370      if (fd->collapse > 1)
4371	memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4372      if (broken_loop)
4373	{
4374	  int i;
4375	  for (i = fd->collapse; i < fd->ordered; i++)
4376	    {
4377	      tree type = TREE_TYPE (fd->loops[i].v);
4378	      tree this_cond
4379		= fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4380			       fold_convert (type, fd->loops[i].n1),
4381			       fold_convert (type, fd->loops[i].n2));
4382	      if (!integer_onep (this_cond))
4383		break;
4384	    }
4385	  if (i < fd->ordered)
4386	    {
4387	      if (entry_bb->loop_father != l0_bb->loop_father)
4388		{
4389		  remove_bb_from_loops (l0_bb);
4390		  add_bb_to_loop (l0_bb, entry_bb->loop_father);
4391		  gcc_assert (single_succ (l0_bb) == l1_bb);
4392		}
4393	      cont_bb
4394		= create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4395	      add_bb_to_loop (cont_bb, l0_bb->loop_father);
4396	      gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4397	      gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4398	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4399	      make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4400	      make_edge (cont_bb, l1_bb, 0);
4401	      l2_bb = create_empty_bb (cont_bb);
4402	      broken_loop = false;
4403	    }
4404	}
4405      expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4406      cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4407					      l0_bb, ordered_lastprivate);
4408      if (counts[fd->collapse - 1])
4409	{
4410	  gcc_assert (fd->collapse == 1);
4411	  gsi = gsi_last_bb (l0_bb);
4412	  expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4413				   istart0, true);
4414	  if (cont_bb)
4415	    {
4416	      gsi = gsi_last_bb (cont_bb);
4417	      t = fold_build2 (PLUS_EXPR, fd->iter_type,
4418			       counts[fd->collapse - 1],
4419			       build_int_cst (fd->iter_type, 1));
4420	      expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4421	      tree aref = build4 (ARRAY_REF, fd->iter_type,
4422				  counts[fd->ordered], size_zero_node,
4423				  NULL_TREE, NULL_TREE);
4424	      expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4425	    }
4426	  t = counts[fd->collapse - 1];
4427	}
4428      else if (fd->collapse > 1)
4429	t = fd->loop.v;
4430      else
4431	{
4432	  t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4433			   fd->loops[0].v, fd->loops[0].n1);
4434	  t = fold_convert (fd->iter_type, t);
4435	}
4436      gsi = gsi_last_bb (l0_bb);
4437      tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4438			  size_zero_node, NULL_TREE, NULL_TREE);
4439      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4440				    false, GSI_CONTINUE_LINKING);
4441      expand_omp_build_assign (&gsi, aref, t, true);
4442    }
4443
4444  if (!broken_loop)
4445    {
4446      /* Code to control the increment and predicate for the sequential
4447	 loop goes in the CONT_BB.  */
4448      gsi = gsi_last_nondebug_bb (cont_bb);
4449      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4450      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4451      vmain = gimple_omp_continue_control_use (cont_stmt);
4452      vback = gimple_omp_continue_control_def (cont_stmt);
4453
4454      if (cond_var)
4455	{
4456	  tree itype = TREE_TYPE (cond_var);
4457	  tree t2;
4458	  if ((fd->ordered && fd->collapse == 1)
4459	       || bias
4460	       || POINTER_TYPE_P (type)
4461	       || TREE_CODE (fd->loop.n1) != INTEGER_CST
4462	       || fd->loop.cond_code != LT_EXPR)
4463	    t2 = build_int_cst (itype, 1);
4464	  else
4465	    t2 = fold_convert (itype, fd->loop.step);
4466	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4467	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
4468					 NULL_TREE, true, GSI_SAME_STMT);
4469	  assign_stmt = gimple_build_assign (cond_var, t2);
4470	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4471	}
4472
4473      if (!gimple_omp_for_combined_p (fd->for_stmt))
4474	{
4475	  if (POINTER_TYPE_P (type))
4476	    t = fold_build_pointer_plus (vmain, fd->loop.step);
4477	  else
4478	    t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4479	  t = force_gimple_operand_gsi (&gsi, t,
4480					DECL_P (vback)
4481					&& TREE_ADDRESSABLE (vback),
4482					NULL_TREE, true, GSI_SAME_STMT);
4483	  assign_stmt = gimple_build_assign (vback, t);
4484	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4485
4486	  if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4487	    {
4488	      tree tem;
4489	      if (fd->collapse > 1)
4490		tem = fd->loop.v;
4491	      else
4492		{
4493		  tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4494				     fd->loops[0].v, fd->loops[0].n1);
4495		  tem = fold_convert (fd->iter_type, tem);
4496		}
4497	      tree aref = build4 (ARRAY_REF, fd->iter_type,
4498				  counts[fd->ordered], size_zero_node,
4499				  NULL_TREE, NULL_TREE);
4500	      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4501					      true, GSI_SAME_STMT);
4502	      expand_omp_build_assign (&gsi, aref, tem);
4503	    }
4504
4505	  t = build2 (fd->loop.cond_code, boolean_type_node,
4506		      DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4507		      iend);
4508	  gcond *cond_stmt = gimple_build_cond_empty (t);
4509	  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4510	}
4511
4512      /* Remove GIMPLE_OMP_CONTINUE.  */
4513      gsi_remove (&gsi, true);
4514
4515      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4516	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4517
4518      /* Emit code to get the next parallel iteration in L2_BB.  */
4519      gsi = gsi_start_bb (l2_bb);
4520
4521      t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4522			   build_fold_addr_expr (istart0),
4523			   build_fold_addr_expr (iend0));
4524      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4525				    false, GSI_CONTINUE_LINKING);
4526      if (TREE_TYPE (t) != boolean_type_node)
4527	t = fold_build2 (NE_EXPR, boolean_type_node,
4528			 t, build_int_cst (TREE_TYPE (t), 0));
4529      gcond *cond_stmt = gimple_build_cond_empty (t);
4530      gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4531    }
4532
4533  /* Add the loop cleanup function.  */
4534  gsi = gsi_last_nondebug_bb (exit_bb);
4535  if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4536    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4537  else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4538    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4539  else
4540    t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4541  gcall *call_stmt = gimple_build_call (t, 0);
4542  if (fd->ordered)
4543    {
4544      tree arr = counts[fd->ordered];
4545      tree clobber = build_clobber (TREE_TYPE (arr));
4546      gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4547			GSI_SAME_STMT);
4548    }
4549  if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4550    {
4551      gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4552      if (fd->have_reductemp)
4553	{
4554	  gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4555					   gimple_call_lhs (call_stmt));
4556	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4557	}
4558    }
4559  gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4560  gsi_remove (&gsi, true);
4561
4562  /* Connect the new blocks.  */
4563  find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4564  find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4565
4566  if (!broken_loop)
4567    {
4568      gimple_seq phis;
4569
4570      e = find_edge (cont_bb, l3_bb);
4571      ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4572
4573      phis = phi_nodes (l3_bb);
4574      for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4575	{
4576	  gimple *phi = gsi_stmt (gsi);
4577	  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4578		   PHI_ARG_DEF_FROM_EDGE (phi, e));
4579	}
4580      remove_edge (e);
4581
4582      make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4583      e = find_edge (cont_bb, l1_bb);
4584      if (e == NULL)
4585	{
4586	  e = BRANCH_EDGE (cont_bb);
4587	  gcc_assert (single_succ (e->dest) == l1_bb);
4588	}
4589      if (gimple_omp_for_combined_p (fd->for_stmt))
4590	{
4591	  remove_edge (e);
4592	  e = NULL;
4593	}
4594      else if (fd->collapse > 1)
4595	{
4596	  remove_edge (e);
4597	  e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4598	}
4599      else
4600	e->flags = EDGE_TRUE_VALUE;
4601      if (e)
4602	{
4603	  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4604	  find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4605	}
4606      else
4607	{
4608	  e = find_edge (cont_bb, l2_bb);
4609	  e->flags = EDGE_FALLTHRU;
4610	}
4611      make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4612
4613      if (gimple_in_ssa_p (cfun))
4614	{
4615	  /* Add phis to the outer loop that connect to the phis in the inner,
4616	     original loop, and move the loop entry value of the inner phi to
4617	     the loop entry value of the outer phi.  */
4618	  gphi_iterator psi;
4619	  for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4620	    {
4621	      location_t locus;
4622	      gphi *nphi;
4623	      gphi *exit_phi = psi.phi ();
4624
4625	      if (virtual_operand_p (gimple_phi_result (exit_phi)))
4626		continue;
4627
4628	      edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4629	      tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4630
4631	      basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4632	      edge latch_to_l1 = find_edge (latch, l1_bb);
4633	      gphi *inner_phi
4634		= find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4635
4636	      tree t = gimple_phi_result (exit_phi);
4637	      tree new_res = copy_ssa_name (t, NULL);
4638	      nphi = create_phi_node (new_res, l0_bb);
4639
4640	      edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4641	      t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4642	      locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4643	      edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4644	      add_phi_arg (nphi, t, entry_to_l0, locus);
4645
4646	      edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4647	      add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4648
4649	      add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4650	    }
4651	}
4652
4653      set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4654			       recompute_dominator (CDI_DOMINATORS, l2_bb));
4655      set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4656			       recompute_dominator (CDI_DOMINATORS, l3_bb));
4657      set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4658			       recompute_dominator (CDI_DOMINATORS, l0_bb));
4659      set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4660			       recompute_dominator (CDI_DOMINATORS, l1_bb));
4661
4662      /* We enter expand_omp_for_generic with a loop.  This original loop may
4663	 have its own loop struct, or it may be part of an outer loop struct
4664	 (which may be the fake loop).  */
4665      class loop *outer_loop = entry_bb->loop_father;
4666      bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4667
4668      add_bb_to_loop (l2_bb, outer_loop);
4669
4670      /* We've added a new loop around the original loop.  Allocate the
4671	 corresponding loop struct.  */
4672      class loop *new_loop = alloc_loop ();
4673      new_loop->header = l0_bb;
4674      new_loop->latch = l2_bb;
4675      add_loop (new_loop, outer_loop);
4676
4677      /* Allocate a loop structure for the original loop unless we already
4678	 had one.  */
4679      if (!orig_loop_has_loop_struct
4680	  && !gimple_omp_for_combined_p (fd->for_stmt))
4681	{
4682	  class loop *orig_loop = alloc_loop ();
4683	  orig_loop->header = l1_bb;
4684	  /* The loop may have multiple latches.  */
4685	  add_loop (orig_loop, new_loop);
4686	}
4687    }
4688}
4689
4690/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
4691   compute needed allocation size.  If !ALLOC of team allocations,
4692   if ALLOC of thread allocation.  SZ is the initial needed size for
4693   other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4694   CNT number of elements of each array, for !ALLOC this is
4695   omp_get_num_threads (), for ALLOC number of iterations handled by the
4696   current thread.  If PTR is non-NULL, it is the start of the allocation
4697   and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4698   clauses pointers to the corresponding arrays.  */
4699
4700static tree
4701expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4702			   unsigned HOST_WIDE_INT alloc_align, tree cnt,
4703			   gimple_stmt_iterator *gsi, bool alloc)
4704{
4705  tree eltsz = NULL_TREE;
4706  unsigned HOST_WIDE_INT preval = 0;
4707  if (ptr && sz)
4708    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4709		       ptr, size_int (sz));
4710  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4711    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4712	&& !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4713	&& (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4714      {
4715	tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4716	unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4717	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4718	  {
4719	    unsigned HOST_WIDE_INT szl
4720	      = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4721	    szl = least_bit_hwi (szl);
4722	    if (szl)
4723	      al = MIN (al, szl);
4724	  }
4725	if (ptr == NULL_TREE)
4726	  {
4727	    if (eltsz == NULL_TREE)
4728	      eltsz = TYPE_SIZE_UNIT (pointee_type);
4729	    else
4730	      eltsz = size_binop (PLUS_EXPR, eltsz,
4731				  TYPE_SIZE_UNIT (pointee_type));
4732	  }
4733	if (preval == 0 && al <= alloc_align)
4734	  {
4735	    unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4736	    sz += diff;
4737	    if (diff && ptr)
4738	      ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4739				 ptr, size_int (diff));
4740	  }
4741	else if (al > preval)
4742	  {
4743	    if (ptr)
4744	      {
4745		ptr = fold_convert (pointer_sized_int_node, ptr);
4746		ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4747				   build_int_cst (pointer_sized_int_node,
4748						  al - 1));
4749		ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4750				   build_int_cst (pointer_sized_int_node,
4751						  -(HOST_WIDE_INT) al));
4752		ptr = fold_convert (ptr_type_node, ptr);
4753	      }
4754	    else
4755	      sz += al - 1;
4756	  }
4757	if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4758	  preval = al;
4759	else
4760	  preval = 1;
4761	if (ptr)
4762	  {
4763	    expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4764	    ptr = OMP_CLAUSE_DECL (c);
4765	    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4766			       size_binop (MULT_EXPR, cnt,
4767					   TYPE_SIZE_UNIT (pointee_type)));
4768	  }
4769      }
4770
4771  if (ptr == NULL_TREE)
4772    {
4773      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4774      if (sz)
4775	eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4776      return eltsz;
4777    }
4778  else
4779    return ptr;
4780}
4781
4782/* Return the last _looptemp_ clause if one has been created for
4783   lastprivate on distribute parallel for{, simd} or taskloop.
4784   FD is the loop data and INNERC should be the second _looptemp_
4785   clause (the one holding the end of the range).
4786   This is followed by collapse - 1 _looptemp_ clauses for the
4787   counts[1] and up, and for triangular loops followed by 4
4788   further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
4789   one factor and one adjn1).  After this there is optionally one
4790   _looptemp_ clause that this function returns.  */
4791
4792static tree
4793find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4794{
4795  gcc_assert (innerc);
4796  int count = fd->collapse - 1;
4797  if (fd->non_rect
4798      && fd->last_nonrect == fd->first_nonrect + 1
4799      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4800    count += 4;
4801  for (int i = 0; i < count; i++)
4802    {
4803      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4804				OMP_CLAUSE__LOOPTEMP_);
4805      gcc_assert (innerc);
4806    }
4807  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4808			  OMP_CLAUSE__LOOPTEMP_);
4809}
4810
4811/* A subroutine of expand_omp_for.  Generate code for a parallel
4812   loop with static schedule and no specified chunk size.  Given
4813   parameters:
4814
4815	for (V = N1; V cond N2; V += STEP) BODY;
4816
4817   where COND is "<" or ">", we generate pseudocode
4818
4819	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4820	if (cond is <)
4821	  adj = STEP - 1;
4822	else
4823	  adj = STEP + 1;
4824	if ((__typeof (V)) -1 > 0 && cond is >)
4825	  n = -(adj + N2 - N1) / -STEP;
4826	else
4827	  n = (adj + N2 - N1) / STEP;
4828	q = n / nthreads;
4829	tt = n % nthreads;
4830	if (threadid < tt) goto L3; else goto L4;
4831    L3:
4832	tt = 0;
4833	q = q + 1;
4834    L4:
4835	s0 = q * threadid + tt;
4836	e0 = s0 + q;
4837	V = s0 * STEP + N1;
4838	if (s0 >= e0) goto L2; else goto L0;
4839    L0:
4840	e = e0 * STEP + N1;
4841    L1:
4842	BODY;
4843	V += STEP;
4844	if (V cond e) goto L1;
4845    L2:
4846*/
4847
4848static void
4849expand_omp_for_static_nochunk (struct omp_region *region,
4850			       struct omp_for_data *fd,
4851			       gimple *inner_stmt)
4852{
4853  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4854  tree type, itype, vmain, vback;
4855  basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4856  basic_block body_bb, cont_bb, collapse_bb = NULL;
4857  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4858  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4859  gimple_stmt_iterator gsi, gsip;
4860  edge ep;
4861  bool broken_loop = region->cont == NULL;
4862  tree *counts = NULL;
4863  tree n1, n2, step;
4864  tree reductions = NULL_TREE;
4865  tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4866
4867  itype = type = TREE_TYPE (fd->loop.v);
4868  if (POINTER_TYPE_P (type))
4869    itype = signed_type_for (type);
4870
4871  entry_bb = region->entry;
4872  cont_bb = region->cont;
4873  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4874  fin_bb = BRANCH_EDGE (entry_bb)->dest;
4875  gcc_assert (broken_loop
4876	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4877  seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4878  body_bb = single_succ (seq_start_bb);
4879  if (!broken_loop)
4880    {
4881      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4882		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4883      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4884    }
4885  exit_bb = region->exit;
4886
4887  /* Iteration space partitioning goes in ENTRY_BB.  */
4888  gsi = gsi_last_nondebug_bb (entry_bb);
4889  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4890  gsip = gsi;
4891  gsi_prev (&gsip);
4892
4893  if (fd->collapse > 1)
4894    {
4895      int first_zero_iter = -1, dummy = -1;
4896      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4897
4898      counts = XALLOCAVEC (tree, fd->collapse);
4899      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4900				  fin_bb, first_zero_iter,
4901				  dummy_bb, dummy, l2_dom_bb);
4902      t = NULL_TREE;
4903    }
4904  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4905    t = integer_one_node;
4906  else
4907    t = fold_binary (fd->loop.cond_code, boolean_type_node,
4908		     fold_convert (type, fd->loop.n1),
4909		     fold_convert (type, fd->loop.n2));
4910  if (fd->collapse == 1
4911      && TYPE_UNSIGNED (type)
4912      && (t == NULL_TREE || !integer_onep (t)))
4913    {
4914      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4915      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4916				     true, GSI_SAME_STMT);
4917      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4918      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4919				     true, GSI_SAME_STMT);
4920      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
4921						n1, n2);
4922      ep = split_block (entry_bb, cond_stmt);
4923      ep->flags = EDGE_TRUE_VALUE;
4924      entry_bb = ep->dest;
4925      ep->probability = profile_probability::very_likely ();
4926      ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4927      ep->probability = profile_probability::very_unlikely ();
4928      if (gimple_in_ssa_p (cfun))
4929	{
4930	  int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4931	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4932	       !gsi_end_p (gpi); gsi_next (&gpi))
4933	    {
4934	      gphi *phi = gpi.phi ();
4935	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4936			   ep, UNKNOWN_LOCATION);
4937	    }
4938	}
4939      gsi = gsi_last_bb (entry_bb);
4940    }
4941
4942  if (fd->lastprivate_conditional)
4943    {
4944      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4945      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4946      if (fd->have_pointer_condtemp)
4947	condtemp = OMP_CLAUSE_DECL (c);
4948      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4949      cond_var = OMP_CLAUSE_DECL (c);
4950    }
4951  if (fd->have_reductemp
4952      /* For scan, we don't want to reinitialize condtemp before the
4953	 second loop.  */
4954      || (fd->have_pointer_condtemp && !fd->have_scantemp)
4955      || fd->have_nonctrl_scantemp)
4956    {
4957      tree t1 = build_int_cst (long_integer_type_node, 0);
4958      tree t2 = build_int_cst (long_integer_type_node, 1);
4959      tree t3 = build_int_cstu (long_integer_type_node,
4960				(HOST_WIDE_INT_1U << 31) + 1);
4961      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4962      gimple_stmt_iterator gsi2 = gsi_none ();
4963      gimple *g = NULL;
4964      tree mem = null_pointer_node, memv = NULL_TREE;
4965      unsigned HOST_WIDE_INT condtemp_sz = 0;
4966      unsigned HOST_WIDE_INT alloc_align = 0;
4967      if (fd->have_reductemp)
4968	{
4969	  gcc_assert (!fd->have_nonctrl_scantemp);
4970	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4971	  reductions = OMP_CLAUSE_DECL (c);
4972	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4973	  g = SSA_NAME_DEF_STMT (reductions);
4974	  reductions = gimple_assign_rhs1 (g);
4975	  OMP_CLAUSE_DECL (c) = reductions;
4976	  gsi2 = gsi_for_stmt (g);
4977	}
4978      else
4979	{
4980	  if (gsi_end_p (gsip))
4981	    gsi2 = gsi_after_labels (region->entry);
4982	  else
4983	    gsi2 = gsip;
4984	  reductions = null_pointer_node;
4985	}
4986      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4987	{
4988	  tree type;
4989	  if (fd->have_pointer_condtemp)
4990	    type = TREE_TYPE (condtemp);
4991	  else
4992	    type = ptr_type_node;
4993	  memv = create_tmp_var (type);
4994	  TREE_ADDRESSABLE (memv) = 1;
4995	  unsigned HOST_WIDE_INT sz = 0;
4996	  tree size = NULL_TREE;
4997	  if (fd->have_pointer_condtemp)
4998	    {
4999	      sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5000	      sz *= fd->lastprivate_conditional;
5001	      condtemp_sz = sz;
5002	    }
5003	  if (fd->have_nonctrl_scantemp)
5004	    {
5005	      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5006	      gimple *g = gimple_build_call (nthreads, 0);
5007	      nthreads = create_tmp_var (integer_type_node);
5008	      gimple_call_set_lhs (g, nthreads);
5009	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
5010	      nthreads = fold_convert (sizetype, nthreads);
5011	      alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
5012	      size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
5013						alloc_align, nthreads, NULL,
5014						false);
5015	      size = fold_convert (type, size);
5016	    }
5017	  else
5018	    size = build_int_cst (type, sz);
5019	  expand_omp_build_assign (&gsi2, memv, size, false);
5020	  mem = build_fold_addr_expr (memv);
5021	}
5022      tree t
5023	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5024			   9, t1, t2, t2, t3, t1, null_pointer_node,
5025			   null_pointer_node, reductions, mem);
5026      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5027				true, GSI_SAME_STMT);
5028      if (fd->have_pointer_condtemp)
5029	expand_omp_build_assign (&gsi2, condtemp, memv, false);
5030      if (fd->have_nonctrl_scantemp)
5031	{
5032	  tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
5033	  expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
5034				     alloc_align, nthreads, &gsi2, false);
5035	}
5036      if (fd->have_reductemp)
5037	{
5038	  gsi_remove (&gsi2, true);
5039	  release_ssa_name (gimple_assign_lhs (g));
5040	}
5041    }
5042  switch (gimple_omp_for_kind (fd->for_stmt))
5043    {
5044    case GF_OMP_FOR_KIND_FOR:
5045      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5046      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5047      break;
5048    case GF_OMP_FOR_KIND_DISTRIBUTE:
5049      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5050      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5051      break;
5052    default:
5053      gcc_unreachable ();
5054    }
5055  nthreads = build_call_expr (nthreads, 0);
5056  nthreads = fold_convert (itype, nthreads);
5057  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5058				       true, GSI_SAME_STMT);
5059  threadid = build_call_expr (threadid, 0);
5060  threadid = fold_convert (itype, threadid);
5061  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5062				       true, GSI_SAME_STMT);
5063
5064  n1 = fd->loop.n1;
5065  n2 = fd->loop.n2;
5066  step = fd->loop.step;
5067  if (gimple_omp_for_combined_into_p (fd->for_stmt))
5068    {
5069      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5070				     OMP_CLAUSE__LOOPTEMP_);
5071      gcc_assert (innerc);
5072      n1 = OMP_CLAUSE_DECL (innerc);
5073      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5074				OMP_CLAUSE__LOOPTEMP_);
5075      gcc_assert (innerc);
5076      n2 = OMP_CLAUSE_DECL (innerc);
5077    }
5078  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5079				 true, NULL_TREE, true, GSI_SAME_STMT);
5080  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5081				 true, NULL_TREE, true, GSI_SAME_STMT);
5082  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5083				   true, NULL_TREE, true, GSI_SAME_STMT);
5084
5085  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5086  t = fold_build2 (PLUS_EXPR, itype, step, t);
5087  t = fold_build2 (PLUS_EXPR, itype, t, n2);
5088  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5089  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5090    t = fold_build2 (TRUNC_DIV_EXPR, itype,
5091		     fold_build1 (NEGATE_EXPR, itype, t),
5092		     fold_build1 (NEGATE_EXPR, itype, step));
5093  else
5094    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5095  t = fold_convert (itype, t);
5096  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5097
5098  q = create_tmp_reg (itype, "q");
5099  t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
5100  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5101  gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
5102
5103  tt = create_tmp_reg (itype, "tt");
5104  t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5105  t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5106  gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5107
5108  t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5109  gcond *cond_stmt = gimple_build_cond_empty (t);
5110  gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5111
5112  second_bb = split_block (entry_bb, cond_stmt)->dest;
5113  gsi = gsi_last_nondebug_bb (second_bb);
5114  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5115
5116  gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5117		     GSI_SAME_STMT);
5118  gassign *assign_stmt
5119    = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5120  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5121
5122  third_bb = split_block (second_bb, assign_stmt)->dest;
5123  gsi = gsi_last_nondebug_bb (third_bb);
5124  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5125
5126  if (fd->have_nonctrl_scantemp)
5127    {
5128      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5129      tree controlp = NULL_TREE, controlb = NULL_TREE;
5130      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5131	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5132	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5133	  {
5134	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5135	      controlb = OMP_CLAUSE_DECL (c);
5136	    else
5137	      controlp = OMP_CLAUSE_DECL (c);
5138	    if (controlb && controlp)
5139	      break;
5140	  }
5141      gcc_assert (controlp && controlb);
5142      tree cnt = create_tmp_var (sizetype);
5143      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5144      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5145      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5146      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5147					   alloc_align, cnt, NULL, true);
5148      tree size = create_tmp_var (sizetype);
5149      expand_omp_build_assign (&gsi, size, sz, false);
5150      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5151			      size, size_int (16384));
5152      expand_omp_build_assign (&gsi, controlb, cmp);
5153      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5154			     NULL_TREE, NULL_TREE);
5155      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5156      fourth_bb = split_block (third_bb, g)->dest;
5157      gsi = gsi_last_nondebug_bb (fourth_bb);
5158      /* FIXME: Once we have allocators, this should use allocator.  */
5159      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5160      gimple_call_set_lhs (g, controlp);
5161      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5162      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5163				 &gsi, true);
5164      gsi_prev (&gsi);
5165      g = gsi_stmt (gsi);
5166      fifth_bb = split_block (fourth_bb, g)->dest;
5167      gsi = gsi_last_nondebug_bb (fifth_bb);
5168
5169      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5170      gimple_call_set_lhs (g, controlp);
5171      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5172      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5173      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5174	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5175	    && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5176	  {
5177	    tree tmp = create_tmp_var (sizetype);
5178	    tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5179	    g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5180				     TYPE_SIZE_UNIT (pointee_type));
5181	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5182	    g = gimple_build_call (alloca_decl, 2, tmp,
5183				   size_int (TYPE_ALIGN (pointee_type)));
5184	    gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5185	    gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5186	  }
5187
5188      sixth_bb = split_block (fifth_bb, g)->dest;
5189      gsi = gsi_last_nondebug_bb (sixth_bb);
5190    }
5191
5192  t = build2 (MULT_EXPR, itype, q, threadid);
5193  t = build2 (PLUS_EXPR, itype, t, tt);
5194  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5195
5196  t = fold_build2 (PLUS_EXPR, itype, s0, q);
5197  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5198
5199  t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5200  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5201
5202  /* Remove the GIMPLE_OMP_FOR statement.  */
5203  gsi_remove (&gsi, true);
5204
5205  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5206  gsi = gsi_start_bb (seq_start_bb);
5207
5208  tree startvar = fd->loop.v;
5209  tree endvar = NULL_TREE;
5210
5211  if (gimple_omp_for_combined_p (fd->for_stmt))
5212    {
5213      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5214		     ? gimple_omp_parallel_clauses (inner_stmt)
5215		     : gimple_omp_for_clauses (inner_stmt);
5216      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5217      gcc_assert (innerc);
5218      startvar = OMP_CLAUSE_DECL (innerc);
5219      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5220				OMP_CLAUSE__LOOPTEMP_);
5221      gcc_assert (innerc);
5222      endvar = OMP_CLAUSE_DECL (innerc);
5223      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5224	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5225	{
5226	  innerc = find_lastprivate_looptemp (fd, innerc);
5227	  if (innerc)
5228	    {
5229	      /* If needed (distribute parallel for with lastprivate),
5230		 propagate down the total number of iterations.  */
5231	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5232				     fd->loop.n2);
5233	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5234					    GSI_CONTINUE_LINKING);
5235	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5236	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5237	    }
5238	}
5239    }
5240  t = fold_convert (itype, s0);
5241  t = fold_build2 (MULT_EXPR, itype, t, step);
5242  if (POINTER_TYPE_P (type))
5243    {
5244      t = fold_build_pointer_plus (n1, t);
5245      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5246	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5247	t = fold_convert (signed_type_for (type), t);
5248    }
5249  else
5250    t = fold_build2 (PLUS_EXPR, type, t, n1);
5251  t = fold_convert (TREE_TYPE (startvar), t);
5252  t = force_gimple_operand_gsi (&gsi, t,
5253				DECL_P (startvar)
5254				&& TREE_ADDRESSABLE (startvar),
5255				NULL_TREE, false, GSI_CONTINUE_LINKING);
5256  assign_stmt = gimple_build_assign (startvar, t);
5257  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5258  if (cond_var)
5259    {
5260      tree itype = TREE_TYPE (cond_var);
5261      /* For lastprivate(conditional:) itervar, we need some iteration
5262	 counter that starts at unsigned non-zero and increases.
5263	 Prefer as few IVs as possible, so if we can use startvar
5264	 itself, use that, or startvar + constant (those would be
5265	 incremented with step), and as last resort use the s0 + 1
5266	 incremented by 1.  */
5267      if (POINTER_TYPE_P (type)
5268	  || TREE_CODE (n1) != INTEGER_CST
5269	  || fd->loop.cond_code != LT_EXPR)
5270	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5271			 build_int_cst (itype, 1));
5272      else if (tree_int_cst_sgn (n1) == 1)
5273	t = fold_convert (itype, t);
5274      else
5275	{
5276	  tree c = fold_convert (itype, n1);
5277	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5278	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5279	}
5280      t = force_gimple_operand_gsi (&gsi, t, false,
5281				    NULL_TREE, false, GSI_CONTINUE_LINKING);
5282      assign_stmt = gimple_build_assign (cond_var, t);
5283      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5284    }
5285
5286  t = fold_convert (itype, e0);
5287  t = fold_build2 (MULT_EXPR, itype, t, step);
5288  if (POINTER_TYPE_P (type))
5289    {
5290      t = fold_build_pointer_plus (n1, t);
5291      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5292	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5293	t = fold_convert (signed_type_for (type), t);
5294    }
5295  else
5296    t = fold_build2 (PLUS_EXPR, type, t, n1);
5297  t = fold_convert (TREE_TYPE (startvar), t);
5298  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5299				false, GSI_CONTINUE_LINKING);
5300  if (endvar)
5301    {
5302      assign_stmt = gimple_build_assign (endvar, e);
5303      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5304      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5305	assign_stmt = gimple_build_assign (fd->loop.v, e);
5306      else
5307	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5308      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5309    }
5310  /* Handle linear clause adjustments.  */
5311  tree itercnt = NULL_TREE;
5312  tree *nonrect_bounds = NULL;
5313  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5314    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5315	 c; c = OMP_CLAUSE_CHAIN (c))
5316      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5317	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5318	{
5319	  tree d = OMP_CLAUSE_DECL (c);
5320	  tree t = d, a, dest;
5321	  if (omp_privatize_by_reference (t))
5322	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5323	  if (itercnt == NULL_TREE)
5324	    {
5325	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
5326		{
5327		  itercnt = fold_build2 (MINUS_EXPR, itype,
5328					 fold_convert (itype, n1),
5329					 fold_convert (itype, fd->loop.n1));
5330		  itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5331		  itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5332		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5333						      NULL_TREE, false,
5334						      GSI_CONTINUE_LINKING);
5335		}
5336	      else
5337		itercnt = s0;
5338	    }
5339	  tree type = TREE_TYPE (t);
5340	  if (POINTER_TYPE_P (type))
5341	    type = sizetype;
5342	  a = fold_build2 (MULT_EXPR, type,
5343			   fold_convert (type, itercnt),
5344			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5345	  dest = unshare_expr (t);
5346	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5347			   : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5348	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5349					false, GSI_CONTINUE_LINKING);
5350	  expand_omp_build_assign (&gsi, dest, t, true);
5351	}
5352  if (fd->collapse > 1)
5353    {
5354      if (fd->non_rect)
5355	{
5356	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5357	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5358	}
5359      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5360				startvar);
5361    }
5362
5363  if (!broken_loop)
5364    {
5365      /* The code controlling the sequential loop replaces the
5366	 GIMPLE_OMP_CONTINUE.  */
5367      gsi = gsi_last_nondebug_bb (cont_bb);
5368      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5369      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5370      vmain = gimple_omp_continue_control_use (cont_stmt);
5371      vback = gimple_omp_continue_control_def (cont_stmt);
5372
5373      if (cond_var)
5374	{
5375	  tree itype = TREE_TYPE (cond_var);
5376	  tree t2;
5377	  if (POINTER_TYPE_P (type)
5378	      || TREE_CODE (n1) != INTEGER_CST
5379	      || fd->loop.cond_code != LT_EXPR)
5380	    t2 = build_int_cst (itype, 1);
5381	  else
5382	    t2 = fold_convert (itype, step);
5383	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5384	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
5385					 NULL_TREE, true, GSI_SAME_STMT);
5386	  assign_stmt = gimple_build_assign (cond_var, t2);
5387	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5388	}
5389
5390      if (!gimple_omp_for_combined_p (fd->for_stmt))
5391	{
5392	  if (POINTER_TYPE_P (type))
5393	    t = fold_build_pointer_plus (vmain, step);
5394	  else
5395	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
5396	  t = force_gimple_operand_gsi (&gsi, t,
5397					DECL_P (vback)
5398					&& TREE_ADDRESSABLE (vback),
5399					NULL_TREE, true, GSI_SAME_STMT);
5400	  assign_stmt = gimple_build_assign (vback, t);
5401	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5402
5403	  t = build2 (fd->loop.cond_code, boolean_type_node,
5404		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
5405		      ? t : vback, e);
5406	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5407	}
5408
5409      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
5410      gsi_remove (&gsi, true);
5411
5412      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5413	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5414						   cont_bb, body_bb);
5415    }
5416
5417  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
5418  gsi = gsi_last_nondebug_bb (exit_bb);
5419  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5420    {
5421      t = gimple_omp_return_lhs (gsi_stmt (gsi));
5422      if (fd->have_reductemp
5423	  || ((fd->have_pointer_condtemp || fd->have_scantemp)
5424	      && !fd->have_nonctrl_scantemp))
5425	{
5426	  tree fn;
5427	  if (t)
5428	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5429	  else
5430	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5431	  gcall *g = gimple_build_call (fn, 0);
5432	  if (t)
5433	    {
5434	      gimple_call_set_lhs (g, t);
5435	      if (fd->have_reductemp)
5436		gsi_insert_after (&gsi, gimple_build_assign (reductions,
5437							     NOP_EXPR, t),
5438				  GSI_SAME_STMT);
5439	    }
5440	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5441	}
5442      else
5443	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5444    }
5445  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5446	   && !fd->have_nonctrl_scantemp)
5447    {
5448      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5449      gcall *g = gimple_build_call (fn, 0);
5450      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5451    }
5452  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5453    {
5454      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5455      tree controlp = NULL_TREE, controlb = NULL_TREE;
5456      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5457	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5458	    && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5459	  {
5460	    if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5461	      controlb = OMP_CLAUSE_DECL (c);
5462	    else
5463	      controlp = OMP_CLAUSE_DECL (c);
5464	    if (controlb && controlp)
5465	      break;
5466	  }
5467      gcc_assert (controlp && controlb);
5468      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5469				     NULL_TREE, NULL_TREE);
5470      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5471      exit1_bb = split_block (exit_bb, g)->dest;
5472      gsi = gsi_after_labels (exit1_bb);
5473      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5474			     controlp);
5475      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5476      exit2_bb = split_block (exit1_bb, g)->dest;
5477      gsi = gsi_after_labels (exit2_bb);
5478      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5479			     controlp);
5480      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5481      exit3_bb = split_block (exit2_bb, g)->dest;
5482      gsi = gsi_after_labels (exit3_bb);
5483    }
5484  gsi_remove (&gsi, true);
5485
5486  /* Connect all the blocks.  */
5487  ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5488  ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5489  ep = find_edge (entry_bb, second_bb);
5490  ep->flags = EDGE_TRUE_VALUE;
5491  ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5492  if (fourth_bb)
5493    {
5494      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5495      ep->probability
5496	= profile_probability::guessed_always ().apply_scale (1, 2);
5497      ep = find_edge (third_bb, fourth_bb);
5498      ep->flags = EDGE_TRUE_VALUE;
5499      ep->probability
5500	= profile_probability::guessed_always ().apply_scale (1, 2);
5501      ep = find_edge (fourth_bb, fifth_bb);
5502      redirect_edge_and_branch (ep, sixth_bb);
5503    }
5504  else
5505    sixth_bb = third_bb;
5506  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5507  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5508  if (exit1_bb)
5509    {
5510      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5511      ep->probability
5512	= profile_probability::guessed_always ().apply_scale (1, 2);
5513      ep = find_edge (exit_bb, exit1_bb);
5514      ep->flags = EDGE_TRUE_VALUE;
5515      ep->probability
5516	= profile_probability::guessed_always ().apply_scale (1, 2);
5517      ep = find_edge (exit1_bb, exit2_bb);
5518      redirect_edge_and_branch (ep, exit3_bb);
5519    }
5520
5521  if (!broken_loop)
5522    {
5523      ep = find_edge (cont_bb, body_bb);
5524      if (ep == NULL)
5525	{
5526	  ep = BRANCH_EDGE (cont_bb);
5527	  gcc_assert (single_succ (ep->dest) == body_bb);
5528	}
5529      if (gimple_omp_for_combined_p (fd->for_stmt))
5530	{
5531	  remove_edge (ep);
5532	  ep = NULL;
5533	}
5534      else if (fd->collapse > 1)
5535	{
5536	  remove_edge (ep);
5537	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5538	}
5539      else
5540	ep->flags = EDGE_TRUE_VALUE;
5541      find_edge (cont_bb, fin_bb)->flags
5542	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5543    }
5544
5545  set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5546  set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5547  if (fourth_bb)
5548    {
5549      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5550      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5551    }
5552  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5553
5554  set_immediate_dominator (CDI_DOMINATORS, body_bb,
5555			   recompute_dominator (CDI_DOMINATORS, body_bb));
5556  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5557			   recompute_dominator (CDI_DOMINATORS, fin_bb));
5558  if (exit1_bb)
5559    {
5560      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5561      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5562    }
5563
5564  class loop *loop = body_bb->loop_father;
5565  if (loop != entry_bb->loop_father)
5566    {
5567      gcc_assert (broken_loop || loop->header == body_bb);
5568      gcc_assert (broken_loop
5569		  || loop->latch == region->cont
5570		  || single_pred (loop->latch) == region->cont);
5571      return;
5572    }
5573
5574  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5575    {
5576      loop = alloc_loop ();
5577      loop->header = body_bb;
5578      if (collapse_bb == NULL)
5579	loop->latch = cont_bb;
5580      add_loop (loop, body_bb->loop_father);
5581    }
5582}
5583
5584/* Return phi in E->DEST with ARG on edge E.  */
5585
5586static gphi *
5587find_phi_with_arg_on_edge (tree arg, edge e)
5588{
5589  basic_block bb = e->dest;
5590
5591  for (gphi_iterator gpi = gsi_start_phis (bb);
5592       !gsi_end_p (gpi);
5593       gsi_next (&gpi))
5594    {
5595      gphi *phi = gpi.phi ();
5596      if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5597	return phi;
5598    }
5599
5600  return NULL;
5601}
5602
5603/* A subroutine of expand_omp_for.  Generate code for a parallel
5604   loop with static schedule and a specified chunk size.  Given
5605   parameters:
5606
5607	for (V = N1; V cond N2; V += STEP) BODY;
5608
5609   where COND is "<" or ">", we generate pseudocode
5610
5611	if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5612	if (cond is <)
5613	  adj = STEP - 1;
5614	else
5615	  adj = STEP + 1;
5616	if ((__typeof (V)) -1 > 0 && cond is >)
5617	  n = -(adj + N2 - N1) / -STEP;
5618	else
5619	  n = (adj + N2 - N1) / STEP;
5620	trip = 0;
5621	V = threadid * CHUNK * STEP + N1;  -- this extra definition of V is
5622					      here so that V is defined
5623					      if the loop is not entered
5624    L0:
5625	s0 = (trip * nthreads + threadid) * CHUNK;
5626	e0 = min (s0 + CHUNK, n);
5627	if (s0 < n) goto L1; else goto L4;
5628    L1:
5629	V = s0 * STEP + N1;
5630	e = e0 * STEP + N1;
5631    L2:
5632	BODY;
5633	V += STEP;
5634	if (V cond e) goto L2; else goto L3;
5635    L3:
5636	trip += 1;
5637	goto L0;
5638    L4:
5639*/
5640
5641static void
5642expand_omp_for_static_chunk (struct omp_region *region,
5643			     struct omp_for_data *fd, gimple *inner_stmt)
5644{
5645  tree n, s0, e0, e, t;
5646  tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5647  tree type, itype, vmain, vback, vextra;
5648  basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5649  basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5650  gimple_stmt_iterator gsi, gsip;
5651  edge se;
5652  bool broken_loop = region->cont == NULL;
5653  tree *counts = NULL;
5654  tree n1, n2, step;
5655  tree reductions = NULL_TREE;
5656  tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5657
5658  itype = type = TREE_TYPE (fd->loop.v);
5659  if (POINTER_TYPE_P (type))
5660    itype = signed_type_for (type);
5661
5662  entry_bb = region->entry;
5663  se = split_block (entry_bb, last_stmt (entry_bb));
5664  entry_bb = se->src;
5665  iter_part_bb = se->dest;
5666  cont_bb = region->cont;
5667  gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5668  fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5669  gcc_assert (broken_loop
5670	      || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5671  seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5672  body_bb = single_succ (seq_start_bb);
5673  if (!broken_loop)
5674    {
5675      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5676		  || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5677      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5678      trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5679    }
5680  exit_bb = region->exit;
5681
5682  /* Trip and adjustment setup goes in ENTRY_BB.  */
5683  gsi = gsi_last_nondebug_bb (entry_bb);
5684  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5685  gsip = gsi;
5686  gsi_prev (&gsip);
5687
5688  if (fd->collapse > 1)
5689    {
5690      int first_zero_iter = -1, dummy = -1;
5691      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5692
5693      counts = XALLOCAVEC (tree, fd->collapse);
5694      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5695				  fin_bb, first_zero_iter,
5696				  dummy_bb, dummy, l2_dom_bb);
5697      t = NULL_TREE;
5698    }
5699  else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5700    t = integer_one_node;
5701  else
5702    t = fold_binary (fd->loop.cond_code, boolean_type_node,
5703		     fold_convert (type, fd->loop.n1),
5704		     fold_convert (type, fd->loop.n2));
5705  if (fd->collapse == 1
5706      && TYPE_UNSIGNED (type)
5707      && (t == NULL_TREE || !integer_onep (t)))
5708    {
5709      n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5710      n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5711				     true, GSI_SAME_STMT);
5712      n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5713      n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5714				     true, GSI_SAME_STMT);
5715      gcond *cond_stmt = expand_omp_build_cond (&gsi, fd->loop.cond_code,
5716						n1, n2);
5717      se = split_block (entry_bb, cond_stmt);
5718      se->flags = EDGE_TRUE_VALUE;
5719      entry_bb = se->dest;
5720      se->probability = profile_probability::very_likely ();
5721      se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5722      se->probability = profile_probability::very_unlikely ();
5723      if (gimple_in_ssa_p (cfun))
5724	{
5725	  int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5726	  for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5727	       !gsi_end_p (gpi); gsi_next (&gpi))
5728	    {
5729	      gphi *phi = gpi.phi ();
5730	      add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5731			   se, UNKNOWN_LOCATION);
5732	    }
5733	}
5734      gsi = gsi_last_bb (entry_bb);
5735    }
5736
5737  if (fd->lastprivate_conditional)
5738    {
5739      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5740      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5741      if (fd->have_pointer_condtemp)
5742	condtemp = OMP_CLAUSE_DECL (c);
5743      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5744      cond_var = OMP_CLAUSE_DECL (c);
5745    }
5746  if (fd->have_reductemp || fd->have_pointer_condtemp)
5747    {
5748      tree t1 = build_int_cst (long_integer_type_node, 0);
5749      tree t2 = build_int_cst (long_integer_type_node, 1);
5750      tree t3 = build_int_cstu (long_integer_type_node,
5751				(HOST_WIDE_INT_1U << 31) + 1);
5752      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5753      gimple_stmt_iterator gsi2 = gsi_none ();
5754      gimple *g = NULL;
5755      tree mem = null_pointer_node, memv = NULL_TREE;
5756      if (fd->have_reductemp)
5757	{
5758	  tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5759	  reductions = OMP_CLAUSE_DECL (c);
5760	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5761	  g = SSA_NAME_DEF_STMT (reductions);
5762	  reductions = gimple_assign_rhs1 (g);
5763	  OMP_CLAUSE_DECL (c) = reductions;
5764	  gsi2 = gsi_for_stmt (g);
5765	}
5766      else
5767	{
5768	  if (gsi_end_p (gsip))
5769	    gsi2 = gsi_after_labels (region->entry);
5770	  else
5771	    gsi2 = gsip;
5772	  reductions = null_pointer_node;
5773	}
5774      if (fd->have_pointer_condtemp)
5775	{
5776	  tree type = TREE_TYPE (condtemp);
5777	  memv = create_tmp_var (type);
5778	  TREE_ADDRESSABLE (memv) = 1;
5779	  unsigned HOST_WIDE_INT sz
5780	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5781	  sz *= fd->lastprivate_conditional;
5782	  expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5783				   false);
5784	  mem = build_fold_addr_expr (memv);
5785	}
5786      tree t
5787	= build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5788			   9, t1, t2, t2, t3, t1, null_pointer_node,
5789			   null_pointer_node, reductions, mem);
5790      force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5791				true, GSI_SAME_STMT);
5792      if (fd->have_pointer_condtemp)
5793	expand_omp_build_assign (&gsi2, condtemp, memv, false);
5794      if (fd->have_reductemp)
5795	{
5796	  gsi_remove (&gsi2, true);
5797	  release_ssa_name (gimple_assign_lhs (g));
5798	}
5799    }
5800  switch (gimple_omp_for_kind (fd->for_stmt))
5801    {
5802    case GF_OMP_FOR_KIND_FOR:
5803      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5804      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5805      break;
5806    case GF_OMP_FOR_KIND_DISTRIBUTE:
5807      nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5808      threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5809      break;
5810    default:
5811      gcc_unreachable ();
5812    }
5813  nthreads = build_call_expr (nthreads, 0);
5814  nthreads = fold_convert (itype, nthreads);
5815  nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5816				       true, GSI_SAME_STMT);
5817  threadid = build_call_expr (threadid, 0);
5818  threadid = fold_convert (itype, threadid);
5819  threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5820				       true, GSI_SAME_STMT);
5821
5822  n1 = fd->loop.n1;
5823  n2 = fd->loop.n2;
5824  step = fd->loop.step;
5825  if (gimple_omp_for_combined_into_p (fd->for_stmt))
5826    {
5827      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5828				     OMP_CLAUSE__LOOPTEMP_);
5829      gcc_assert (innerc);
5830      n1 = OMP_CLAUSE_DECL (innerc);
5831      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5832				OMP_CLAUSE__LOOPTEMP_);
5833      gcc_assert (innerc);
5834      n2 = OMP_CLAUSE_DECL (innerc);
5835    }
5836  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5837				 true, NULL_TREE, true, GSI_SAME_STMT);
5838  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5839				 true, NULL_TREE, true, GSI_SAME_STMT);
5840  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5841				   true, NULL_TREE, true, GSI_SAME_STMT);
5842  tree chunk_size = fold_convert (itype, fd->chunk_size);
5843  chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5844  chunk_size
5845    = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5846				GSI_SAME_STMT);
5847
5848  t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5849  t = fold_build2 (PLUS_EXPR, itype, step, t);
5850  t = fold_build2 (PLUS_EXPR, itype, t, n2);
5851  t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5852  if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5853    t = fold_build2 (TRUNC_DIV_EXPR, itype,
5854		     fold_build1 (NEGATE_EXPR, itype, t),
5855		     fold_build1 (NEGATE_EXPR, itype, step));
5856  else
5857    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5858  t = fold_convert (itype, t);
5859  n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5860				true, GSI_SAME_STMT);
5861
5862  trip_var = create_tmp_reg (itype, ".trip");
5863  if (gimple_in_ssa_p (cfun))
5864    {
5865      trip_init = make_ssa_name (trip_var);
5866      trip_main = make_ssa_name (trip_var);
5867      trip_back = make_ssa_name (trip_var);
5868    }
5869  else
5870    {
5871      trip_init = trip_var;
5872      trip_main = trip_var;
5873      trip_back = trip_var;
5874    }
5875
5876  gassign *assign_stmt
5877    = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5878  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5879
5880  t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5881  t = fold_build2 (MULT_EXPR, itype, t, step);
5882  if (POINTER_TYPE_P (type))
5883    t = fold_build_pointer_plus (n1, t);
5884  else
5885    t = fold_build2 (PLUS_EXPR, type, t, n1);
5886  vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5887				     true, GSI_SAME_STMT);
5888
5889  /* Remove the GIMPLE_OMP_FOR.  */
5890  gsi_remove (&gsi, true);
5891
5892  gimple_stmt_iterator gsif = gsi;
5893
5894  /* Iteration space partitioning goes in ITER_PART_BB.  */
5895  gsi = gsi_last_bb (iter_part_bb);
5896
5897  t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5898  t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5899  t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5900  s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5901				 false, GSI_CONTINUE_LINKING);
5902
5903  t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5904  t = fold_build2 (MIN_EXPR, itype, t, n);
5905  e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5906				 false, GSI_CONTINUE_LINKING);
5907
5908  t = build2 (LT_EXPR, boolean_type_node, s0, n);
5909  gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5910
5911  /* Setup code for sequential iteration goes in SEQ_START_BB.  */
5912  gsi = gsi_start_bb (seq_start_bb);
5913
5914  tree startvar = fd->loop.v;
5915  tree endvar = NULL_TREE;
5916
5917  if (gimple_omp_for_combined_p (fd->for_stmt))
5918    {
5919      tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5920		     ? gimple_omp_parallel_clauses (inner_stmt)
5921		     : gimple_omp_for_clauses (inner_stmt);
5922      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5923      gcc_assert (innerc);
5924      startvar = OMP_CLAUSE_DECL (innerc);
5925      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5926				OMP_CLAUSE__LOOPTEMP_);
5927      gcc_assert (innerc);
5928      endvar = OMP_CLAUSE_DECL (innerc);
5929      if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5930	  && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5931	{
5932	  innerc = find_lastprivate_looptemp (fd, innerc);
5933	  if (innerc)
5934	    {
5935	      /* If needed (distribute parallel for with lastprivate),
5936		 propagate down the total number of iterations.  */
5937	      tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5938				     fd->loop.n2);
5939	      t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5940					    GSI_CONTINUE_LINKING);
5941	      assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5942	      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5943	    }
5944	}
5945    }
5946
5947  t = fold_convert (itype, s0);
5948  t = fold_build2 (MULT_EXPR, itype, t, step);
5949  if (POINTER_TYPE_P (type))
5950    {
5951      t = fold_build_pointer_plus (n1, t);
5952      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5953	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5954	t = fold_convert (signed_type_for (type), t);
5955    }
5956  else
5957    t = fold_build2 (PLUS_EXPR, type, t, n1);
5958  t = fold_convert (TREE_TYPE (startvar), t);
5959  t = force_gimple_operand_gsi (&gsi, t,
5960				DECL_P (startvar)
5961				&& TREE_ADDRESSABLE (startvar),
5962				NULL_TREE, false, GSI_CONTINUE_LINKING);
5963  assign_stmt = gimple_build_assign (startvar, t);
5964  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5965  if (cond_var)
5966    {
5967      tree itype = TREE_TYPE (cond_var);
5968      /* For lastprivate(conditional:) itervar, we need some iteration
5969	 counter that starts at unsigned non-zero and increases.
5970	 Prefer as few IVs as possible, so if we can use startvar
5971	 itself, use that, or startvar + constant (those would be
5972	 incremented with step), and as last resort use the s0 + 1
5973	 incremented by 1.  */
5974      if (POINTER_TYPE_P (type)
5975	  || TREE_CODE (n1) != INTEGER_CST
5976	  || fd->loop.cond_code != LT_EXPR)
5977	t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5978			 build_int_cst (itype, 1));
5979      else if (tree_int_cst_sgn (n1) == 1)
5980	t = fold_convert (itype, t);
5981      else
5982	{
5983	  tree c = fold_convert (itype, n1);
5984	  c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5985	  t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5986	}
5987      t = force_gimple_operand_gsi (&gsi, t, false,
5988				    NULL_TREE, false, GSI_CONTINUE_LINKING);
5989      assign_stmt = gimple_build_assign (cond_var, t);
5990      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5991    }
5992
5993  t = fold_convert (itype, e0);
5994  t = fold_build2 (MULT_EXPR, itype, t, step);
5995  if (POINTER_TYPE_P (type))
5996    {
5997      t = fold_build_pointer_plus (n1, t);
5998      if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5999	  && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
6000	t = fold_convert (signed_type_for (type), t);
6001    }
6002  else
6003    t = fold_build2 (PLUS_EXPR, type, t, n1);
6004  t = fold_convert (TREE_TYPE (startvar), t);
6005  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6006				false, GSI_CONTINUE_LINKING);
6007  if (endvar)
6008    {
6009      assign_stmt = gimple_build_assign (endvar, e);
6010      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6011      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
6012	assign_stmt = gimple_build_assign (fd->loop.v, e);
6013      else
6014	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
6015      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6016    }
6017  /* Handle linear clause adjustments.  */
6018  tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
6019  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
6020    for (tree c = gimple_omp_for_clauses (fd->for_stmt);
6021	 c; c = OMP_CLAUSE_CHAIN (c))
6022      if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
6023	  && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
6024	{
6025	  tree d = OMP_CLAUSE_DECL (c);
6026	  tree t = d, a, dest;
6027	  if (omp_privatize_by_reference (t))
6028	    t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
6029	  tree type = TREE_TYPE (t);
6030	  if (POINTER_TYPE_P (type))
6031	    type = sizetype;
6032	  dest = unshare_expr (t);
6033	  tree v = create_tmp_var (TREE_TYPE (t), NULL);
6034	  expand_omp_build_assign (&gsif, v, t);
6035	  if (itercnt == NULL_TREE)
6036	    {
6037	      if (gimple_omp_for_combined_into_p (fd->for_stmt))
6038		{
6039		  itercntbias
6040		    = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
6041				   fold_convert (itype, fd->loop.n1));
6042		  itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
6043					     itercntbias, step);
6044		  itercntbias
6045		    = force_gimple_operand_gsi (&gsif, itercntbias, true,
6046						NULL_TREE, true,
6047						GSI_SAME_STMT);
6048		  itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
6049		  itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
6050						      NULL_TREE, false,
6051						      GSI_CONTINUE_LINKING);
6052		}
6053	      else
6054		itercnt = s0;
6055	    }
6056	  a = fold_build2 (MULT_EXPR, type,
6057			   fold_convert (type, itercnt),
6058			   fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
6059	  t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
6060			   : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
6061	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6062					false, GSI_CONTINUE_LINKING);
6063	  expand_omp_build_assign (&gsi, dest, t, true);
6064	}
6065  if (fd->collapse > 1)
6066    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
6067
6068  if (!broken_loop)
6069    {
6070      /* The code controlling the sequential loop goes in CONT_BB,
6071	 replacing the GIMPLE_OMP_CONTINUE.  */
6072      gsi = gsi_last_nondebug_bb (cont_bb);
6073      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
6074      vmain = gimple_omp_continue_control_use (cont_stmt);
6075      vback = gimple_omp_continue_control_def (cont_stmt);
6076
6077      if (cond_var)
6078	{
6079	  tree itype = TREE_TYPE (cond_var);
6080	  tree t2;
6081	  if (POINTER_TYPE_P (type)
6082	      || TREE_CODE (n1) != INTEGER_CST
6083	      || fd->loop.cond_code != LT_EXPR)
6084	    t2 = build_int_cst (itype, 1);
6085	  else
6086	    t2 = fold_convert (itype, step);
6087	  t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
6088	  t2 = force_gimple_operand_gsi (&gsi, t2, false,
6089					 NULL_TREE, true, GSI_SAME_STMT);
6090	  assign_stmt = gimple_build_assign (cond_var, t2);
6091	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6092	}
6093
6094      if (!gimple_omp_for_combined_p (fd->for_stmt))
6095	{
6096	  if (POINTER_TYPE_P (type))
6097	    t = fold_build_pointer_plus (vmain, step);
6098	  else
6099	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
6100	  if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6101	    t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6102					  true, GSI_SAME_STMT);
6103	  assign_stmt = gimple_build_assign (vback, t);
6104	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6105
6106	  if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6107	    t = build2 (EQ_EXPR, boolean_type_node,
6108			build_int_cst (itype, 0),
6109			build_int_cst (itype, 1));
6110	  else
6111	    t = build2 (fd->loop.cond_code, boolean_type_node,
6112			DECL_P (vback) && TREE_ADDRESSABLE (vback)
6113			? t : vback, e);
6114	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6115	}
6116
6117      /* Remove GIMPLE_OMP_CONTINUE.  */
6118      gsi_remove (&gsi, true);
6119
6120      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6121	collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6122
6123      /* Trip update code goes into TRIP_UPDATE_BB.  */
6124      gsi = gsi_start_bb (trip_update_bb);
6125
6126      t = build_int_cst (itype, 1);
6127      t = build2 (PLUS_EXPR, itype, trip_main, t);
6128      assign_stmt = gimple_build_assign (trip_back, t);
6129      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6130    }
6131
6132  /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
6133  gsi = gsi_last_nondebug_bb (exit_bb);
6134  if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6135    {
6136      t = gimple_omp_return_lhs (gsi_stmt (gsi));
6137      if (fd->have_reductemp || fd->have_pointer_condtemp)
6138	{
6139	  tree fn;
6140	  if (t)
6141	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6142	  else
6143	    fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6144	  gcall *g = gimple_build_call (fn, 0);
6145	  if (t)
6146	    {
6147	      gimple_call_set_lhs (g, t);
6148	      if (fd->have_reductemp)
6149		gsi_insert_after (&gsi, gimple_build_assign (reductions,
6150							     NOP_EXPR, t),
6151				  GSI_SAME_STMT);
6152	    }
6153	  gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6154	}
6155      else
6156	gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6157    }
6158  else if (fd->have_pointer_condtemp)
6159    {
6160      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6161      gcall *g = gimple_build_call (fn, 0);
6162      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6163    }
6164  gsi_remove (&gsi, true);
6165
6166  /* Connect the new blocks.  */
6167  find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6168  find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6169
6170  if (!broken_loop)
6171    {
6172      se = find_edge (cont_bb, body_bb);
6173      if (se == NULL)
6174	{
6175	  se = BRANCH_EDGE (cont_bb);
6176	  gcc_assert (single_succ (se->dest) == body_bb);
6177	}
6178      if (gimple_omp_for_combined_p (fd->for_stmt))
6179	{
6180	  remove_edge (se);
6181	  se = NULL;
6182	}
6183      else if (fd->collapse > 1)
6184	{
6185	  remove_edge (se);
6186	  se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6187	}
6188      else
6189	se->flags = EDGE_TRUE_VALUE;
6190      find_edge (cont_bb, trip_update_bb)->flags
6191	= se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6192
6193      redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6194				iter_part_bb);
6195    }
6196
6197  if (gimple_in_ssa_p (cfun))
6198    {
6199      gphi_iterator psi;
6200      gphi *phi;
6201      edge re, ene;
6202      edge_var_map *vm;
6203      size_t i;
6204
6205      gcc_assert (fd->collapse == 1 && !broken_loop);
6206
6207      /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6208	 remove arguments of the phi nodes in fin_bb.  We need to create
6209	 appropriate phi nodes in iter_part_bb instead.  */
6210      se = find_edge (iter_part_bb, fin_bb);
6211      re = single_succ_edge (trip_update_bb);
6212      vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6213      ene = single_succ_edge (entry_bb);
6214
6215      psi = gsi_start_phis (fin_bb);
6216      for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6217	   gsi_next (&psi), ++i)
6218	{
6219	  gphi *nphi;
6220	  location_t locus;
6221
6222	  phi = psi.phi ();
6223	  if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6224			       redirect_edge_var_map_def (vm), 0))
6225	    continue;
6226
6227	  t = gimple_phi_result (phi);
6228	  gcc_assert (t == redirect_edge_var_map_result (vm));
6229
6230	  if (!single_pred_p (fin_bb))
6231	    t = copy_ssa_name (t, phi);
6232
6233	  nphi = create_phi_node (t, iter_part_bb);
6234
6235	  t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6236	  locus = gimple_phi_arg_location_from_edge (phi, se);
6237
6238	  /* A special case -- fd->loop.v is not yet computed in
6239	     iter_part_bb, we need to use vextra instead.  */
6240	  if (t == fd->loop.v)
6241	    t = vextra;
6242	  add_phi_arg (nphi, t, ene, locus);
6243	  locus = redirect_edge_var_map_location (vm);
6244	  tree back_arg = redirect_edge_var_map_def (vm);
6245	  add_phi_arg (nphi, back_arg, re, locus);
6246	  edge ce = find_edge (cont_bb, body_bb);
6247	  if (ce == NULL)
6248	    {
6249	      ce = BRANCH_EDGE (cont_bb);
6250	      gcc_assert (single_succ (ce->dest) == body_bb);
6251	      ce = single_succ_edge (ce->dest);
6252	    }
6253	  gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6254	  gcc_assert (inner_loop_phi != NULL);
6255	  add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6256		       find_edge (seq_start_bb, body_bb), locus);
6257
6258	  if (!single_pred_p (fin_bb))
6259	    add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6260	}
6261      gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6262      redirect_edge_var_map_clear (re);
6263      if (single_pred_p (fin_bb))
6264	while (1)
6265	  {
6266	    psi = gsi_start_phis (fin_bb);
6267	    if (gsi_end_p (psi))
6268	      break;
6269	    remove_phi_node (&psi, false);
6270	  }
6271
6272      /* Make phi node for trip.  */
6273      phi = create_phi_node (trip_main, iter_part_bb);
6274      add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6275		   UNKNOWN_LOCATION);
6276      add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6277		   UNKNOWN_LOCATION);
6278    }
6279
6280  if (!broken_loop)
6281    set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6282  set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6283			   recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6284  set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6285			   recompute_dominator (CDI_DOMINATORS, fin_bb));
6286  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6287			   recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6288  set_immediate_dominator (CDI_DOMINATORS, body_bb,
6289			   recompute_dominator (CDI_DOMINATORS, body_bb));
6290
6291  if (!broken_loop)
6292    {
6293      class loop *loop = body_bb->loop_father;
6294      class loop *trip_loop = alloc_loop ();
6295      trip_loop->header = iter_part_bb;
6296      trip_loop->latch = trip_update_bb;
6297      add_loop (trip_loop, iter_part_bb->loop_father);
6298
6299      if (loop != entry_bb->loop_father)
6300	{
6301	  gcc_assert (loop->header == body_bb);
6302	  gcc_assert (loop->latch == region->cont
6303		      || single_pred (loop->latch) == region->cont);
6304	  trip_loop->inner = loop;
6305	  return;
6306	}
6307
6308      if (!gimple_omp_for_combined_p (fd->for_stmt))
6309	{
6310	  loop = alloc_loop ();
6311	  loop->header = body_bb;
6312	  if (collapse_bb == NULL)
6313	    loop->latch = cont_bb;
6314	  add_loop (loop, trip_loop);
6315	}
6316    }
6317}
6318
6319/* A subroutine of expand_omp_for.  Generate code for a simd non-worksharing
6320   loop.  Given parameters:
6321
6322	for (V = N1; V cond N2; V += STEP) BODY;
6323
6324   where COND is "<" or ">", we generate pseudocode
6325
6326	V = N1;
6327	goto L1;
6328    L0:
6329	BODY;
6330	V += STEP;
6331    L1:
6332	if (V cond N2) goto L0; else goto L2;
6333    L2:
6334
6335    For collapsed loops, emit the outer loops as scalar
6336    and only try to vectorize the innermost loop.  */
6337
6338static void
6339expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6340{
6341  tree type, t;
6342  basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6343  gimple_stmt_iterator gsi;
6344  gimple *stmt;
6345  gcond *cond_stmt;
6346  bool broken_loop = region->cont == NULL;
6347  edge e, ne;
6348  tree *counts = NULL;
6349  int i;
6350  int safelen_int = INT_MAX;
6351  bool dont_vectorize = false;
6352  tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6353				  OMP_CLAUSE_SAFELEN);
6354  tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6355				  OMP_CLAUSE__SIMDUID_);
6356  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6357			      OMP_CLAUSE_IF);
6358  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6359				  OMP_CLAUSE_SIMDLEN);
6360  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6361				   OMP_CLAUSE__CONDTEMP_);
6362  tree n1, n2;
6363  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6364
6365  if (safelen)
6366    {
6367      poly_uint64 val;
6368      safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6369      if (!poly_int_tree_p (safelen, &val))
6370	safelen_int = 0;
6371      else
6372	safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6373      if (safelen_int == 1)
6374	safelen_int = 0;
6375    }
6376  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6377      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6378    {
6379      safelen_int = 0;
6380      dont_vectorize = true;
6381    }
6382  type = TREE_TYPE (fd->loop.v);
6383  entry_bb = region->entry;
6384  cont_bb = region->cont;
6385  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6386  gcc_assert (broken_loop
6387	      || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6388  l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6389  if (!broken_loop)
6390    {
6391      gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6392      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6393      l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6394      l2_bb = BRANCH_EDGE (entry_bb)->dest;
6395    }
6396  else
6397    {
6398      BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6399      l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6400      l2_bb = single_succ (l1_bb);
6401    }
6402  exit_bb = region->exit;
6403  l2_dom_bb = NULL;
6404
6405  gsi = gsi_last_nondebug_bb (entry_bb);
6406
6407  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6408  /* Not needed in SSA form right now.  */
6409  gcc_assert (!gimple_in_ssa_p (cfun));
6410  if (fd->collapse > 1
6411      && (gimple_omp_for_combined_into_p (fd->for_stmt)
6412	  || broken_loop))
6413    {
6414      int first_zero_iter = -1, dummy = -1;
6415      basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6416
6417      counts = XALLOCAVEC (tree, fd->collapse);
6418      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6419				  zero_iter_bb, first_zero_iter,
6420				  dummy_bb, dummy, l2_dom_bb);
6421    }
6422  if (l2_dom_bb == NULL)
6423    l2_dom_bb = l1_bb;
6424
6425  n1 = fd->loop.n1;
6426  n2 = fd->loop.n2;
6427  if (gimple_omp_for_combined_into_p (fd->for_stmt))
6428    {
6429      tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6430				     OMP_CLAUSE__LOOPTEMP_);
6431      gcc_assert (innerc);
6432      n1 = OMP_CLAUSE_DECL (innerc);
6433      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6434				OMP_CLAUSE__LOOPTEMP_);
6435      gcc_assert (innerc);
6436      n2 = OMP_CLAUSE_DECL (innerc);
6437    }
6438  tree step = fd->loop.step;
6439  tree orig_step = step; /* May be different from step if is_simt.  */
6440
6441  bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6442				  OMP_CLAUSE__SIMT_);
6443  if (is_simt)
6444    {
6445      cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6446      is_simt = safelen_int > 1;
6447    }
6448  tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6449  if (is_simt)
6450    {
6451      simt_lane = create_tmp_var (unsigned_type_node);
6452      gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6453      gimple_call_set_lhs (g, simt_lane);
6454      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6455      tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6456				 fold_convert (TREE_TYPE (step), simt_lane));
6457      n1 = fold_convert (type, n1);
6458      if (POINTER_TYPE_P (type))
6459	n1 = fold_build_pointer_plus (n1, offset);
6460      else
6461	n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6462
6463      /* Collapsed loops not handled for SIMT yet: limit to one lane only.  */
6464      if (fd->collapse > 1)
6465	simt_maxlane = build_one_cst (unsigned_type_node);
6466      else if (safelen_int < omp_max_simt_vf ())
6467	simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6468      tree vf
6469	= build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6470					unsigned_type_node, 0);
6471      if (simt_maxlane)
6472	vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6473      vf = fold_convert (TREE_TYPE (step), vf);
6474      step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6475    }
6476
6477  tree n2var = NULL_TREE;
6478  tree n2v = NULL_TREE;
6479  tree *nonrect_bounds = NULL;
6480  tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6481  if (fd->collapse > 1)
6482    {
6483      if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6484	{
6485	  if (fd->non_rect)
6486	    {
6487	      nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6488	      memset (nonrect_bounds, 0,
6489		      sizeof (tree) * (fd->last_nonrect + 1));
6490	    }
6491	  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6492	  gcc_assert (entry_bb == gsi_bb (gsi));
6493	  gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6494	  gsi_prev (&gsi);
6495	  entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6496	  expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6497				    NULL, n1);
6498	  gsi = gsi_for_stmt (fd->for_stmt);
6499	}
6500      if (broken_loop)
6501	;
6502      else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6503	{
6504	  /* Compute in n2var the limit for the first innermost loop,
6505	     i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6506	     where cnt is how many iterations would the loop have if
6507	     all further iterations were assigned to the current task.  */
6508	  n2var = create_tmp_var (type);
6509	  i = fd->collapse - 1;
6510	  tree itype = TREE_TYPE (fd->loops[i].v);
6511	  if (POINTER_TYPE_P (itype))
6512	    itype = signed_type_for (itype);
6513	  t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6514				     ? -1 : 1));
6515	  t = fold_build2 (PLUS_EXPR, itype,
6516			   fold_convert (itype, fd->loops[i].step), t);
6517	  t = fold_build2 (PLUS_EXPR, itype, t,
6518			   fold_convert (itype, fd->loops[i].n2));
6519	  if (fd->loops[i].m2)
6520	    {
6521	      tree t2 = fold_convert (itype,
6522				      fd->loops[i - fd->loops[i].outer].v);
6523	      tree t3 = fold_convert (itype, fd->loops[i].m2);
6524	      t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6525	      t = fold_build2 (PLUS_EXPR, itype, t, t2);
6526	    }
6527	  t = fold_build2 (MINUS_EXPR, itype, t,
6528			   fold_convert (itype, fd->loops[i].v));
6529	  if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6530	    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6531			     fold_build1 (NEGATE_EXPR, itype, t),
6532			     fold_build1 (NEGATE_EXPR, itype,
6533					  fold_convert (itype,
6534							fd->loops[i].step)));
6535	  else
6536	    t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6537			     fold_convert (itype, fd->loops[i].step));
6538	  t = fold_convert (type, t);
6539	  tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6540	  min_arg1 = create_tmp_var (type);
6541	  expand_omp_build_assign (&gsi, min_arg1, t2);
6542	  min_arg2 = create_tmp_var (type);
6543	  expand_omp_build_assign (&gsi, min_arg2, t);
6544	}
6545      else
6546	{
6547	  if (TREE_CODE (n2) == INTEGER_CST)
6548	    {
6549	      /* Indicate for lastprivate handling that at least one iteration
6550		 has been performed, without wasting runtime.  */
6551	      if (integer_nonzerop (n2))
6552		expand_omp_build_assign (&gsi, fd->loop.v,
6553					 fold_convert (type, n2));
6554	      else
6555		/* Indicate that no iteration has been performed.  */
6556		expand_omp_build_assign (&gsi, fd->loop.v,
6557					 build_one_cst (type));
6558	    }
6559	  else
6560	    {
6561	      expand_omp_build_assign (&gsi, fd->loop.v,
6562				       build_zero_cst (type));
6563	      expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6564	    }
6565	  for (i = 0; i < fd->collapse; i++)
6566	    {
6567	      t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6568	      if (fd->loops[i].m1)
6569		{
6570		  tree t2
6571		    = fold_convert (TREE_TYPE (t),
6572				    fd->loops[i - fd->loops[i].outer].v);
6573		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6574		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6575		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6576		}
6577	      expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6578	      /* For normal non-combined collapsed loops just initialize
6579		 the outermost iterator in the entry_bb.  */
6580	      if (!broken_loop)
6581		break;
6582	    }
6583	}
6584    }
6585  else
6586    expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6587  tree altv = NULL_TREE, altn2 = NULL_TREE;
6588  if (fd->collapse == 1
6589      && !broken_loop
6590      && TREE_CODE (orig_step) != INTEGER_CST)
6591    {
6592      /* The vectorizer currently punts on loops with non-constant steps
6593	 for the main IV (can't compute number of iterations and gives up
6594	 because of that).  As for OpenMP loops it is always possible to
6595	 compute the number of iterations upfront, use an alternate IV
6596	 as the loop iterator:
6597	 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6598	 for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
6599      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6600      expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6601      tree itype = TREE_TYPE (fd->loop.v);
6602      if (POINTER_TYPE_P (itype))
6603	itype = signed_type_for (itype);
6604      t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6605      t = fold_build2 (PLUS_EXPR, itype,
6606		       fold_convert (itype, step), t);
6607      t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6608      t = fold_build2 (MINUS_EXPR, itype, t,
6609		       fold_convert (itype, fd->loop.v));
6610      if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6611	t = fold_build2 (TRUNC_DIV_EXPR, itype,
6612			 fold_build1 (NEGATE_EXPR, itype, t),
6613			 fold_build1 (NEGATE_EXPR, itype,
6614				      fold_convert (itype, step)));
6615      else
6616	t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6617			 fold_convert (itype, step));
6618      t = fold_convert (TREE_TYPE (altv), t);
6619      altn2 = create_tmp_var (TREE_TYPE (altv));
6620      expand_omp_build_assign (&gsi, altn2, t);
6621      tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6622      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6623				     true, GSI_SAME_STMT);
6624      t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6625      gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6626					build_zero_cst (TREE_TYPE (altv)));
6627      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6628    }
6629  else if (fd->collapse > 1
6630	   && !broken_loop
6631	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6632	   && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6633    {
6634      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6635      altn2 = create_tmp_var (TREE_TYPE (altv));
6636    }
6637  if (cond_var)
6638    {
6639      if (POINTER_TYPE_P (type)
6640	  || TREE_CODE (n1) != INTEGER_CST
6641	  || fd->loop.cond_code != LT_EXPR
6642	  || tree_int_cst_sgn (n1) != 1)
6643	expand_omp_build_assign (&gsi, cond_var,
6644				 build_one_cst (TREE_TYPE (cond_var)));
6645      else
6646	expand_omp_build_assign (&gsi, cond_var,
6647				 fold_convert (TREE_TYPE (cond_var), n1));
6648    }
6649
6650  /* Remove the GIMPLE_OMP_FOR statement.  */
6651  gsi_remove (&gsi, true);
6652
6653  if (!broken_loop)
6654    {
6655      /* Code to control the increment goes in the CONT_BB.  */
6656      gsi = gsi_last_nondebug_bb (cont_bb);
6657      stmt = gsi_stmt (gsi);
6658      gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6659
6660      if (fd->collapse == 1
6661	  || gimple_omp_for_combined_into_p (fd->for_stmt))
6662	{
6663	  if (POINTER_TYPE_P (type))
6664	    t = fold_build_pointer_plus (fd->loop.v, step);
6665	  else
6666	    t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6667	  expand_omp_build_assign (&gsi, fd->loop.v, t);
6668	}
6669      else if (TREE_CODE (n2) != INTEGER_CST)
6670	expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6671      if (altv)
6672	{
6673	  t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6674			   build_one_cst (TREE_TYPE (altv)));
6675	  expand_omp_build_assign (&gsi, altv, t);
6676	}
6677
6678      if (fd->collapse > 1)
6679	{
6680	  i = fd->collapse - 1;
6681	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6682	    {
6683	      t = fold_convert (sizetype, fd->loops[i].step);
6684	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6685	    }
6686	  else
6687	    {
6688	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6689				fd->loops[i].step);
6690	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6691			       fd->loops[i].v, t);
6692	    }
6693	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6694	}
6695      if (cond_var)
6696	{
6697	  if (POINTER_TYPE_P (type)
6698	      || TREE_CODE (n1) != INTEGER_CST
6699	      || fd->loop.cond_code != LT_EXPR
6700	      || tree_int_cst_sgn (n1) != 1)
6701	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6702			     build_one_cst (TREE_TYPE (cond_var)));
6703	  else
6704	    t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6705			     fold_convert (TREE_TYPE (cond_var), step));
6706	  expand_omp_build_assign (&gsi, cond_var, t);
6707	}
6708
6709      /* Remove GIMPLE_OMP_CONTINUE.  */
6710      gsi_remove (&gsi, true);
6711    }
6712
6713  /* Emit the condition in L1_BB.  */
6714  gsi = gsi_start_bb (l1_bb);
6715
6716  if (altv)
6717    t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6718  else if (fd->collapse > 1
6719	   && !gimple_omp_for_combined_into_p (fd->for_stmt)
6720	   && !broken_loop)
6721    {
6722      i = fd->collapse - 1;
6723      tree itype = TREE_TYPE (fd->loops[i].v);
6724      if (fd->loops[i].m2)
6725	t = n2v = create_tmp_var (itype);
6726      else
6727	t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6728      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6729				    false, GSI_CONTINUE_LINKING);
6730      tree v = fd->loops[i].v;
6731      if (DECL_P (v) && TREE_ADDRESSABLE (v))
6732	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6733				      false, GSI_CONTINUE_LINKING);
6734      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6735    }
6736  else
6737    {
6738      if (fd->collapse > 1 && !broken_loop)
6739	t = n2var;
6740      else
6741	t = fold_convert (type, unshare_expr (n2));
6742      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6743				    false, GSI_CONTINUE_LINKING);
6744      tree v = fd->loop.v;
6745      if (DECL_P (v) && TREE_ADDRESSABLE (v))
6746	v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6747				      false, GSI_CONTINUE_LINKING);
6748      t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6749    }
6750  cond_stmt = gimple_build_cond_empty (t);
6751  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6752  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6753		 NULL, NULL)
6754      || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6755		    NULL, NULL))
6756    {
6757      gsi = gsi_for_stmt (cond_stmt);
6758      gimple_regimplify_operands (cond_stmt, &gsi);
6759    }
6760
6761  /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop.  */
6762  if (is_simt)
6763    {
6764      gsi = gsi_start_bb (l2_bb);
6765      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
6766      if (POINTER_TYPE_P (type))
6767	t = fold_build_pointer_plus (fd->loop.v, step);
6768      else
6769	t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6770      expand_omp_build_assign (&gsi, fd->loop.v, t);
6771    }
6772
6773  /* Remove GIMPLE_OMP_RETURN.  */
6774  gsi = gsi_last_nondebug_bb (exit_bb);
6775  gsi_remove (&gsi, true);
6776
6777  /* Connect the new blocks.  */
6778  remove_edge (FALLTHRU_EDGE (entry_bb));
6779
6780  if (!broken_loop)
6781    {
6782      remove_edge (BRANCH_EDGE (entry_bb));
6783      make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6784
6785      e = BRANCH_EDGE (l1_bb);
6786      ne = FALLTHRU_EDGE (l1_bb);
6787      e->flags = EDGE_TRUE_VALUE;
6788    }
6789  else
6790    {
6791      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6792
6793      ne = single_succ_edge (l1_bb);
6794      e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6795
6796    }
6797  ne->flags = EDGE_FALSE_VALUE;
6798  e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6799  ne->probability = e->probability.invert ();
6800
6801  set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6802  set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6803
6804  if (simt_maxlane)
6805    {
6806      cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6807				     NULL_TREE, NULL_TREE);
6808      gsi = gsi_last_bb (entry_bb);
6809      gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6810      make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6811      FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6812      FALLTHRU_EDGE (entry_bb)->probability
6813	 = profile_probability::guessed_always ().apply_scale (7, 8);
6814      BRANCH_EDGE (entry_bb)->probability
6815	 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6816      l2_dom_bb = entry_bb;
6817    }
6818  set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6819
6820  if (!broken_loop && fd->collapse > 1)
6821    {
6822      basic_block last_bb = l1_bb;
6823      basic_block init_bb = NULL;
6824      for (i = fd->collapse - 2; i >= 0; i--)
6825	{
6826	  tree nextn2v = NULL_TREE;
6827	  if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6828	    e = EDGE_SUCC (last_bb, 0);
6829	  else
6830	    e = EDGE_SUCC (last_bb, 1);
6831	  basic_block bb = split_edge (e);
6832	  if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6833	    {
6834	      t = fold_convert (sizetype, fd->loops[i].step);
6835	      t = fold_build_pointer_plus (fd->loops[i].v, t);
6836	    }
6837	  else
6838	    {
6839	      t = fold_convert (TREE_TYPE (fd->loops[i].v),
6840				fd->loops[i].step);
6841	      t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6842			       fd->loops[i].v, t);
6843	    }
6844	  gsi = gsi_after_labels (bb);
6845	  expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6846
6847	  bb = split_block (bb, last_stmt (bb))->dest;
6848	  gsi = gsi_start_bb (bb);
6849	  tree itype = TREE_TYPE (fd->loops[i].v);
6850	  if (fd->loops[i].m2)
6851	    t = nextn2v = create_tmp_var (itype);
6852	  else
6853	    t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
6854	  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6855					false, GSI_CONTINUE_LINKING);
6856	  tree v = fd->loops[i].v;
6857	  if (DECL_P (v) && TREE_ADDRESSABLE (v))
6858	    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6859					  false, GSI_CONTINUE_LINKING);
6860	  t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6861	  cond_stmt = gimple_build_cond_empty (t);
6862	  gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6863	  if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6864			 expand_omp_regimplify_p, NULL, NULL)
6865	      || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6866			    expand_omp_regimplify_p, NULL, NULL))
6867	    {
6868	      gsi = gsi_for_stmt (cond_stmt);
6869	      gimple_regimplify_operands (cond_stmt, &gsi);
6870	    }
6871	  ne = single_succ_edge (bb);
6872	  ne->flags = EDGE_FALSE_VALUE;
6873
6874	  init_bb = create_empty_bb (bb);
6875	  set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6876	  add_bb_to_loop (init_bb, bb->loop_father);
6877	  e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6878	  e->probability
6879	    = profile_probability::guessed_always ().apply_scale (7, 8);
6880	  ne->probability = e->probability.invert ();
6881
6882	  gsi = gsi_after_labels (init_bb);
6883	  if (fd->loops[i + 1].m1)
6884	    {
6885	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6886				      fd->loops[i + 1
6887						- fd->loops[i + 1].outer].v);
6888	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
6889		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n1);
6890	      else
6891		{
6892		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6893				    fd->loops[i + 1].n1);
6894		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6895		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6896		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6897		}
6898	    }
6899	  else
6900	    t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6901			      fd->loops[i + 1].n1);
6902	  expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6903	  if (fd->loops[i + 1].m2)
6904	    {
6905	      if (i + 2 == fd->collapse && (n2var || altv))
6906		{
6907		  gcc_assert (n2v == NULL_TREE);
6908		  n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6909		}
6910	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6911				      fd->loops[i + 1
6912						- fd->loops[i + 1].outer].v);
6913	      if (POINTER_TYPE_P (TREE_TYPE (t2)))
6914		t = fold_build_pointer_plus (t2, fd->loops[i + 1].n2);
6915	      else
6916		{
6917		  t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6918				    fd->loops[i + 1].n2);
6919		  tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6920		  t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6921		  t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6922		}
6923	      expand_omp_build_assign (&gsi, n2v, t);
6924	    }
6925	  if (i + 2 == fd->collapse && n2var)
6926	    {
6927	      /* For composite simd, n2 is the first iteration the current
6928		 task shouldn't already handle, so we effectively want to use
6929		 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6930		 as the vectorized loop.  Except the vectorizer will not
6931		 vectorize that, so instead compute N2VAR as
6932		 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6933		 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6934		 as the loop to vectorize.  */
6935	      tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6936	      if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6937		{
6938		  tree itype = TREE_TYPE (fd->loops[i].v);
6939		  if (POINTER_TYPE_P (itype))
6940		    itype = signed_type_for (itype);
6941		  t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6942					     == LT_EXPR ? -1 : 1));
6943		  t = fold_build2 (PLUS_EXPR, itype,
6944				   fold_convert (itype,
6945						 fd->loops[i + 1].step), t);
6946		  if (fd->loops[i + 1].m2 == NULL_TREE)
6947		    t = fold_build2 (PLUS_EXPR, itype, t,
6948				     fold_convert (itype,
6949						   fd->loops[i + 1].n2));
6950		  else if (POINTER_TYPE_P (TREE_TYPE (n2v)))
6951		    {
6952		      t = fold_build_pointer_plus (n2v, t);
6953		      t = fold_convert (itype, t);
6954		    }
6955		  else
6956		    t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6957		  t = fold_build2 (MINUS_EXPR, itype, t,
6958				   fold_convert (itype, fd->loops[i + 1].v));
6959		  tree step = fold_convert (itype, fd->loops[i + 1].step);
6960		  if (TYPE_UNSIGNED (itype)
6961		      && fd->loops[i + 1].cond_code == GT_EXPR)
6962		    t = fold_build2 (TRUNC_DIV_EXPR, itype,
6963				     fold_build1 (NEGATE_EXPR, itype, t),
6964				     fold_build1 (NEGATE_EXPR, itype, step));
6965		  else
6966		    t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6967		  t = fold_convert (type, t);
6968		}
6969	      else
6970		t = counts[i + 1];
6971	      expand_omp_build_assign (&gsi, min_arg1, t2);
6972	      expand_omp_build_assign (&gsi, min_arg2, t);
6973	      e = split_block (init_bb, last_stmt (init_bb));
6974	      gsi = gsi_after_labels (e->dest);
6975	      init_bb = e->dest;
6976	      remove_edge (FALLTHRU_EDGE (entry_bb));
6977	      make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6978	      set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6979	      set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6980	      t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6981	      t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6982	      expand_omp_build_assign (&gsi, n2var, t);
6983	    }
6984	  if (i + 2 == fd->collapse && altv)
6985	    {
6986	      /* The vectorizer currently punts on loops with non-constant
6987		 steps for the main IV (can't compute number of iterations
6988		 and gives up because of that).  As for OpenMP loops it is
6989		 always possible to compute the number of iterations upfront,
6990		 use an alternate IV as the loop iterator.  */
6991	      expand_omp_build_assign (&gsi, altv,
6992				       build_zero_cst (TREE_TYPE (altv)));
6993	      tree itype = TREE_TYPE (fd->loops[i + 1].v);
6994	      if (POINTER_TYPE_P (itype))
6995		itype = signed_type_for (itype);
6996	      t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6997					 ? -1 : 1));
6998	      t = fold_build2 (PLUS_EXPR, itype,
6999			       fold_convert (itype, fd->loops[i + 1].step), t);
7000	      t = fold_build2 (PLUS_EXPR, itype, t,
7001			       fold_convert (itype,
7002					     fd->loops[i + 1].m2
7003					     ? n2v : fd->loops[i + 1].n2));
7004	      t = fold_build2 (MINUS_EXPR, itype, t,
7005			       fold_convert (itype, fd->loops[i + 1].v));
7006	      tree step = fold_convert (itype, fd->loops[i + 1].step);
7007	      if (TYPE_UNSIGNED (itype)
7008		  && fd->loops[i + 1].cond_code == GT_EXPR)
7009		t = fold_build2 (TRUNC_DIV_EXPR, itype,
7010				 fold_build1 (NEGATE_EXPR, itype, t),
7011				 fold_build1 (NEGATE_EXPR, itype, step));
7012	      else
7013		t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
7014	      t = fold_convert (TREE_TYPE (altv), t);
7015	      expand_omp_build_assign (&gsi, altn2, t);
7016	      tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
7017				      fd->loops[i + 1].m2
7018				      ? n2v : fd->loops[i + 1].n2);
7019	      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
7020					     true, GSI_SAME_STMT);
7021	      t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
7022				fd->loops[i + 1].v, t2);
7023	      gassign *g
7024		= gimple_build_assign (altn2, COND_EXPR, t2, altn2,
7025				       build_zero_cst (TREE_TYPE (altv)));
7026	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7027	    }
7028	  n2v = nextn2v;
7029
7030	  make_edge (init_bb, last_bb, EDGE_FALLTHRU);
7031	  if (!gimple_omp_for_combined_into_p (fd->for_stmt))
7032	    {
7033	      e = find_edge (entry_bb, last_bb);
7034	      redirect_edge_succ (e, bb);
7035	      set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
7036	      set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
7037	    }
7038
7039	  last_bb = bb;
7040	}
7041    }
7042  if (!broken_loop)
7043    {
7044      class loop *loop = alloc_loop ();
7045      loop->header = l1_bb;
7046      loop->latch = cont_bb;
7047      add_loop (loop, l1_bb->loop_father);
7048      loop->safelen = safelen_int;
7049      if (simduid)
7050	{
7051	  loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
7052	  cfun->has_simduid_loops = true;
7053	}
7054      /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
7055	 the loop.  */
7056      if ((flag_tree_loop_vectorize
7057	   || !OPTION_SET_P (flag_tree_loop_vectorize))
7058	  && flag_tree_loop_optimize
7059	  && loop->safelen > 1)
7060	{
7061	  loop->force_vectorize = true;
7062	  if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
7063	    {
7064	      unsigned HOST_WIDE_INT v
7065		= tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
7066	      if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
7067		loop->simdlen = v;
7068	    }
7069	  cfun->has_force_vectorize_loops = true;
7070	}
7071      else if (dont_vectorize)
7072	loop->dont_vectorize = true;
7073    }
7074  else if (simduid)
7075    cfun->has_simduid_loops = true;
7076}
7077
7078/* Taskloop construct is represented after gimplification with
7079   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7080   in between them.  This routine expands the outer GIMPLE_OMP_FOR,
7081   which should just compute all the needed loop temporaries
7082   for GIMPLE_OMP_TASK.  */
7083
7084static void
7085expand_omp_taskloop_for_outer (struct omp_region *region,
7086			       struct omp_for_data *fd,
7087			       gimple *inner_stmt)
7088{
7089  tree type, bias = NULL_TREE;
7090  basic_block entry_bb, cont_bb, exit_bb;
7091  gimple_stmt_iterator gsi;
7092  gassign *assign_stmt;
7093  tree *counts = NULL;
7094  int i;
7095
7096  gcc_assert (inner_stmt);
7097  gcc_assert (region->cont);
7098  gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
7099	      && gimple_omp_task_taskloop_p (inner_stmt));
7100  type = TREE_TYPE (fd->loop.v);
7101
7102  /* See if we need to bias by LLONG_MIN.  */
7103  if (fd->iter_type == long_long_unsigned_type_node
7104      && TREE_CODE (type) == INTEGER_TYPE
7105      && !TYPE_UNSIGNED (type))
7106    {
7107      tree n1, n2;
7108
7109      if (fd->loop.cond_code == LT_EXPR)
7110	{
7111	  n1 = fd->loop.n1;
7112	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7113	}
7114      else
7115	{
7116	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7117	  n2 = fd->loop.n1;
7118	}
7119      if (TREE_CODE (n1) != INTEGER_CST
7120	  || TREE_CODE (n2) != INTEGER_CST
7121	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7122	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7123    }
7124
7125  entry_bb = region->entry;
7126  cont_bb = region->cont;
7127  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7128  gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7129  exit_bb = region->exit;
7130
7131  gsi = gsi_last_nondebug_bb (entry_bb);
7132  gimple *for_stmt = gsi_stmt (gsi);
7133  gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7134  if (fd->collapse > 1)
7135    {
7136      int first_zero_iter = -1, dummy = -1;
7137      basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7138
7139      counts = XALLOCAVEC (tree, fd->collapse);
7140      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7141				  zero_iter_bb, first_zero_iter,
7142				  dummy_bb, dummy, l2_dom_bb);
7143
7144      if (zero_iter_bb)
7145	{
7146	  /* Some counts[i] vars might be uninitialized if
7147	     some loop has zero iterations.  But the body shouldn't
7148	     be executed in that case, so just avoid uninit warnings.  */
7149	  for (i = first_zero_iter; i < fd->collapse; i++)
7150	    if (SSA_VAR_P (counts[i]))
7151	      suppress_warning (counts[i], OPT_Wuninitialized);
7152	  gsi_prev (&gsi);
7153	  edge e = split_block (entry_bb, gsi_stmt (gsi));
7154	  entry_bb = e->dest;
7155	  make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7156	  gsi = gsi_last_bb (entry_bb);
7157	  set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7158				   get_immediate_dominator (CDI_DOMINATORS,
7159							    zero_iter_bb));
7160	}
7161    }
7162
7163  tree t0, t1;
7164  t1 = fd->loop.n2;
7165  t0 = fd->loop.n1;
7166  if (POINTER_TYPE_P (TREE_TYPE (t0))
7167      && TYPE_PRECISION (TREE_TYPE (t0))
7168	 != TYPE_PRECISION (fd->iter_type))
7169    {
7170      /* Avoid casting pointers to integer of a different size.  */
7171      tree itype = signed_type_for (type);
7172      t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7173      t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7174    }
7175  else
7176    {
7177      t1 = fold_convert (fd->iter_type, t1);
7178      t0 = fold_convert (fd->iter_type, t0);
7179    }
7180  if (bias)
7181    {
7182      t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7183      t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7184    }
7185
7186  tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7187				 OMP_CLAUSE__LOOPTEMP_);
7188  gcc_assert (innerc);
7189  tree startvar = OMP_CLAUSE_DECL (innerc);
7190  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7191  gcc_assert (innerc);
7192  tree endvar = OMP_CLAUSE_DECL (innerc);
7193  if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7194    {
7195      innerc = find_lastprivate_looptemp (fd, innerc);
7196      if (innerc)
7197	{
7198	  /* If needed (inner taskloop has lastprivate clause), propagate
7199	     down the total number of iterations.  */
7200	  tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7201					     NULL_TREE, false,
7202					     GSI_CONTINUE_LINKING);
7203	  assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7204	  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7205	}
7206    }
7207
7208  t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7209				 GSI_CONTINUE_LINKING);
7210  assign_stmt = gimple_build_assign (startvar, t0);
7211  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7212
7213  t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7214				 GSI_CONTINUE_LINKING);
7215  assign_stmt = gimple_build_assign (endvar, t1);
7216  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7217  if (fd->collapse > 1)
7218    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7219
7220  /* Remove the GIMPLE_OMP_FOR statement.  */
7221  gsi = gsi_for_stmt (for_stmt);
7222  gsi_remove (&gsi, true);
7223
7224  gsi = gsi_last_nondebug_bb (cont_bb);
7225  gsi_remove (&gsi, true);
7226
7227  gsi = gsi_last_nondebug_bb (exit_bb);
7228  gsi_remove (&gsi, true);
7229
7230  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7231  remove_edge (BRANCH_EDGE (entry_bb));
7232  FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7233  remove_edge (BRANCH_EDGE (cont_bb));
7234  set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7235  set_immediate_dominator (CDI_DOMINATORS, region->entry,
7236			   recompute_dominator (CDI_DOMINATORS, region->entry));
7237}
7238
7239/* Taskloop construct is represented after gimplification with
7240   two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7241   in between them.  This routine expands the inner GIMPLE_OMP_FOR.
7242   GOMP_taskloop{,_ull} function arranges for each task to be given just
7243   a single range of iterations.  */
7244
7245static void
7246expand_omp_taskloop_for_inner (struct omp_region *region,
7247			       struct omp_for_data *fd,
7248			       gimple *inner_stmt)
7249{
7250  tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7251  basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7252  basic_block fin_bb;
7253  gimple_stmt_iterator gsi;
7254  edge ep;
7255  bool broken_loop = region->cont == NULL;
7256  tree *counts = NULL;
7257  tree n1, n2, step;
7258
7259  itype = type = TREE_TYPE (fd->loop.v);
7260  if (POINTER_TYPE_P (type))
7261    itype = signed_type_for (type);
7262
7263  /* See if we need to bias by LLONG_MIN.  */
7264  if (fd->iter_type == long_long_unsigned_type_node
7265      && TREE_CODE (type) == INTEGER_TYPE
7266      && !TYPE_UNSIGNED (type))
7267    {
7268      tree n1, n2;
7269
7270      if (fd->loop.cond_code == LT_EXPR)
7271	{
7272	  n1 = fd->loop.n1;
7273	  n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7274	}
7275      else
7276	{
7277	  n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7278	  n2 = fd->loop.n1;
7279	}
7280      if (TREE_CODE (n1) != INTEGER_CST
7281	  || TREE_CODE (n2) != INTEGER_CST
7282	  || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7283	bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7284    }
7285
7286  entry_bb = region->entry;
7287  cont_bb = region->cont;
7288  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7289  fin_bb = BRANCH_EDGE (entry_bb)->dest;
7290  gcc_assert (broken_loop
7291	      || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7292  body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7293  if (!broken_loop)
7294    {
7295      gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7296      gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7297    }
7298  exit_bb = region->exit;
7299
7300  /* Iteration space partitioning goes in ENTRY_BB.  */
7301  gsi = gsi_last_nondebug_bb (entry_bb);
7302  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7303
7304  if (fd->collapse > 1)
7305    {
7306      int first_zero_iter = -1, dummy = -1;
7307      basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7308
7309      counts = XALLOCAVEC (tree, fd->collapse);
7310      expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7311				  fin_bb, first_zero_iter,
7312				  dummy_bb, dummy, l2_dom_bb);
7313      t = NULL_TREE;
7314    }
7315  else
7316    t = integer_one_node;
7317
7318  step = fd->loop.step;
7319  tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7320				 OMP_CLAUSE__LOOPTEMP_);
7321  gcc_assert (innerc);
7322  n1 = OMP_CLAUSE_DECL (innerc);
7323  innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7324  gcc_assert (innerc);
7325  n2 = OMP_CLAUSE_DECL (innerc);
7326  if (bias)
7327    {
7328      n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7329      n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7330    }
7331  n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7332				 true, NULL_TREE, true, GSI_SAME_STMT);
7333  n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7334				 true, NULL_TREE, true, GSI_SAME_STMT);
7335  step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7336				   true, NULL_TREE, true, GSI_SAME_STMT);
7337
7338  tree startvar = fd->loop.v;
7339  tree endvar = NULL_TREE;
7340
7341  if (gimple_omp_for_combined_p (fd->for_stmt))
7342    {
7343      tree clauses = gimple_omp_for_clauses (inner_stmt);
7344      tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7345      gcc_assert (innerc);
7346      startvar = OMP_CLAUSE_DECL (innerc);
7347      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7348				OMP_CLAUSE__LOOPTEMP_);
7349      gcc_assert (innerc);
7350      endvar = OMP_CLAUSE_DECL (innerc);
7351    }
7352  t = fold_convert (TREE_TYPE (startvar), n1);
7353  t = force_gimple_operand_gsi (&gsi, t,
7354				DECL_P (startvar)
7355				&& TREE_ADDRESSABLE (startvar),
7356				NULL_TREE, false, GSI_CONTINUE_LINKING);
7357  gimple *assign_stmt = gimple_build_assign (startvar, t);
7358  gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7359
7360  t = fold_convert (TREE_TYPE (startvar), n2);
7361  e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7362				false, GSI_CONTINUE_LINKING);
7363  if (endvar)
7364    {
7365      assign_stmt = gimple_build_assign (endvar, e);
7366      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7367      if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7368	assign_stmt = gimple_build_assign (fd->loop.v, e);
7369      else
7370	assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7371      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7372    }
7373
7374  tree *nonrect_bounds = NULL;
7375  if (fd->collapse > 1)
7376    {
7377      if (fd->non_rect)
7378	{
7379	  nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7380	  memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7381	}
7382      gcc_assert (gsi_bb (gsi) == entry_bb);
7383      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7384				startvar);
7385      entry_bb = gsi_bb (gsi);
7386    }
7387
7388  if (!broken_loop)
7389    {
7390      /* The code controlling the sequential loop replaces the
7391	 GIMPLE_OMP_CONTINUE.  */
7392      gsi = gsi_last_nondebug_bb (cont_bb);
7393      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7394      gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7395      vmain = gimple_omp_continue_control_use (cont_stmt);
7396      vback = gimple_omp_continue_control_def (cont_stmt);
7397
7398      if (!gimple_omp_for_combined_p (fd->for_stmt))
7399	{
7400	  if (POINTER_TYPE_P (type))
7401	    t = fold_build_pointer_plus (vmain, step);
7402	  else
7403	    t = fold_build2 (PLUS_EXPR, type, vmain, step);
7404	  t = force_gimple_operand_gsi (&gsi, t,
7405					DECL_P (vback)
7406					&& TREE_ADDRESSABLE (vback),
7407					NULL_TREE, true, GSI_SAME_STMT);
7408	  assign_stmt = gimple_build_assign (vback, t);
7409	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7410
7411	  t = build2 (fd->loop.cond_code, boolean_type_node,
7412		      DECL_P (vback) && TREE_ADDRESSABLE (vback)
7413		      ? t : vback, e);
7414	  gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7415	}
7416
7417      /* Remove the GIMPLE_OMP_CONTINUE statement.  */
7418      gsi_remove (&gsi, true);
7419
7420      if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7421	collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7422						   cont_bb, body_bb);
7423    }
7424
7425  /* Remove the GIMPLE_OMP_FOR statement.  */
7426  gsi = gsi_for_stmt (fd->for_stmt);
7427  gsi_remove (&gsi, true);
7428
7429  /* Remove the GIMPLE_OMP_RETURN statement.  */
7430  gsi = gsi_last_nondebug_bb (exit_bb);
7431  gsi_remove (&gsi, true);
7432
7433  FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7434  if (!broken_loop)
7435    remove_edge (BRANCH_EDGE (entry_bb));
7436  else
7437    {
7438      remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7439      region->outer->cont = NULL;
7440    }
7441
7442  /* Connect all the blocks.  */
7443  if (!broken_loop)
7444    {
7445      ep = find_edge (cont_bb, body_bb);
7446      if (gimple_omp_for_combined_p (fd->for_stmt))
7447	{
7448	  remove_edge (ep);
7449	  ep = NULL;
7450	}
7451      else if (fd->collapse > 1)
7452	{
7453	  remove_edge (ep);
7454	  ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7455	}
7456      else
7457	ep->flags = EDGE_TRUE_VALUE;
7458      find_edge (cont_bb, fin_bb)->flags
7459	= ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7460    }
7461
7462  set_immediate_dominator (CDI_DOMINATORS, body_bb,
7463			   recompute_dominator (CDI_DOMINATORS, body_bb));
7464  if (!broken_loop)
7465    set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7466			     recompute_dominator (CDI_DOMINATORS, fin_bb));
7467
7468  if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7469    {
7470      class loop *loop = alloc_loop ();
7471      loop->header = body_bb;
7472      if (collapse_bb == NULL)
7473	loop->latch = cont_bb;
7474      add_loop (loop, body_bb->loop_father);
7475    }
7476}
7477
7478/* A subroutine of expand_omp_for.  Generate code for an OpenACC
7479   partitioned loop.  The lowering here is abstracted, in that the
7480   loop parameters are passed through internal functions, which are
7481   further lowered by oacc_device_lower, once we get to the target
7482   compiler.  The loop is of the form:
7483
7484   for (V = B; V LTGT E; V += S) {BODY}
7485
7486   where LTGT is < or >.  We may have a specified chunking size, CHUNKING
7487   (constant 0 for no chunking) and we will have a GWV partitioning
7488   mask, specifying dimensions over which the loop is to be
7489   partitioned (see note below).  We generate code that looks like
7490   (this ignores tiling):
7491
7492   <entry_bb> [incoming FALL->body, BRANCH->exit]
7493     typedef signedintify (typeof (V)) T;  // underlying signed integral type
7494     T range = E - B;
7495     T chunk_no = 0;
7496     T DIR = LTGT == '<' ? +1 : -1;
7497     T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7498     T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7499
7500   <head_bb> [created by splitting end of entry_bb]
7501     T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7502     T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7503     if (!(offset LTGT bound)) goto bottom_bb;
7504
7505   <body_bb> [incoming]
7506     V = B + offset;
7507     {BODY}
7508
7509   <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7510     offset += step;
7511     if (offset LTGT bound) goto body_bb; [*]
7512
7513   <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7514     chunk_no++;
7515     if (chunk < chunk_max) goto head_bb;
7516
7517   <exit_bb> [incoming]
7518     V = B + ((range -/+ 1) / S +/- 1) * S [*]
7519
7520   [*] Needed if V live at end of loop.  */
7521
7522static void
7523expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7524{
7525  bool is_oacc_kernels_parallelized
7526    = (lookup_attribute ("oacc kernels parallelized",
7527			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7528  {
7529    bool is_oacc_kernels
7530      = (lookup_attribute ("oacc kernels",
7531			   DECL_ATTRIBUTES (current_function_decl)) != NULL);
7532    if (is_oacc_kernels_parallelized)
7533      gcc_checking_assert (is_oacc_kernels);
7534  }
7535  gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7536  /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7537     for SSA specifics, and some are for 'parloops' OpenACC
7538     'kernels'-parallelized specifics.  */
7539
7540  tree v = fd->loop.v;
7541  enum tree_code cond_code = fd->loop.cond_code;
7542  enum tree_code plus_code = PLUS_EXPR;
7543
7544  tree chunk_size = integer_minus_one_node;
7545  tree gwv = integer_zero_node;
7546  tree iter_type = TREE_TYPE (v);
7547  tree diff_type = iter_type;
7548  tree plus_type = iter_type;
7549  struct oacc_collapse *counts = NULL;
7550
7551  gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7552		       == GF_OMP_FOR_KIND_OACC_LOOP);
7553  gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7554  gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7555
7556  if (POINTER_TYPE_P (iter_type))
7557    {
7558      plus_code = POINTER_PLUS_EXPR;
7559      plus_type = sizetype;
7560    }
7561  for (int ix = fd->collapse; ix--;)
7562    {
7563      tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7564      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7565	diff_type = diff_type2;
7566    }
7567  if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7568    diff_type = signed_type_for (diff_type);
7569  if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7570    diff_type = integer_type_node;
7571
7572  basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7573  basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7574  basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
7575  basic_block bottom_bb = NULL;
7576
7577  /* entry_bb has two successors; the branch edge is to the exit
7578     block, fallthrough edge to body.  */
7579  gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7580	      && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7581
7582  /* If cont_bb non-NULL, it has 2 successors.  The branch successor is
7583     body_bb, or to a block whose only successor is the body_bb.  Its
7584     fallthrough successor is the final block (same as the branch
7585     successor of the entry_bb).  */
7586  if (cont_bb)
7587    {
7588      basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7589      basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7590
7591      gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7592      gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7593    }
7594  else
7595    gcc_assert (!gimple_in_ssa_p (cfun));
7596
7597  /* The exit block only has entry_bb and cont_bb as predecessors.  */
7598  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7599
7600  tree chunk_no;
7601  tree chunk_max = NULL_TREE;
7602  tree bound, offset;
7603  tree step = create_tmp_var (diff_type, ".step");
7604  bool up = cond_code == LT_EXPR;
7605  tree dir = build_int_cst (diff_type, up ? +1 : -1);
7606  bool chunking = !gimple_in_ssa_p (cfun);
7607  bool negating;
7608
7609  /* Tiling vars.  */
7610  tree tile_size = NULL_TREE;
7611  tree element_s = NULL_TREE;
7612  tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7613  basic_block elem_body_bb = NULL;
7614  basic_block elem_cont_bb = NULL;
7615
7616  /* SSA instances.  */
7617  tree offset_incr = NULL_TREE;
7618  tree offset_init = NULL_TREE;
7619
7620  gimple_stmt_iterator gsi;
7621  gassign *ass;
7622  gcall *call;
7623  gimple *stmt;
7624  tree expr;
7625  location_t loc;
7626  edge split, be, fte;
7627
7628  /* Split the end of entry_bb to create head_bb.  */
7629  split = split_block (entry_bb, last_stmt (entry_bb));
7630  basic_block head_bb = split->dest;
7631  entry_bb = split->src;
7632
7633  /* Chunk setup goes at end of entry_bb, replacing the omp_for.  */
7634  gsi = gsi_last_nondebug_bb (entry_bb);
7635  gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7636  loc = gimple_location (for_stmt);
7637
7638  if (gimple_in_ssa_p (cfun))
7639    {
7640      offset_init = gimple_omp_for_index (for_stmt, 0);
7641      gcc_assert (integer_zerop (fd->loop.n1));
7642      /* The SSA parallelizer does gang parallelism.  */
7643      gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7644    }
7645
7646  if (fd->collapse > 1 || fd->tiling)
7647    {
7648      gcc_assert (!gimple_in_ssa_p (cfun) && up);
7649      counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7650      tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7651					      TREE_TYPE (fd->loop.n2), loc);
7652
7653      if (SSA_VAR_P (fd->loop.n2))
7654	{
7655	  total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7656					    true, GSI_SAME_STMT);
7657	  ass = gimple_build_assign (fd->loop.n2, total);
7658	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7659	}
7660    }
7661
7662  tree b = fd->loop.n1;
7663  tree e = fd->loop.n2;
7664  tree s = fd->loop.step;
7665
7666  b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7667  e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7668
7669  /* Convert the step, avoiding possible unsigned->signed overflow.  */
7670  negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7671  if (negating)
7672    s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7673  s = fold_convert (diff_type, s);
7674  if (negating)
7675    s = fold_build1 (NEGATE_EXPR, diff_type, s);
7676  s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7677
7678  if (!chunking)
7679    chunk_size = integer_zero_node;
7680  expr = fold_convert (diff_type, chunk_size);
7681  chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7682					 NULL_TREE, true, GSI_SAME_STMT);
7683
7684  if (fd->tiling)
7685    {
7686      /* Determine the tile size and element step,
7687	 modify the outer loop step size.  */
7688      tile_size = create_tmp_var (diff_type, ".tile_size");
7689      expr = build_int_cst (diff_type, 1);
7690      for (int ix = 0; ix < fd->collapse; ix++)
7691	expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7692      expr = force_gimple_operand_gsi (&gsi, expr, true,
7693				       NULL_TREE, true, GSI_SAME_STMT);
7694      ass = gimple_build_assign (tile_size, expr);
7695      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7696
7697      element_s = create_tmp_var (diff_type, ".element_s");
7698      ass = gimple_build_assign (element_s, s);
7699      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7700
7701      expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7702      s = force_gimple_operand_gsi (&gsi, expr, true,
7703				    NULL_TREE, true, GSI_SAME_STMT);
7704    }
7705
7706  /* Determine the range, avoiding possible unsigned->signed overflow.  */
7707  negating = !up && TYPE_UNSIGNED (iter_type);
7708  expr = fold_build2 (MINUS_EXPR, plus_type,
7709		      fold_convert (plus_type, negating ? b : e),
7710		      fold_convert (plus_type, negating ? e : b));
7711  expr = fold_convert (diff_type, expr);
7712  if (negating)
7713    expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7714  tree range = force_gimple_operand_gsi (&gsi, expr, true,
7715					 NULL_TREE, true, GSI_SAME_STMT);
7716
7717  chunk_no = build_int_cst (diff_type, 0);
7718  if (chunking)
7719    {
7720      gcc_assert (!gimple_in_ssa_p (cfun));
7721
7722      expr = chunk_no;
7723      chunk_max = create_tmp_var (diff_type, ".chunk_max");
7724      chunk_no = create_tmp_var (diff_type, ".chunk_no");
7725
7726      ass = gimple_build_assign (chunk_no, expr);
7727      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7728
7729      call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7730					 build_int_cst (integer_type_node,
7731							IFN_GOACC_LOOP_CHUNKS),
7732					 dir, range, s, chunk_size, gwv);
7733      gimple_call_set_lhs (call, chunk_max);
7734      gimple_set_location (call, loc);
7735      gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7736    }
7737  else
7738    chunk_size = chunk_no;
7739
7740  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7741				     build_int_cst (integer_type_node,
7742						    IFN_GOACC_LOOP_STEP),
7743				     dir, range, s, chunk_size, gwv);
7744  gimple_call_set_lhs (call, step);
7745  gimple_set_location (call, loc);
7746  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7747
7748  /* Remove the GIMPLE_OMP_FOR.  */
7749  gsi_remove (&gsi, true);
7750
7751  /* Fixup edges from head_bb.  */
7752  be = BRANCH_EDGE (head_bb);
7753  fte = FALLTHRU_EDGE (head_bb);
7754  be->flags |= EDGE_FALSE_VALUE;
7755  fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7756
7757  basic_block body_bb = fte->dest;
7758
7759  if (gimple_in_ssa_p (cfun))
7760    {
7761      gsi = gsi_last_nondebug_bb (cont_bb);
7762      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7763
7764      offset = gimple_omp_continue_control_use (cont_stmt);
7765      offset_incr = gimple_omp_continue_control_def (cont_stmt);
7766    }
7767  else
7768    {
7769      offset = create_tmp_var (diff_type, ".offset");
7770      offset_init = offset_incr = offset;
7771    }
7772  bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7773
7774  /* Loop offset & bound go into head_bb.  */
7775  gsi = gsi_start_bb (head_bb);
7776
7777  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7778				     build_int_cst (integer_type_node,
7779						    IFN_GOACC_LOOP_OFFSET),
7780				     dir, range, s,
7781				     chunk_size, gwv, chunk_no);
7782  gimple_call_set_lhs (call, offset_init);
7783  gimple_set_location (call, loc);
7784  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7785
7786  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7787				     build_int_cst (integer_type_node,
7788						    IFN_GOACC_LOOP_BOUND),
7789				     dir, range, s,
7790				     chunk_size, gwv, offset_init);
7791  gimple_call_set_lhs (call, bound);
7792  gimple_set_location (call, loc);
7793  gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7794
7795  expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7796  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7797		    GSI_CONTINUE_LINKING);
7798
7799  /* V assignment goes into body_bb.  */
7800  if (!gimple_in_ssa_p (cfun))
7801    {
7802      gsi = gsi_start_bb (body_bb);
7803
7804      expr = build2 (plus_code, iter_type, b,
7805		     fold_convert (plus_type, offset));
7806      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7807				       true, GSI_SAME_STMT);
7808      ass = gimple_build_assign (v, expr);
7809      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7810
7811      if (fd->collapse > 1 || fd->tiling)
7812	expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7813
7814      if (fd->tiling)
7815	{
7816	  /* Determine the range of the element loop -- usually simply
7817	     the tile_size, but could be smaller if the final
7818	     iteration of the outer loop is a partial tile.  */
7819	  tree e_range = create_tmp_var (diff_type, ".e_range");
7820
7821	  expr = build2 (MIN_EXPR, diff_type,
7822			 build2 (MINUS_EXPR, diff_type, bound, offset),
7823			 build2 (MULT_EXPR, diff_type, tile_size,
7824				 element_s));
7825	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7826					   true, GSI_SAME_STMT);
7827	  ass = gimple_build_assign (e_range, expr);
7828	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7829
7830	  /* Determine bound, offset & step of inner loop. */
7831	  e_bound = create_tmp_var (diff_type, ".e_bound");
7832	  e_offset = create_tmp_var (diff_type, ".e_offset");
7833	  e_step = create_tmp_var (diff_type, ".e_step");
7834
7835	  /* Mark these as element loops.  */
7836	  tree t, e_gwv = integer_minus_one_node;
7837	  tree chunk = build_int_cst (diff_type, 0); /* Never chunked.  */
7838
7839	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7840	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7841					     element_s, chunk, e_gwv, chunk);
7842	  gimple_call_set_lhs (call, e_offset);
7843	  gimple_set_location (call, loc);
7844	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7845
7846	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7847	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7848					     element_s, chunk, e_gwv, e_offset);
7849	  gimple_call_set_lhs (call, e_bound);
7850	  gimple_set_location (call, loc);
7851	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7852
7853	  t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7854	  call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7855					     element_s, chunk, e_gwv);
7856	  gimple_call_set_lhs (call, e_step);
7857	  gimple_set_location (call, loc);
7858	  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7859
7860	  /* Add test and split block.  */
7861	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7862	  stmt = gimple_build_cond_empty (expr);
7863	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7864	  split = split_block (body_bb, stmt);
7865	  elem_body_bb = split->dest;
7866	  if (cont_bb == body_bb)
7867	    cont_bb = elem_body_bb;
7868	  body_bb = split->src;
7869
7870	  split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7871
7872	  /* Add a dummy exit for the tiled block when cont_bb is missing.  */
7873	  if (cont_bb == NULL)
7874	    {
7875	      edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7876	      e->probability = profile_probability::even ();
7877	      split->probability = profile_probability::even ();
7878	    }
7879
7880	  /* Initialize the user's loop vars.  */
7881	  gsi = gsi_start_bb (elem_body_bb);
7882	  expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7883				     diff_type);
7884	}
7885    }
7886
7887  /* Loop increment goes into cont_bb.  If this is not a loop, we
7888     will have spawned threads as if it was, and each one will
7889     execute one iteration.  The specification is not explicit about
7890     whether such constructs are ill-formed or not, and they can
7891     occur, especially when noreturn routines are involved.  */
7892  if (cont_bb)
7893    {
7894      gsi = gsi_last_nondebug_bb (cont_bb);
7895      gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7896      loc = gimple_location (cont_stmt);
7897
7898      if (fd->tiling)
7899	{
7900	  /* Insert element loop increment and test.  */
7901	  expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7902	  expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7903					   true, GSI_SAME_STMT);
7904	  ass = gimple_build_assign (e_offset, expr);
7905	  gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7906	  expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7907
7908	  stmt = gimple_build_cond_empty (expr);
7909	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7910	  split = split_block (cont_bb, stmt);
7911	  elem_cont_bb = split->src;
7912	  cont_bb = split->dest;
7913
7914	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7915	  split->probability = profile_probability::unlikely ().guessed ();
7916	  edge latch_edge
7917	    = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7918	  latch_edge->probability = profile_probability::likely ().guessed ();
7919
7920	  edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7921	  skip_edge->probability = profile_probability::unlikely ().guessed ();
7922	  edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7923	  loop_entry_edge->probability
7924	    = profile_probability::likely ().guessed ();
7925
7926	  gsi = gsi_for_stmt (cont_stmt);
7927	}
7928
7929      /* Increment offset.  */
7930      if (gimple_in_ssa_p (cfun))
7931	expr = build2 (plus_code, iter_type, offset,
7932		       fold_convert (plus_type, step));
7933      else
7934	expr = build2 (PLUS_EXPR, diff_type, offset, step);
7935      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7936				       true, GSI_SAME_STMT);
7937      ass = gimple_build_assign (offset_incr, expr);
7938      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7939      expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7940      gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7941
7942      /*  Remove the GIMPLE_OMP_CONTINUE.  */
7943      gsi_remove (&gsi, true);
7944
7945      /* Fixup edges from cont_bb.  */
7946      be = BRANCH_EDGE (cont_bb);
7947      fte = FALLTHRU_EDGE (cont_bb);
7948      be->flags |= EDGE_TRUE_VALUE;
7949      fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7950
7951      if (chunking)
7952	{
7953	  /* Split the beginning of exit_bb to make bottom_bb.  We
7954	     need to insert a nop at the start, because splitting is
7955	     after a stmt, not before.  */
7956	  gsi = gsi_start_bb (exit_bb);
7957	  stmt = gimple_build_nop ();
7958	  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7959	  split = split_block (exit_bb, stmt);
7960	  bottom_bb = split->src;
7961	  exit_bb = split->dest;
7962	  gsi = gsi_last_bb (bottom_bb);
7963
7964	  /* Chunk increment and test goes into bottom_bb.  */
7965	  expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7966			 build_int_cst (diff_type, 1));
7967	  ass = gimple_build_assign (chunk_no, expr);
7968	  gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7969
7970	  /* Chunk test at end of bottom_bb.  */
7971	  expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7972	  gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7973			    GSI_CONTINUE_LINKING);
7974
7975	  /* Fixup edges from bottom_bb.  */
7976	  split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7977	  split->probability = profile_probability::unlikely ().guessed ();
7978	  edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7979	  latch_edge->probability = profile_probability::likely ().guessed ();
7980	}
7981    }
7982
7983  gsi = gsi_last_nondebug_bb (exit_bb);
7984  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7985  loc = gimple_location (gsi_stmt (gsi));
7986
7987  if (!gimple_in_ssa_p (cfun))
7988    {
7989      /* Insert the final value of V, in case it is live.  This is the
7990	 value for the only thread that survives past the join.  */
7991      expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7992      expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7993      expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7994      expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7995      expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7996      expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7997				       true, GSI_SAME_STMT);
7998      ass = gimple_build_assign (v, expr);
7999      gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
8000    }
8001
8002  /* Remove the OMP_RETURN.  */
8003  gsi_remove (&gsi, true);
8004
8005  if (cont_bb)
8006    {
8007      /* We now have one, two or three nested loops.  Update the loop
8008	 structures.  */
8009      class loop *parent = entry_bb->loop_father;
8010      class loop *body = body_bb->loop_father;
8011
8012      if (chunking)
8013	{
8014	  class loop *chunk_loop = alloc_loop ();
8015	  chunk_loop->header = head_bb;
8016	  chunk_loop->latch = bottom_bb;
8017	  add_loop (chunk_loop, parent);
8018	  parent = chunk_loop;
8019	}
8020      else if (parent != body)
8021	{
8022	  gcc_assert (body->header == body_bb);
8023	  gcc_assert (body->latch == cont_bb
8024		      || single_pred (body->latch) == cont_bb);
8025	  parent = NULL;
8026	}
8027
8028      if (parent)
8029	{
8030	  class loop *body_loop = alloc_loop ();
8031	  body_loop->header = body_bb;
8032	  body_loop->latch = cont_bb;
8033	  add_loop (body_loop, parent);
8034
8035	  if (fd->tiling)
8036	    {
8037	      /* Insert tiling's element loop.  */
8038	      class loop *inner_loop = alloc_loop ();
8039	      inner_loop->header = elem_body_bb;
8040	      inner_loop->latch = elem_cont_bb;
8041	      add_loop (inner_loop, body_loop);
8042	    }
8043	}
8044    }
8045}
8046
8047/* Expand the OMP loop defined by REGION.  */
8048
8049static void
8050expand_omp_for (struct omp_region *region, gimple *inner_stmt)
8051{
8052  struct omp_for_data fd;
8053  struct omp_for_data_loop *loops;
8054
8055  loops = XALLOCAVEC (struct omp_for_data_loop,
8056		      gimple_omp_for_collapse (last_stmt (region->entry)));
8057  omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
8058			&fd, loops);
8059  region->sched_kind = fd.sched_kind;
8060  region->sched_modifiers = fd.sched_modifiers;
8061  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
8062  if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
8063    {
8064      for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
8065	if ((loops[i].m1 || loops[i].m2)
8066	    && (loops[i].m1 == NULL_TREE
8067		|| TREE_CODE (loops[i].m1) == INTEGER_CST)
8068	    && (loops[i].m2 == NULL_TREE
8069		|| TREE_CODE (loops[i].m2) == INTEGER_CST)
8070	    && TREE_CODE (loops[i].step) == INTEGER_CST
8071	    && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
8072	  {
8073	    tree t;
8074	    tree itype = TREE_TYPE (loops[i].v);
8075	    if (loops[i].m1 && loops[i].m2)
8076	      t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
8077	    else if (loops[i].m1)
8078	      t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
8079	    else
8080	      t = loops[i].m2;
8081	    t = fold_build2 (MULT_EXPR, itype, t,
8082			     fold_convert (itype,
8083					   loops[i - loops[i].outer].step));
8084	    if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
8085	      t = fold_build2 (TRUNC_MOD_EXPR, itype,
8086			       fold_build1 (NEGATE_EXPR, itype, t),
8087			       fold_build1 (NEGATE_EXPR, itype,
8088					    fold_convert (itype,
8089							  loops[i].step)));
8090	    else
8091	      t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
8092			       fold_convert (itype, loops[i].step));
8093	    if (integer_nonzerop (t))
8094	      error_at (gimple_location (fd.for_stmt),
8095			"invalid OpenMP non-rectangular loop step; "
8096			"%<(%E - %E) * %E%> is not a multiple of loop %d "
8097			"step %qE",
8098			loops[i].m2 ? loops[i].m2 : integer_zero_node,
8099			loops[i].m1 ? loops[i].m1 : integer_zero_node,
8100			loops[i - loops[i].outer].step, i + 1,
8101			loops[i].step);
8102	  }
8103    }
8104
8105  gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
8106  BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8107  FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
8108  if (region->cont)
8109    {
8110      gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
8111      BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8112      FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
8113    }
8114  else
8115    /* If there isn't a continue then this is a degerate case where
8116       the introduction of abnormal edges during lowering will prevent
8117       original loops from being detected.  Fix that up.  */
8118    loops_state_set (LOOPS_NEED_FIXUP);
8119
8120  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8121    expand_omp_simd (region, &fd);
8122  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8123    {
8124      gcc_assert (!inner_stmt && !fd.non_rect);
8125      expand_oacc_for (region, &fd);
8126    }
8127  else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8128    {
8129      if (gimple_omp_for_combined_into_p (fd.for_stmt))
8130	expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8131      else
8132	expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8133    }
8134  else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8135	   && !fd.have_ordered)
8136    {
8137      if (fd.chunk_size == NULL)
8138	expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8139      else
8140	expand_omp_for_static_chunk (region, &fd, inner_stmt);
8141    }
8142  else
8143    {
8144      int fn_index, start_ix, next_ix;
8145      unsigned HOST_WIDE_INT sched = 0;
8146      tree sched_arg = NULL_TREE;
8147
8148      gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8149		  == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8150      if (fd.chunk_size == NULL
8151	  && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8152	fd.chunk_size = integer_zero_node;
8153      switch (fd.sched_kind)
8154	{
8155	case OMP_CLAUSE_SCHEDULE_RUNTIME:
8156	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8157	      && fd.lastprivate_conditional == 0)
8158	    {
8159	      gcc_assert (!fd.have_ordered);
8160	      fn_index = 6;
8161	      sched = 4;
8162	    }
8163	  else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8164		   && !fd.have_ordered
8165		   && fd.lastprivate_conditional == 0)
8166	    fn_index = 7;
8167	  else
8168	    {
8169	      fn_index = 3;
8170	      sched = (HOST_WIDE_INT_1U << 31);
8171	    }
8172	  break;
8173	case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8174	case OMP_CLAUSE_SCHEDULE_GUIDED:
8175	  if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8176	      && !fd.have_ordered
8177	      && fd.lastprivate_conditional == 0)
8178	    {
8179	      fn_index = 3 + fd.sched_kind;
8180	      sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8181	      break;
8182	    }
8183	  fn_index = fd.sched_kind;
8184	  sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8185	  sched += (HOST_WIDE_INT_1U << 31);
8186	  break;
8187	case OMP_CLAUSE_SCHEDULE_STATIC:
8188	  gcc_assert (fd.have_ordered);
8189	  fn_index = 0;
8190	  sched = (HOST_WIDE_INT_1U << 31) + 1;
8191	  break;
8192	default:
8193	  gcc_unreachable ();
8194	}
8195      if (!fd.ordered)
8196	fn_index += fd.have_ordered * 8;
8197      if (fd.ordered)
8198	start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8199      else
8200	start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8201      next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8202      if (fd.have_reductemp || fd.have_pointer_condtemp)
8203	{
8204	  if (fd.ordered)
8205	    start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8206	  else if (fd.have_ordered)
8207	    start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8208	  else
8209	    start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8210	  sched_arg = build_int_cstu (long_integer_type_node, sched);
8211	  if (!fd.chunk_size)
8212	    fd.chunk_size = integer_zero_node;
8213	}
8214      if (fd.iter_type == long_long_unsigned_type_node)
8215	{
8216	  start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8217			- (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8218	  next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8219		      - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8220	}
8221      expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8222			      (enum built_in_function) next_ix, sched_arg,
8223			      inner_stmt);
8224    }
8225
8226  if (gimple_in_ssa_p (cfun))
8227    update_ssa (TODO_update_ssa_only_virtuals);
8228}
8229
8230/* Expand code for an OpenMP sections directive.  In pseudo code, we generate
8231
8232	v = GOMP_sections_start (n);
8233    L0:
8234	switch (v)
8235	  {
8236	  case 0:
8237	    goto L2;
8238	  case 1:
8239	    section 1;
8240	    goto L1;
8241	  case 2:
8242	    ...
8243	  case n:
8244	    ...
8245	  default:
8246	    abort ();
8247	  }
8248    L1:
8249	v = GOMP_sections_next ();
8250	goto L0;
8251    L2:
8252	reduction;
8253
8254    If this is a combined parallel sections, replace the call to
8255    GOMP_sections_start with call to GOMP_sections_next.  */
8256
8257static void
8258expand_omp_sections (struct omp_region *region)
8259{
8260  tree t, u, vin = NULL, vmain, vnext, l2;
8261  unsigned len;
8262  basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8263  gimple_stmt_iterator si, switch_si;
8264  gomp_sections *sections_stmt;
8265  gimple *stmt;
8266  gomp_continue *cont;
8267  edge_iterator ei;
8268  edge e;
8269  struct omp_region *inner;
8270  unsigned i, casei;
8271  bool exit_reachable = region->cont != NULL;
8272
8273  gcc_assert (region->exit != NULL);
8274  entry_bb = region->entry;
8275  l0_bb = single_succ (entry_bb);
8276  l1_bb = region->cont;
8277  l2_bb = region->exit;
8278  if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8279    l2 = gimple_block_label (l2_bb);
8280  else
8281    {
8282      /* This can happen if there are reductions.  */
8283      len = EDGE_COUNT (l0_bb->succs);
8284      gcc_assert (len > 0);
8285      e = EDGE_SUCC (l0_bb, len - 1);
8286      si = gsi_last_nondebug_bb (e->dest);
8287      l2 = NULL_TREE;
8288      if (gsi_end_p (si)
8289	  || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8290	l2 = gimple_block_label (e->dest);
8291      else
8292	FOR_EACH_EDGE (e, ei, l0_bb->succs)
8293	  {
8294	    si = gsi_last_nondebug_bb (e->dest);
8295	    if (gsi_end_p (si)
8296		|| gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8297	      {
8298		l2 = gimple_block_label (e->dest);
8299		break;
8300	      }
8301	  }
8302    }
8303  if (exit_reachable)
8304    default_bb = create_empty_bb (l1_bb->prev_bb);
8305  else
8306    default_bb = create_empty_bb (l0_bb);
8307
8308  /* We will build a switch() with enough cases for all the
8309     GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
8310     and a default case to abort if something goes wrong.  */
8311  len = EDGE_COUNT (l0_bb->succs);
8312
8313  /* Use vec::quick_push on label_vec throughout, since we know the size
8314     in advance.  */
8315  auto_vec<tree> label_vec (len);
8316
8317  /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8318     GIMPLE_OMP_SECTIONS statement.  */
8319  si = gsi_last_nondebug_bb (entry_bb);
8320  sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8321  gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8322  vin = gimple_omp_sections_control (sections_stmt);
8323  tree clauses = gimple_omp_sections_clauses (sections_stmt);
8324  tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8325  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8326  tree cond_var = NULL_TREE;
8327  if (reductmp || condtmp)
8328    {
8329      tree reductions = null_pointer_node, mem = null_pointer_node;
8330      tree memv = NULL_TREE, condtemp = NULL_TREE;
8331      gimple_stmt_iterator gsi = gsi_none ();
8332      gimple *g = NULL;
8333      if (reductmp)
8334	{
8335	  reductions = OMP_CLAUSE_DECL (reductmp);
8336	  gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8337	  g = SSA_NAME_DEF_STMT (reductions);
8338	  reductions = gimple_assign_rhs1 (g);
8339	  OMP_CLAUSE_DECL (reductmp) = reductions;
8340	  gsi = gsi_for_stmt (g);
8341	}
8342      else
8343	gsi = si;
8344      if (condtmp)
8345	{
8346	  condtemp = OMP_CLAUSE_DECL (condtmp);
8347	  tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8348				    OMP_CLAUSE__CONDTEMP_);
8349	  cond_var = OMP_CLAUSE_DECL (c);
8350	  tree type = TREE_TYPE (condtemp);
8351	  memv = create_tmp_var (type);
8352	  TREE_ADDRESSABLE (memv) = 1;
8353	  unsigned cnt = 0;
8354	  for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8355	    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8356		&& OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8357	      ++cnt;
8358	  unsigned HOST_WIDE_INT sz
8359	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8360	  expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8361				   false);
8362	  mem = build_fold_addr_expr (memv);
8363	}
8364      t = build_int_cst (unsigned_type_node, len - 1);
8365      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8366      stmt = gimple_build_call (u, 3, t, reductions, mem);
8367      gimple_call_set_lhs (stmt, vin);
8368      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8369      if (condtmp)
8370	{
8371	  expand_omp_build_assign (&gsi, condtemp, memv, false);
8372	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8373			   vin, build_one_cst (TREE_TYPE (cond_var)));
8374	  expand_omp_build_assign (&gsi, cond_var, t, false);
8375	}
8376      if (reductmp)
8377	{
8378	  gsi_remove (&gsi, true);
8379	  release_ssa_name (gimple_assign_lhs (g));
8380	}
8381    }
8382  else if (!is_combined_parallel (region))
8383    {
8384      /* If we are not inside a combined parallel+sections region,
8385	 call GOMP_sections_start.  */
8386      t = build_int_cst (unsigned_type_node, len - 1);
8387      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8388      stmt = gimple_build_call (u, 1, t);
8389    }
8390  else
8391    {
8392      /* Otherwise, call GOMP_sections_next.  */
8393      u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8394      stmt = gimple_build_call (u, 0);
8395    }
8396  if (!reductmp && !condtmp)
8397    {
8398      gimple_call_set_lhs (stmt, vin);
8399      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8400    }
8401  gsi_remove (&si, true);
8402
8403  /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8404     L0_BB.  */
8405  switch_si = gsi_last_nondebug_bb (l0_bb);
8406  gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8407  if (exit_reachable)
8408    {
8409      cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8410      gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8411      vmain = gimple_omp_continue_control_use (cont);
8412      vnext = gimple_omp_continue_control_def (cont);
8413    }
8414  else
8415    {
8416      vmain = vin;
8417      vnext = NULL_TREE;
8418    }
8419
8420  t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8421  label_vec.quick_push (t);
8422  i = 1;
8423
8424  /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR.  */
8425  for (inner = region->inner, casei = 1;
8426       inner;
8427       inner = inner->next, i++, casei++)
8428    {
8429      basic_block s_entry_bb, s_exit_bb;
8430
8431      /* Skip optional reduction region.  */
8432      if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8433	{
8434	  --i;
8435	  --casei;
8436	  continue;
8437	}
8438
8439      s_entry_bb = inner->entry;
8440      s_exit_bb = inner->exit;
8441
8442      t = gimple_block_label (s_entry_bb);
8443      u = build_int_cst (unsigned_type_node, casei);
8444      u = build_case_label (u, NULL, t);
8445      label_vec.quick_push (u);
8446
8447      si = gsi_last_nondebug_bb (s_entry_bb);
8448      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8449      gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8450      gsi_remove (&si, true);
8451      single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8452
8453      if (s_exit_bb == NULL)
8454	continue;
8455
8456      si = gsi_last_nondebug_bb (s_exit_bb);
8457      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8458      gsi_remove (&si, true);
8459
8460      single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8461    }
8462
8463  /* Error handling code goes in DEFAULT_BB.  */
8464  t = gimple_block_label (default_bb);
8465  u = build_case_label (NULL, NULL, t);
8466  make_edge (l0_bb, default_bb, 0);
8467  add_bb_to_loop (default_bb, current_loops->tree_root);
8468
8469  stmt = gimple_build_switch (vmain, u, label_vec);
8470  gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8471  gsi_remove (&switch_si, true);
8472
8473  si = gsi_start_bb (default_bb);
8474  stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8475  gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8476
8477  if (exit_reachable)
8478    {
8479      tree bfn_decl;
8480
8481      /* Code to get the next section goes in L1_BB.  */
8482      si = gsi_last_nondebug_bb (l1_bb);
8483      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8484
8485      bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8486      stmt = gimple_build_call (bfn_decl, 0);
8487      gimple_call_set_lhs (stmt, vnext);
8488      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8489      if (cond_var)
8490	{
8491	  tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8492			   vnext, build_one_cst (TREE_TYPE (cond_var)));
8493	  expand_omp_build_assign (&si, cond_var, t, false);
8494	}
8495      gsi_remove (&si, true);
8496
8497      single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8498    }
8499
8500  /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB.  */
8501  si = gsi_last_nondebug_bb (l2_bb);
8502  if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8503    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8504  else if (gimple_omp_return_lhs (gsi_stmt (si)))
8505    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8506  else
8507    t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8508  stmt = gimple_build_call (t, 0);
8509  if (gimple_omp_return_lhs (gsi_stmt (si)))
8510    gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8511  gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8512  gsi_remove (&si, true);
8513
8514  set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8515}
8516
8517/* Expand code for an OpenMP single or scope directive.  We've already expanded
8518   much of the code, here we simply place the GOMP_barrier call.  */
8519
8520static void
8521expand_omp_single (struct omp_region *region)
8522{
8523  basic_block entry_bb, exit_bb;
8524  gimple_stmt_iterator si;
8525
8526  entry_bb = region->entry;
8527  exit_bb = region->exit;
8528
8529  si = gsi_last_nondebug_bb (entry_bb);
8530  enum gimple_code code = gimple_code (gsi_stmt (si));
8531  gcc_assert (code == GIMPLE_OMP_SINGLE || code == GIMPLE_OMP_SCOPE);
8532  gsi_remove (&si, true);
8533  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8534
8535  if (exit_bb == NULL)
8536    {
8537      gcc_assert (code == GIMPLE_OMP_SCOPE);
8538      return;
8539    }
8540
8541  si = gsi_last_nondebug_bb (exit_bb);
8542  if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8543    {
8544      tree t = gimple_omp_return_lhs (gsi_stmt (si));
8545      gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8546    }
8547  gsi_remove (&si, true);
8548  single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8549}
8550
8551/* Generic expansion for OpenMP synchronization directives: master,
8552   ordered and critical.  All we need to do here is remove the entry
8553   and exit markers for REGION.  */
8554
8555static void
8556expand_omp_synch (struct omp_region *region)
8557{
8558  basic_block entry_bb, exit_bb;
8559  gimple_stmt_iterator si;
8560
8561  entry_bb = region->entry;
8562  exit_bb = region->exit;
8563
8564  si = gsi_last_nondebug_bb (entry_bb);
8565  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8566	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8567	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
8568	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8569	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8570	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8571	      || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8572  if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8573      && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8574    {
8575      expand_omp_taskreg (region);
8576      return;
8577    }
8578  gsi_remove (&si, true);
8579  single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8580
8581  if (exit_bb)
8582    {
8583      si = gsi_last_nondebug_bb (exit_bb);
8584      gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8585      gsi_remove (&si, true);
8586      single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8587    }
8588}
8589
8590/* Translate enum omp_memory_order to enum memmodel for the embedded
8591   fail clause in there.  */
8592
8593static enum memmodel
8594omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
8595{
8596  switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
8597    {
8598    case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
8599      switch (mo & OMP_MEMORY_ORDER_MASK)
8600	{
8601	case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8602	case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8603	case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
8604	case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
8605	case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8606	default: break;
8607	}
8608      gcc_unreachable ();
8609    case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8610    case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8611    case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8612    default: gcc_unreachable ();
8613    }
8614}
8615
8616/* Translate enum omp_memory_order to enum memmodel.  The two enums
8617   are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8618   is 0 and omp_memory_order has the fail mode encoded in it too.  */
8619
8620static enum memmodel
8621omp_memory_order_to_memmodel (enum omp_memory_order mo)
8622{
8623  enum memmodel ret, fail_ret;
8624  switch (mo & OMP_MEMORY_ORDER_MASK)
8625    {
8626    case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
8627    case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
8628    case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
8629    case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
8630    case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
8631    default: gcc_unreachable ();
8632    }
8633  /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
8634     we can just return ret here unconditionally.  Otherwise, work around
8635     it here and make sure fail memmodel is not stronger.  */
8636  if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
8637    return ret;
8638  fail_ret = omp_memory_order_to_fail_memmodel (mo);
8639  if (fail_ret > ret)
8640    return fail_ret;
8641  return ret;
8642}
8643
8644/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8645   operation as a normal volatile load.  */
8646
8647static bool
8648expand_omp_atomic_load (basic_block load_bb, tree addr,
8649			tree loaded_val, int index)
8650{
8651  enum built_in_function tmpbase;
8652  gimple_stmt_iterator gsi;
8653  basic_block store_bb;
8654  location_t loc;
8655  gimple *stmt;
8656  tree decl, call, type, itype;
8657
8658  gsi = gsi_last_nondebug_bb (load_bb);
8659  stmt = gsi_stmt (gsi);
8660  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8661  loc = gimple_location (stmt);
8662
8663  /* ??? If the target does not implement atomic_load_optab[mode], and mode
8664     is smaller than word size, then expand_atomic_load assumes that the load
8665     is atomic.  We could avoid the builtin entirely in this case.  */
8666
8667  tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8668  decl = builtin_decl_explicit (tmpbase);
8669  if (decl == NULL_TREE)
8670    return false;
8671
8672  type = TREE_TYPE (loaded_val);
8673  itype = TREE_TYPE (TREE_TYPE (decl));
8674
8675  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8676  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8677  call = build_call_expr_loc (loc, decl, 2, addr, mo);
8678  if (!useless_type_conversion_p (type, itype))
8679    call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8680  call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8681
8682  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8683  gsi_remove (&gsi, true);
8684
8685  store_bb = single_succ (load_bb);
8686  gsi = gsi_last_nondebug_bb (store_bb);
8687  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8688  gsi_remove (&gsi, true);
8689
8690  if (gimple_in_ssa_p (cfun))
8691    update_ssa (TODO_update_ssa_no_phi);
8692
8693  return true;
8694}
8695
8696/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8697   operation as a normal volatile store.  */
8698
8699static bool
8700expand_omp_atomic_store (basic_block load_bb, tree addr,
8701			 tree loaded_val, tree stored_val, int index)
8702{
8703  enum built_in_function tmpbase;
8704  gimple_stmt_iterator gsi;
8705  basic_block store_bb = single_succ (load_bb);
8706  location_t loc;
8707  gimple *stmt;
8708  tree decl, call, type, itype;
8709  machine_mode imode;
8710  bool exchange;
8711
8712  gsi = gsi_last_nondebug_bb (load_bb);
8713  stmt = gsi_stmt (gsi);
8714  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8715
8716  /* If the load value is needed, then this isn't a store but an exchange.  */
8717  exchange = gimple_omp_atomic_need_value_p (stmt);
8718
8719  gsi = gsi_last_nondebug_bb (store_bb);
8720  stmt = gsi_stmt (gsi);
8721  gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8722  loc = gimple_location (stmt);
8723
8724  /* ??? If the target does not implement atomic_store_optab[mode], and mode
8725     is smaller than word size, then expand_atomic_store assumes that the store
8726     is atomic.  We could avoid the builtin entirely in this case.  */
8727
8728  tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8729  tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8730  decl = builtin_decl_explicit (tmpbase);
8731  if (decl == NULL_TREE)
8732    return false;
8733
8734  type = TREE_TYPE (stored_val);
8735
8736  /* Dig out the type of the function's second argument.  */
8737  itype = TREE_TYPE (decl);
8738  itype = TYPE_ARG_TYPES (itype);
8739  itype = TREE_CHAIN (itype);
8740  itype = TREE_VALUE (itype);
8741  imode = TYPE_MODE (itype);
8742
8743  if (exchange && !can_atomic_exchange_p (imode, true))
8744    return false;
8745
8746  if (!useless_type_conversion_p (itype, type))
8747    stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8748  enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8749  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8750  call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8751  if (exchange)
8752    {
8753      if (!useless_type_conversion_p (type, itype))
8754	call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8755      call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8756    }
8757
8758  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8759  gsi_remove (&gsi, true);
8760
8761  /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above.  */
8762  gsi = gsi_last_nondebug_bb (load_bb);
8763  gsi_remove (&gsi, true);
8764
8765  if (gimple_in_ssa_p (cfun))
8766    update_ssa (TODO_update_ssa_no_phi);
8767
8768  return true;
8769}
8770
8771/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8772   operation as a __atomic_fetch_op builtin.  INDEX is log2 of the
8773   size of the data type, and thus usable to find the index of the builtin
8774   decl.  Returns false if the expression is not of the proper form.  */
8775
8776static bool
8777expand_omp_atomic_fetch_op (basic_block load_bb,
8778			    tree addr, tree loaded_val,
8779			    tree stored_val, int index)
8780{
8781  enum built_in_function oldbase, newbase, tmpbase;
8782  tree decl, itype, call;
8783  tree lhs, rhs;
8784  basic_block store_bb = single_succ (load_bb);
8785  gimple_stmt_iterator gsi;
8786  gimple *stmt;
8787  location_t loc;
8788  enum tree_code code;
8789  bool need_old, need_new;
8790  machine_mode imode;
8791
8792  /* We expect to find the following sequences:
8793
8794   load_bb:
8795       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8796
8797   store_bb:
8798       val = tmp OP something; (or: something OP tmp)
8799       GIMPLE_OMP_STORE (val)
8800
8801  ???FIXME: Allow a more flexible sequence.
8802  Perhaps use data flow to pick the statements.
8803
8804  */
8805
8806  gsi = gsi_after_labels (store_bb);
8807  stmt = gsi_stmt (gsi);
8808  if (is_gimple_debug (stmt))
8809    {
8810      gsi_next_nondebug (&gsi);
8811      if (gsi_end_p (gsi))
8812	return false;
8813      stmt = gsi_stmt (gsi);
8814    }
8815  loc = gimple_location (stmt);
8816  if (!is_gimple_assign (stmt))
8817    return false;
8818  gsi_next_nondebug (&gsi);
8819  if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8820    return false;
8821  need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8822  need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8823  enum omp_memory_order omo
8824    = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8825  enum memmodel mo = omp_memory_order_to_memmodel (omo);
8826  gcc_checking_assert (!need_old || !need_new);
8827
8828  if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8829    return false;
8830
8831  /* Check for one of the supported fetch-op operations.  */
8832  code = gimple_assign_rhs_code (stmt);
8833  switch (code)
8834    {
8835    case PLUS_EXPR:
8836    case POINTER_PLUS_EXPR:
8837      oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8838      newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8839      break;
8840    case MINUS_EXPR:
8841      oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8842      newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8843      break;
8844    case BIT_AND_EXPR:
8845      oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8846      newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8847      break;
8848    case BIT_IOR_EXPR:
8849      oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8850      newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8851      break;
8852    case BIT_XOR_EXPR:
8853      oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8854      newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8855      break;
8856    default:
8857      return false;
8858    }
8859
8860  /* Make sure the expression is of the proper form.  */
8861  if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8862    rhs = gimple_assign_rhs2 (stmt);
8863  else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8864	   && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8865    rhs = gimple_assign_rhs1 (stmt);
8866  else
8867    return false;
8868
8869  tmpbase = ((enum built_in_function)
8870	     ((need_new ? newbase : oldbase) + index + 1));
8871  decl = builtin_decl_explicit (tmpbase);
8872  if (decl == NULL_TREE)
8873    return false;
8874  itype = TREE_TYPE (TREE_TYPE (decl));
8875  imode = TYPE_MODE (itype);
8876
8877  /* We could test all of the various optabs involved, but the fact of the
8878     matter is that (with the exception of i486 vs i586 and xadd) all targets
8879     that support any atomic operaton optab also implements compare-and-swap.
8880     Let optabs.cc take care of expanding any compare-and-swap loop.  */
8881  if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8882    return false;
8883
8884  gsi = gsi_last_nondebug_bb (load_bb);
8885  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8886
8887  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8888     It only requires that the operation happen atomically.  Thus we can
8889     use the RELAXED memory model.  */
8890  call = build_call_expr_loc (loc, decl, 3, addr,
8891			      fold_convert_loc (loc, itype, rhs),
8892			      build_int_cst (NULL, mo));
8893
8894  if (need_old || need_new)
8895    {
8896      lhs = need_old ? loaded_val : stored_val;
8897      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8898      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8899    }
8900  else
8901    call = fold_convert_loc (loc, void_type_node, call);
8902  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8903  gsi_remove (&gsi, true);
8904
8905  gsi = gsi_last_nondebug_bb (store_bb);
8906  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8907  gsi_remove (&gsi, true);
8908  gsi = gsi_last_nondebug_bb (store_bb);
8909  stmt = gsi_stmt (gsi);
8910  gsi_remove (&gsi, true);
8911
8912  if (gimple_in_ssa_p (cfun))
8913    {
8914      release_defs (stmt);
8915      update_ssa (TODO_update_ssa_no_phi);
8916    }
8917
8918  return true;
8919}
8920
8921/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
8922   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
8923   Returns false if the expression is not of the proper form.  */
8924
8925static bool
8926expand_omp_atomic_cas (basic_block load_bb, tree addr,
8927		       tree loaded_val, tree stored_val, int index)
8928{
8929  /* We expect to find the following sequences:
8930
8931   load_bb:
8932       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8933
8934   store_bb:
8935       val = tmp == e ? d : tmp;
8936       GIMPLE_OMP_ATOMIC_STORE (val)
8937
8938     or in store_bb instead:
8939       tmp2 = tmp == e;
8940       val = tmp2 ? d : tmp;
8941       GIMPLE_OMP_ATOMIC_STORE (val)
8942
8943     or:
8944       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
8945       val = e == tmp3 ? d : tmp;
8946       GIMPLE_OMP_ATOMIC_STORE (val)
8947
8948     etc.  */
8949
8950
8951  basic_block store_bb = single_succ (load_bb);
8952  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
8953  gimple *store_stmt = gsi_stmt (gsi);
8954  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
8955    return false;
8956  gsi_prev_nondebug (&gsi);
8957  if (gsi_end_p (gsi))
8958    return false;
8959  gimple *condexpr_stmt = gsi_stmt (gsi);
8960  if (!is_gimple_assign (condexpr_stmt)
8961      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
8962    return false;
8963  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
8964    return false;
8965  gimple *cond_stmt = NULL;
8966  gimple *vce_stmt = NULL;
8967  gsi_prev_nondebug (&gsi);
8968  if (!gsi_end_p (gsi))
8969    {
8970      cond_stmt = gsi_stmt (gsi);
8971      if (!is_gimple_assign (cond_stmt))
8972	return false;
8973      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
8974	{
8975	  gsi_prev_nondebug (&gsi);
8976	  if (!gsi_end_p (gsi))
8977	    {
8978	      vce_stmt = gsi_stmt (gsi);
8979	      if (!is_gimple_assign (vce_stmt)
8980		  || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
8981		return false;
8982	    }
8983	}
8984      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
8985	std::swap (vce_stmt, cond_stmt);
8986      else
8987	return false;
8988      if (vce_stmt)
8989	{
8990	  tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
8991	  if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
8992	      || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
8993	    return false;
8994	  if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
8995	      || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
8996	      || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
8997				      TYPE_SIZE (TREE_TYPE (loaded_val))))
8998	    return false;
8999	  gsi_prev_nondebug (&gsi);
9000	  if (!gsi_end_p (gsi))
9001	    return false;
9002	}
9003    }
9004  tree cond = gimple_assign_rhs1 (condexpr_stmt);
9005  tree cond_op1, cond_op2;
9006  if (cond_stmt)
9007    {
9008      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
9009	return false;
9010      cond_op1 = gimple_assign_rhs1 (cond_stmt);
9011      cond_op2 = gimple_assign_rhs2 (cond_stmt);
9012    }
9013  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
9014    return false;
9015  else
9016    {
9017      cond_op1 = TREE_OPERAND (cond, 0);
9018      cond_op2 = TREE_OPERAND (cond, 1);
9019    }
9020  tree d;
9021  if (TREE_CODE (cond) == NE_EXPR)
9022    {
9023      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
9024	return false;
9025      d = gimple_assign_rhs3 (condexpr_stmt);
9026    }
9027  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
9028    return false;
9029  else
9030    d = gimple_assign_rhs2 (condexpr_stmt);
9031  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
9032  if (operand_equal_p (e, cond_op1))
9033    e = cond_op2;
9034  else if (operand_equal_p (e, cond_op2))
9035    e = cond_op1;
9036  else
9037    return false;
9038
9039  location_t loc = gimple_location (store_stmt);
9040  gimple *load_stmt = last_stmt (load_bb);
9041  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
9042  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
9043  bool weak = gimple_omp_atomic_weak_p (load_stmt);
9044  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
9045  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9046  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9047  gcc_checking_assert (!need_old || !need_new);
9048
9049  enum built_in_function fncode
9050    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9051				+ index + 1);
9052  tree cmpxchg = builtin_decl_explicit (fncode);
9053  if (cmpxchg == NULL_TREE)
9054    return false;
9055  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9056
9057  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9058      || !can_atomic_load_p (TYPE_MODE (itype)))
9059    return false;
9060
9061  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9062  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
9063    return false;
9064
9065  gsi = gsi_for_stmt (store_stmt);
9066  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
9067    {
9068      tree ne = create_tmp_reg (itype);
9069      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
9070      gimple_set_location (g, loc);
9071      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9072      e = ne;
9073    }
9074  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
9075    {
9076      tree nd = create_tmp_reg (itype);
9077      enum tree_code code;
9078      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
9079	{
9080	  code = VIEW_CONVERT_EXPR;
9081	  d = build1 (VIEW_CONVERT_EXPR, itype, d);
9082	}
9083      else
9084	code = NOP_EXPR;
9085      gimple *g = gimple_build_assign (nd, code, d);
9086      gimple_set_location (g, loc);
9087      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9088      d = nd;
9089    }
9090
9091  tree ctype = build_complex_type (itype);
9092  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
9093  gimple *g
9094    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
9095				  build_int_cst (integer_type_node, flag),
9096				  mo, fmo);
9097  tree cres = create_tmp_reg (ctype);
9098  gimple_call_set_lhs (g, cres);
9099  gimple_set_location (g, loc);
9100  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9101
9102  if (cond_stmt || need_old || need_new)
9103    {
9104      tree im = create_tmp_reg (itype);
9105      g = gimple_build_assign (im, IMAGPART_EXPR,
9106			       build1 (IMAGPART_EXPR, itype, cres));
9107      gimple_set_location (g, loc);
9108      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9109
9110      tree re = NULL_TREE;
9111      if (need_old || need_new)
9112	{
9113	  re = create_tmp_reg (itype);
9114	  g = gimple_build_assign (re, REALPART_EXPR,
9115				   build1 (REALPART_EXPR, itype, cres));
9116	  gimple_set_location (g, loc);
9117	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9118	}
9119
9120      if (cond_stmt)
9121	{
9122	  g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
9123				   NOP_EXPR, im);
9124	  gimple_set_location (g, loc);
9125	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9126	}
9127      else if (need_new)
9128	{
9129	  g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
9130				   build2 (NE_EXPR, boolean_type_node,
9131					   im, build_zero_cst (itype)),
9132				   d, re);
9133	  gimple_set_location (g, loc);
9134	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9135	  re = gimple_assign_lhs (g);
9136	}
9137
9138      if (need_old || need_new)
9139	{
9140	  tree v = need_old ? loaded_val : stored_val;
9141	  enum tree_code code;
9142	  if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
9143	    {
9144	      code = VIEW_CONVERT_EXPR;
9145	      re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
9146	    }
9147	  else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
9148	    code = NOP_EXPR;
9149	  else
9150	    code = TREE_CODE (re);
9151	  g = gimple_build_assign (v, code, re);
9152	  gimple_set_location (g, loc);
9153	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9154	}
9155    }
9156
9157  gsi_remove (&gsi, true);
9158  gsi = gsi_for_stmt (load_stmt);
9159  gsi_remove (&gsi, true);
9160  gsi = gsi_for_stmt (condexpr_stmt);
9161  gsi_remove (&gsi, true);
9162  if (cond_stmt)
9163    {
9164      gsi = gsi_for_stmt (cond_stmt);
9165      gsi_remove (&gsi, true);
9166    }
9167  if (vce_stmt)
9168    {
9169      gsi = gsi_for_stmt (vce_stmt);
9170      gsi_remove (&gsi, true);
9171    }
9172
9173  return true;
9174}
9175
9176/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9177
9178      oldval = *addr;
9179      repeat:
9180	newval = rhs;	 // with oldval replacing *addr in rhs
9181	oldval = __sync_val_compare_and_swap (addr, oldval, newval);
9182	if (oldval != newval)
9183	  goto repeat;
9184
9185   INDEX is log2 of the size of the data type, and thus usable to find the
9186   index of the builtin decl.  */
9187
9188static bool
9189expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
9190			    tree addr, tree loaded_val, tree stored_val,
9191			    int index)
9192{
9193  tree loadedi, storedi, initial, new_storedi, old_vali;
9194  tree type, itype, cmpxchg, iaddr, atype;
9195  gimple_stmt_iterator si;
9196  basic_block loop_header = single_succ (load_bb);
9197  gimple *phi, *stmt;
9198  edge e;
9199  enum built_in_function fncode;
9200
9201  fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
9202				    + index + 1);
9203  cmpxchg = builtin_decl_explicit (fncode);
9204  if (cmpxchg == NULL_TREE)
9205    return false;
9206  type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9207  atype = type;
9208  itype = TREE_TYPE (TREE_TYPE (cmpxchg));
9209
9210  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
9211      || !can_atomic_load_p (TYPE_MODE (itype)))
9212    return false;
9213
9214  /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
9215  si = gsi_last_nondebug_bb (load_bb);
9216  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9217  location_t loc = gimple_location (gsi_stmt (si));
9218  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
9219  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
9220  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
9221
9222  /* For floating-point values, we'll need to view-convert them to integers
9223     so that we can perform the atomic compare and swap.  Simplify the
9224     following code by always setting up the "i"ntegral variables.  */
9225  if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
9226    {
9227      tree iaddr_val;
9228
9229      iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
9230							   true));
9231      atype = itype;
9232      iaddr_val
9233	= force_gimple_operand_gsi (&si,
9234				    fold_convert (TREE_TYPE (iaddr), addr),
9235				    false, NULL_TREE, true, GSI_SAME_STMT);
9236      stmt = gimple_build_assign (iaddr, iaddr_val);
9237      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9238      loadedi = create_tmp_var (itype);
9239      if (gimple_in_ssa_p (cfun))
9240	loadedi = make_ssa_name (loadedi);
9241    }
9242  else
9243    {
9244      iaddr = addr;
9245      loadedi = loaded_val;
9246    }
9247
9248  fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
9249  tree loaddecl = builtin_decl_explicit (fncode);
9250  if (loaddecl)
9251    initial
9252      = fold_convert (atype,
9253		      build_call_expr (loaddecl, 2, iaddr,
9254				       build_int_cst (NULL_TREE,
9255						      MEMMODEL_RELAXED)));
9256  else
9257    {
9258      tree off
9259	= build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
9260						      true), 0);
9261      initial = build2 (MEM_REF, atype, iaddr, off);
9262    }
9263
9264  initial
9265    = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
9266				GSI_SAME_STMT);
9267
9268  /* Move the value to the LOADEDI temporary.  */
9269  if (gimple_in_ssa_p (cfun))
9270    {
9271      gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
9272      phi = create_phi_node (loadedi, loop_header);
9273      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
9274	       initial);
9275    }
9276  else
9277    gsi_insert_before (&si,
9278		       gimple_build_assign (loadedi, initial),
9279		       GSI_SAME_STMT);
9280  if (loadedi != loaded_val)
9281    {
9282      gimple_stmt_iterator gsi2;
9283      tree x;
9284
9285      x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
9286      gsi2 = gsi_start_bb (loop_header);
9287      if (gimple_in_ssa_p (cfun))
9288	{
9289	  gassign *stmt;
9290	  x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9291					true, GSI_SAME_STMT);
9292	  stmt = gimple_build_assign (loaded_val, x);
9293	  gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
9294	}
9295      else
9296	{
9297	  x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
9298	  force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
9299				    true, GSI_SAME_STMT);
9300	}
9301    }
9302  gsi_remove (&si, true);
9303
9304  si = gsi_last_nondebug_bb (store_bb);
9305  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9306
9307  if (iaddr == addr)
9308    storedi = stored_val;
9309  else
9310    storedi
9311      = force_gimple_operand_gsi (&si,
9312				  build1 (VIEW_CONVERT_EXPR, itype,
9313					  stored_val), true, NULL_TREE, true,
9314				  GSI_SAME_STMT);
9315
9316  /* Build the compare&swap statement.  */
9317  tree ctype = build_complex_type (itype);
9318  int flag = int_size_in_bytes (itype);
9319  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
9320					      ctype, 6, iaddr, loadedi,
9321					      storedi,
9322					      build_int_cst (integer_type_node,
9323							     flag),
9324					      mo, fmo);
9325  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
9326  new_storedi = force_gimple_operand_gsi (&si,
9327					  fold_convert (TREE_TYPE (loadedi),
9328							new_storedi),
9329					  true, NULL_TREE,
9330					  true, GSI_SAME_STMT);
9331
9332  if (gimple_in_ssa_p (cfun))
9333    old_vali = loadedi;
9334  else
9335    {
9336      old_vali = create_tmp_var (TREE_TYPE (loadedi));
9337      stmt = gimple_build_assign (old_vali, loadedi);
9338      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9339
9340      stmt = gimple_build_assign (loadedi, new_storedi);
9341      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9342    }
9343
9344  /* Note that we always perform the comparison as an integer, even for
9345     floating point.  This allows the atomic operation to properly
9346     succeed even with NaNs and -0.0.  */
9347  tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
9348  stmt = gimple_build_cond_empty (ne);
9349  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9350
9351  /* Update cfg.  */
9352  e = single_succ_edge (store_bb);
9353  e->flags &= ~EDGE_FALLTHRU;
9354  e->flags |= EDGE_FALSE_VALUE;
9355  /* Expect no looping.  */
9356  e->probability = profile_probability::guessed_always ();
9357
9358  e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
9359  e->probability = profile_probability::guessed_never ();
9360
9361  /* Copy the new value to loadedi (we already did that before the condition
9362     if we are not in SSA).  */
9363  if (gimple_in_ssa_p (cfun))
9364    {
9365      phi = gimple_seq_first_stmt (phi_nodes (loop_header));
9366      SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
9367    }
9368
9369  /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
9370  gsi_remove (&si, true);
9371
9372  class loop *loop = alloc_loop ();
9373  loop->header = loop_header;
9374  loop->latch = store_bb;
9375  add_loop (loop, loop_header->loop_father);
9376
9377  if (gimple_in_ssa_p (cfun))
9378    update_ssa (TODO_update_ssa_no_phi);
9379
9380  return true;
9381}
9382
9383/* A subroutine of expand_omp_atomic.  Implement the atomic operation as:
9384
9385				  GOMP_atomic_start ();
9386				  *addr = rhs;
9387				  GOMP_atomic_end ();
9388
9389   The result is not globally atomic, but works so long as all parallel
9390   references are within #pragma omp atomic directives.  According to
9391   responses received from omp@openmp.org, appears to be within spec.
9392   Which makes sense, since that's how several other compilers handle
9393   this situation as well.
9394   LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
9395   expanding.  STORED_VAL is the operand of the matching
9396   GIMPLE_OMP_ATOMIC_STORE.
9397
9398   We replace
9399   GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
9400   loaded_val = *addr;
9401
9402   and replace
9403   GIMPLE_OMP_ATOMIC_STORE (stored_val)  with
9404   *addr = stored_val;
9405*/
9406
9407static bool
9408expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
9409			 tree addr, tree loaded_val, tree stored_val)
9410{
9411  gimple_stmt_iterator si;
9412  gassign *stmt;
9413  tree t;
9414
9415  si = gsi_last_nondebug_bb (load_bb);
9416  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
9417
9418  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
9419  t = build_call_expr (t, 0);
9420  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9421
9422  tree mem = build_simple_mem_ref (addr);
9423  TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9424  TREE_OPERAND (mem, 1)
9425    = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9426						 true),
9427		    TREE_OPERAND (mem, 1));
9428  stmt = gimple_build_assign (loaded_val, mem);
9429  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9430  gsi_remove (&si, true);
9431
9432  si = gsi_last_nondebug_bb (store_bb);
9433  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9434
9435  stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9436  gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9437
9438  t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9439  t = build_call_expr (t, 0);
9440  force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9441  gsi_remove (&si, true);
9442
9443  if (gimple_in_ssa_p (cfun))
9444    update_ssa (TODO_update_ssa_no_phi);
9445  return true;
9446}
9447
9448/* Expand an GIMPLE_OMP_ATOMIC statement.  We try to expand
9449   using expand_omp_atomic_fetch_op.  If it failed, we try to
9450   call expand_omp_atomic_pipeline, and if it fails too, the
9451   ultimate fallback is wrapping the operation in a mutex
9452   (expand_omp_atomic_mutex).  REGION is the atomic region built
9453   by build_omp_regions_1().  */
9454
9455static void
9456expand_omp_atomic (struct omp_region *region)
9457{
9458  basic_block load_bb = region->entry, store_bb = region->exit;
9459  gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9460  gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9461  tree loaded_val = gimple_omp_atomic_load_lhs (load);
9462  tree addr = gimple_omp_atomic_load_rhs (load);
9463  tree stored_val = gimple_omp_atomic_store_val (store);
9464  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9465  HOST_WIDE_INT index;
9466
9467  /* Make sure the type is one of the supported sizes.  */
9468  index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9469  index = exact_log2 (index);
9470  if (index >= 0 && index <= 4)
9471    {
9472      unsigned int align = TYPE_ALIGN_UNIT (type);
9473
9474      /* __sync builtins require strict data alignment.  */
9475      if (exact_log2 (align) >= index)
9476	{
9477	  /* Atomic load.  */
9478	  scalar_mode smode;
9479	  if (loaded_val == stored_val
9480	      && (is_int_mode (TYPE_MODE (type), &smode)
9481		  || is_float_mode (TYPE_MODE (type), &smode))
9482	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9483	      && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9484	    return;
9485
9486	  /* Atomic store.  */
9487	  if ((is_int_mode (TYPE_MODE (type), &smode)
9488	       || is_float_mode (TYPE_MODE (type), &smode))
9489	      && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9490	      && store_bb == single_succ (load_bb)
9491	      && first_stmt (store_bb) == store
9492	      && expand_omp_atomic_store (load_bb, addr, loaded_val,
9493					  stored_val, index))
9494	    return;
9495
9496	  /* When possible, use specialized atomic update functions.  */
9497	  if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9498	      && store_bb == single_succ (load_bb)
9499	      && expand_omp_atomic_fetch_op (load_bb, addr,
9500					     loaded_val, stored_val, index))
9501	    return;
9502
9503	  /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
9504	  if (store_bb == single_succ (load_bb)
9505	      && !gimple_in_ssa_p (cfun)
9506	      && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
9507					index))
9508	    return;
9509
9510	  /* If we don't have specialized __sync builtins, try and implement
9511	     as a compare and swap loop.  */
9512	  if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9513					  loaded_val, stored_val, index))
9514	    return;
9515	}
9516    }
9517
9518  /* The ultimate fallback is wrapping the operation in a mutex.  */
9519  expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9520}
9521
9522/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9523   at REGION_EXIT.  */
9524
9525static void
9526mark_loops_in_oacc_kernels_region (basic_block region_entry,
9527				   basic_block region_exit)
9528{
9529  class loop *outer = region_entry->loop_father;
9530  gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9531
9532  /* Don't parallelize the kernels region if it contains more than one outer
9533     loop.  */
9534  unsigned int nr_outer_loops = 0;
9535  class loop *single_outer = NULL;
9536  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9537    {
9538      gcc_assert (loop_outer (loop) == outer);
9539
9540      if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9541	continue;
9542
9543      if (region_exit != NULL
9544	  && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9545	continue;
9546
9547      nr_outer_loops++;
9548      single_outer = loop;
9549    }
9550  if (nr_outer_loops != 1)
9551    return;
9552
9553  for (class loop *loop = single_outer->inner;
9554       loop != NULL;
9555       loop = loop->inner)
9556    if (loop->next)
9557      return;
9558
9559  /* Mark the loops in the region.  */
9560  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9561    loop->in_oacc_kernels_region = true;
9562}
9563
9564/* Build target argument identifier from the DEVICE identifier, value
9565   identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
9566
9567static tree
9568get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
9569{
9570  tree t = build_int_cst (integer_type_node, device);
9571  if (subseqent_param)
9572    t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9573		     build_int_cst (integer_type_node,
9574				    GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9575  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9576		   build_int_cst (integer_type_node, id));
9577  return t;
9578}
9579
9580/* Like above but return it in type that can be directly stored as an element
9581   of the argument array.  */
9582
9583static tree
9584get_target_argument_identifier (int device, bool subseqent_param, int id)
9585{
9586  tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
9587  return fold_convert (ptr_type_node, t);
9588}
9589
9590/* Return a target argument consisting of DEVICE identifier, value identifier
9591   ID, and the actual VALUE.  */
9592
9593static tree
9594get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9595			   tree value)
9596{
9597  tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9598			fold_convert (integer_type_node, value),
9599			build_int_cst (unsigned_type_node,
9600				       GOMP_TARGET_ARG_VALUE_SHIFT));
9601  t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9602		   get_target_argument_identifier_1 (device, false, id));
9603  t = fold_convert (ptr_type_node, t);
9604  return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9605}
9606
9607/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9608   push one argument to ARGS with both the DEVICE, ID and VALUE embedded in it,
9609   otherwise push an identifier (with DEVICE and ID) and the VALUE in two
9610   arguments.  */
9611
9612static void
9613push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9614					 int id, tree value, vec <tree> *args)
9615{
9616  if (tree_fits_shwi_p (value)
9617      && tree_to_shwi (value) > -(1 << 15)
9618      && tree_to_shwi (value) < (1 << 15))
9619    args->quick_push (get_target_argument_value (gsi, device, id, value));
9620  else
9621    {
9622      args->quick_push (get_target_argument_identifier (device, true, id));
9623      value = fold_convert (ptr_type_node, value);
9624      value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9625					GSI_SAME_STMT);
9626      args->quick_push (value);
9627    }
9628}
9629
9630/* Create an array of arguments that is then passed to GOMP_target.  */
9631
9632static tree
9633get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9634{
9635  auto_vec <tree, 6> args;
9636  tree clauses = gimple_omp_target_clauses (tgt_stmt);
9637  tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9638  if (c)
9639    t = OMP_CLAUSE_NUM_TEAMS_UPPER_EXPR (c);
9640  else
9641    t = integer_minus_one_node;
9642  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9643					   GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9644
9645  c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9646  if (c)
9647    t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9648  else
9649    t = integer_minus_one_node;
9650  push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9651					   GOMP_TARGET_ARG_THREAD_LIMIT, t,
9652					   &args);
9653
9654  /* Produce more, perhaps device specific, arguments here.  */
9655
9656  tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9657							  args.length () + 1),
9658				  ".omp_target_args");
9659  for (unsigned i = 0; i < args.length (); i++)
9660    {
9661      tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9662			 build_int_cst (integer_type_node, i),
9663			 NULL_TREE, NULL_TREE);
9664      gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9665			 GSI_SAME_STMT);
9666    }
9667  tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9668		     build_int_cst (integer_type_node, args.length ()),
9669		     NULL_TREE, NULL_TREE);
9670  gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9671		     GSI_SAME_STMT);
9672  TREE_ADDRESSABLE (argarray) = 1;
9673  return build_fold_addr_expr (argarray);
9674}
9675
9676/* Expand the GIMPLE_OMP_TARGET starting at REGION.  */
9677
9678static void
9679expand_omp_target (struct omp_region *region)
9680{
9681  basic_block entry_bb, exit_bb, new_bb;
9682  struct function *child_cfun;
9683  tree child_fn, block, t;
9684  gimple_stmt_iterator gsi;
9685  gomp_target *entry_stmt;
9686  gimple *stmt;
9687  edge e;
9688  bool offloaded;
9689  int target_kind;
9690
9691  entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9692  target_kind = gimple_omp_target_kind (entry_stmt);
9693  new_bb = region->entry;
9694
9695  offloaded = is_gimple_omp_offloaded (entry_stmt);
9696  switch (target_kind)
9697    {
9698    case GF_OMP_TARGET_KIND_REGION:
9699    case GF_OMP_TARGET_KIND_UPDATE:
9700    case GF_OMP_TARGET_KIND_ENTER_DATA:
9701    case GF_OMP_TARGET_KIND_EXIT_DATA:
9702    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9703    case GF_OMP_TARGET_KIND_OACC_KERNELS:
9704    case GF_OMP_TARGET_KIND_OACC_SERIAL:
9705    case GF_OMP_TARGET_KIND_OACC_UPDATE:
9706    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9707    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9708    case GF_OMP_TARGET_KIND_OACC_DECLARE:
9709    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9710    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9711    case GF_OMP_TARGET_KIND_DATA:
9712    case GF_OMP_TARGET_KIND_OACC_DATA:
9713    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9714    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9715      break;
9716    default:
9717      gcc_unreachable ();
9718    }
9719
9720  child_fn = NULL_TREE;
9721  child_cfun = NULL;
9722  if (offloaded)
9723    {
9724      child_fn = gimple_omp_target_child_fn (entry_stmt);
9725      child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9726    }
9727
9728  /* Supported by expand_omp_taskreg, but not here.  */
9729  if (child_cfun != NULL)
9730    gcc_checking_assert (!child_cfun->cfg);
9731  gcc_checking_assert (!gimple_in_ssa_p (cfun));
9732
9733  entry_bb = region->entry;
9734  exit_bb = region->exit;
9735
9736  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9737    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9738
9739  /* Going on, all OpenACC compute constructs are mapped to
9740     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9741     To distinguish between them, we attach attributes.  */
9742  switch (target_kind)
9743    {
9744    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9745      DECL_ATTRIBUTES (child_fn)
9746	= tree_cons (get_identifier ("oacc parallel"),
9747		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9748      break;
9749    case GF_OMP_TARGET_KIND_OACC_KERNELS:
9750      DECL_ATTRIBUTES (child_fn)
9751	= tree_cons (get_identifier ("oacc kernels"),
9752		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9753      break;
9754    case GF_OMP_TARGET_KIND_OACC_SERIAL:
9755      DECL_ATTRIBUTES (child_fn)
9756	= tree_cons (get_identifier ("oacc serial"),
9757		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9758      break;
9759    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9760      DECL_ATTRIBUTES (child_fn)
9761	= tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9762		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9763      break;
9764    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9765      DECL_ATTRIBUTES (child_fn)
9766	= tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9767		     NULL_TREE, DECL_ATTRIBUTES (child_fn));
9768      break;
9769    default:
9770      /* Make sure we don't miss any.  */
9771      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9772			     && is_gimple_omp_offloaded (entry_stmt)));
9773      break;
9774    }
9775
9776  if (offloaded)
9777    {
9778      unsigned srcidx, dstidx, num;
9779
9780      /* If the offloading region needs data sent from the parent
9781	 function, then the very first statement (except possible
9782	 tree profile counter updates) of the offloading body
9783	 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O.  Since
9784	 &.OMP_DATA_O is passed as an argument to the child function,
9785	 we need to replace it with the argument as seen by the child
9786	 function.
9787
9788	 In most cases, this will end up being the identity assignment
9789	 .OMP_DATA_I = .OMP_DATA_I.  However, if the offloading body had
9790	 a function call that has been inlined, the original PARM_DECL
9791	 .OMP_DATA_I may have been converted into a different local
9792	 variable.  In which case, we need to keep the assignment.  */
9793      tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9794      if (data_arg)
9795	{
9796	  basic_block entry_succ_bb = single_succ (entry_bb);
9797	  gimple_stmt_iterator gsi;
9798	  tree arg;
9799	  gimple *tgtcopy_stmt = NULL;
9800	  tree sender = TREE_VEC_ELT (data_arg, 0);
9801
9802	  for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9803	    {
9804	      gcc_assert (!gsi_end_p (gsi));
9805	      stmt = gsi_stmt (gsi);
9806	      if (gimple_code (stmt) != GIMPLE_ASSIGN)
9807		continue;
9808
9809	      if (gimple_num_ops (stmt) == 2)
9810		{
9811		  tree arg = gimple_assign_rhs1 (stmt);
9812
9813		  /* We're ignoring the subcode because we're
9814		     effectively doing a STRIP_NOPS.  */
9815
9816		  if (TREE_CODE (arg) == ADDR_EXPR
9817		      && TREE_OPERAND (arg, 0) == sender)
9818		    {
9819		      tgtcopy_stmt = stmt;
9820		      break;
9821		    }
9822		}
9823	    }
9824
9825	  gcc_assert (tgtcopy_stmt != NULL);
9826	  arg = DECL_ARGUMENTS (child_fn);
9827
9828	  gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9829	  gsi_remove (&gsi, true);
9830	}
9831
9832      /* Declare local variables needed in CHILD_CFUN.  */
9833      block = DECL_INITIAL (child_fn);
9834      BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9835      /* The gimplifier could record temporaries in the offloading block
9836	 rather than in containing function's local_decls chain,
9837	 which would mean cgraph missed finalizing them.  Do it now.  */
9838      for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9839	if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9840	  varpool_node::finalize_decl (t);
9841      DECL_SAVED_TREE (child_fn) = NULL;
9842      /* We'll create a CFG for child_fn, so no gimple body is needed.  */
9843      gimple_set_body (child_fn, NULL);
9844      TREE_USED (block) = 1;
9845
9846      /* Reset DECL_CONTEXT on function arguments.  */
9847      for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9848	DECL_CONTEXT (t) = child_fn;
9849
9850      /* Split ENTRY_BB at GIMPLE_*,
9851	 so that it can be moved to the child function.  */
9852      gsi = gsi_last_nondebug_bb (entry_bb);
9853      stmt = gsi_stmt (gsi);
9854      gcc_assert (stmt
9855		  && gimple_code (stmt) == gimple_code (entry_stmt));
9856      e = split_block (entry_bb, stmt);
9857      gsi_remove (&gsi, true);
9858      entry_bb = e->dest;
9859      single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9860
9861      /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR.  */
9862      if (exit_bb)
9863	{
9864	  gsi = gsi_last_nondebug_bb (exit_bb);
9865	  gcc_assert (!gsi_end_p (gsi)
9866		      && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9867	  stmt = gimple_build_return (NULL);
9868	  gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9869	  gsi_remove (&gsi, true);
9870	}
9871
9872      /* Move the offloading region into CHILD_CFUN.  */
9873
9874      block = gimple_block (entry_stmt);
9875
9876      new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9877      if (exit_bb)
9878	single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9879      /* When the OMP expansion process cannot guarantee an up-to-date
9880	 loop tree arrange for the child function to fixup loops.  */
9881      if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9882	child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9883
9884      /* Remove non-local VAR_DECLs from child_cfun->local_decls list.  */
9885      num = vec_safe_length (child_cfun->local_decls);
9886      for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9887	{
9888	  t = (*child_cfun->local_decls)[srcidx];
9889	  if (DECL_CONTEXT (t) == cfun->decl)
9890	    continue;
9891	  if (srcidx != dstidx)
9892	    (*child_cfun->local_decls)[dstidx] = t;
9893	  dstidx++;
9894	}
9895      if (dstidx != num)
9896	vec_safe_truncate (child_cfun->local_decls, dstidx);
9897
9898      /* Inform the callgraph about the new function.  */
9899      child_cfun->curr_properties = cfun->curr_properties;
9900      child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9901      child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9902      cgraph_node *node = cgraph_node::get_create (child_fn);
9903      node->parallelized_function = 1;
9904      cgraph_node::add_new_function (child_fn, true);
9905
9906      /* Add the new function to the offload table.  */
9907      if (ENABLE_OFFLOADING)
9908	{
9909	  if (in_lto_p)
9910	    DECL_PRESERVE_P (child_fn) = 1;
9911	  vec_safe_push (offload_funcs, child_fn);
9912	}
9913
9914      bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9915		      && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9916
9917      /* Fix the callgraph edges for child_cfun.  Those for cfun will be
9918	 fixed in a following pass.  */
9919      push_cfun (child_cfun);
9920      if (need_asm)
9921	assign_assembler_name_if_needed (child_fn);
9922      cgraph_edge::rebuild_edges ();
9923
9924      /* Some EH regions might become dead, see PR34608.  If
9925	 pass_cleanup_cfg isn't the first pass to happen with the
9926	 new child, these dead EH edges might cause problems.
9927	 Clean them up now.  */
9928      if (flag_exceptions)
9929	{
9930	  basic_block bb;
9931	  bool changed = false;
9932
9933	  FOR_EACH_BB_FN (bb, cfun)
9934	    changed |= gimple_purge_dead_eh_edges (bb);
9935	  if (changed)
9936	    cleanup_tree_cfg ();
9937	}
9938      if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9939	verify_loop_structure ();
9940      pop_cfun ();
9941
9942      if (dump_file && !gimple_in_ssa_p (cfun))
9943	{
9944	  omp_any_child_fn_dumped = true;
9945	  dump_function_header (dump_file, child_fn, dump_flags);
9946	  dump_function_to_file (child_fn, dump_file, dump_flags);
9947	}
9948
9949      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9950    }
9951
9952  /* Emit a library call to launch the offloading region, or do data
9953     transfers.  */
9954  tree t1, t2, t3, t4, depend, c, clauses;
9955  enum built_in_function start_ix;
9956  unsigned int flags_i = 0;
9957
9958  switch (gimple_omp_target_kind (entry_stmt))
9959    {
9960    case GF_OMP_TARGET_KIND_REGION:
9961      start_ix = BUILT_IN_GOMP_TARGET;
9962      break;
9963    case GF_OMP_TARGET_KIND_DATA:
9964      start_ix = BUILT_IN_GOMP_TARGET_DATA;
9965      break;
9966    case GF_OMP_TARGET_KIND_UPDATE:
9967      start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9968      break;
9969    case GF_OMP_TARGET_KIND_ENTER_DATA:
9970      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9971      break;
9972    case GF_OMP_TARGET_KIND_EXIT_DATA:
9973      start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9974      flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9975      break;
9976    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9977    case GF_OMP_TARGET_KIND_OACC_KERNELS:
9978    case GF_OMP_TARGET_KIND_OACC_SERIAL:
9979    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9980    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9981      start_ix = BUILT_IN_GOACC_PARALLEL;
9982      break;
9983    case GF_OMP_TARGET_KIND_OACC_DATA:
9984    case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9985    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9986      start_ix = BUILT_IN_GOACC_DATA_START;
9987      break;
9988    case GF_OMP_TARGET_KIND_OACC_UPDATE:
9989      start_ix = BUILT_IN_GOACC_UPDATE;
9990      break;
9991    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
9992      start_ix = BUILT_IN_GOACC_ENTER_DATA;
9993      break;
9994    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
9995      start_ix = BUILT_IN_GOACC_EXIT_DATA;
9996      break;
9997    case GF_OMP_TARGET_KIND_OACC_DECLARE:
9998      start_ix = BUILT_IN_GOACC_DECLARE;
9999      break;
10000    default:
10001      gcc_unreachable ();
10002    }
10003
10004  clauses = gimple_omp_target_clauses (entry_stmt);
10005
10006  tree device = NULL_TREE;
10007  location_t device_loc = UNKNOWN_LOCATION;
10008  tree goacc_flags = NULL_TREE;
10009  if (is_gimple_omp_oacc (entry_stmt))
10010    {
10011      /* By default, no GOACC_FLAGs are set.  */
10012      goacc_flags = integer_zero_node;
10013    }
10014  else
10015    {
10016      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
10017      if (c)
10018	{
10019	  device = OMP_CLAUSE_DEVICE_ID (c);
10020	  device_loc = OMP_CLAUSE_LOCATION (c);
10021	  if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
10022	    sorry_at (device_loc, "%<ancestor%> not yet supported");
10023	}
10024      else
10025	{
10026	  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
10027	     library choose).  */
10028	  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
10029	  device_loc = gimple_location (entry_stmt);
10030	}
10031
10032      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
10033      /* FIXME: in_reduction(...) nowait is unimplemented yet, pretend
10034	 nowait doesn't appear.  */
10035      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
10036	c = NULL;
10037      if (c)
10038	flags_i |= GOMP_TARGET_FLAG_NOWAIT;
10039    }
10040
10041  /* By default, there is no conditional.  */
10042  tree cond = NULL_TREE;
10043  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
10044  if (c)
10045    cond = OMP_CLAUSE_IF_EXPR (c);
10046  /* If we found the clause 'if (cond)', build:
10047     OpenACC: goacc_flags = (cond ? goacc_flags : flags | GOACC_FLAG_HOST_FALLBACK)
10048     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
10049  if (cond)
10050    {
10051      tree *tp;
10052      if (is_gimple_omp_oacc (entry_stmt))
10053	tp = &goacc_flags;
10054      else
10055	{
10056	  /* Ensure 'device' is of the correct type.  */
10057	  device = fold_convert_loc (device_loc, integer_type_node, device);
10058
10059	  tp = &device;
10060	}
10061
10062      cond = gimple_boolify (cond);
10063
10064      basic_block cond_bb, then_bb, else_bb;
10065      edge e;
10066      tree tmp_var;
10067
10068      tmp_var = create_tmp_var (TREE_TYPE (*tp));
10069      if (offloaded)
10070	e = split_block_after_labels (new_bb);
10071      else
10072	{
10073	  gsi = gsi_last_nondebug_bb (new_bb);
10074	  gsi_prev (&gsi);
10075	  e = split_block (new_bb, gsi_stmt (gsi));
10076	}
10077      cond_bb = e->src;
10078      new_bb = e->dest;
10079      remove_edge (e);
10080
10081      then_bb = create_empty_bb (cond_bb);
10082      else_bb = create_empty_bb (then_bb);
10083      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
10084      set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
10085
10086      stmt = gimple_build_cond_empty (cond);
10087      gsi = gsi_last_bb (cond_bb);
10088      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10089
10090      gsi = gsi_start_bb (then_bb);
10091      stmt = gimple_build_assign (tmp_var, *tp);
10092      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10093
10094      gsi = gsi_start_bb (else_bb);
10095      if (is_gimple_omp_oacc (entry_stmt))
10096	stmt = gimple_build_assign (tmp_var,
10097				    BIT_IOR_EXPR,
10098				    *tp,
10099				    build_int_cst (integer_type_node,
10100						   GOACC_FLAG_HOST_FALLBACK));
10101      else
10102	stmt = gimple_build_assign (tmp_var,
10103				    build_int_cst (integer_type_node,
10104						   GOMP_DEVICE_HOST_FALLBACK));
10105      gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
10106
10107      make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
10108      make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
10109      add_bb_to_loop (then_bb, cond_bb->loop_father);
10110      add_bb_to_loop (else_bb, cond_bb->loop_father);
10111      make_edge (then_bb, new_bb, EDGE_FALLTHRU);
10112      make_edge (else_bb, new_bb, EDGE_FALLTHRU);
10113
10114      *tp = tmp_var;
10115
10116      gsi = gsi_last_nondebug_bb (new_bb);
10117    }
10118  else
10119    {
10120      gsi = gsi_last_nondebug_bb (new_bb);
10121
10122      if (device != NULL_TREE)
10123	device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
10124					   true, GSI_SAME_STMT);
10125    }
10126
10127  t = gimple_omp_target_data_arg (entry_stmt);
10128  if (t == NULL)
10129    {
10130      t1 = size_zero_node;
10131      t2 = build_zero_cst (ptr_type_node);
10132      t3 = t2;
10133      t4 = t2;
10134    }
10135  else
10136    {
10137      t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
10138      t1 = size_binop (PLUS_EXPR, t1, size_int (1));
10139      t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
10140      t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
10141      t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
10142    }
10143
10144  gimple *g;
10145  bool tagging = false;
10146  /* The maximum number used by any start_ix, without varargs.  */
10147  auto_vec<tree, 11> args;
10148  if (is_gimple_omp_oacc (entry_stmt))
10149    {
10150      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
10151					TREE_TYPE (goacc_flags), goacc_flags);
10152      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
10153						NULL_TREE, true,
10154						GSI_SAME_STMT);
10155      args.quick_push (goacc_flags_m);
10156    }
10157  else
10158    args.quick_push (device);
10159  if (offloaded)
10160    args.quick_push (build_fold_addr_expr (child_fn));
10161  args.quick_push (t1);
10162  args.quick_push (t2);
10163  args.quick_push (t3);
10164  args.quick_push (t4);
10165  switch (start_ix)
10166    {
10167    case BUILT_IN_GOACC_DATA_START:
10168    case BUILT_IN_GOACC_DECLARE:
10169    case BUILT_IN_GOMP_TARGET_DATA:
10170      break;
10171    case BUILT_IN_GOMP_TARGET:
10172    case BUILT_IN_GOMP_TARGET_UPDATE:
10173    case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
10174      args.quick_push (build_int_cst (unsigned_type_node, flags_i));
10175      c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
10176      if (c)
10177	depend = OMP_CLAUSE_DECL (c);
10178      else
10179	depend = build_int_cst (ptr_type_node, 0);
10180      args.quick_push (depend);
10181      if (start_ix == BUILT_IN_GOMP_TARGET)
10182	args.quick_push (get_target_arguments (&gsi, entry_stmt));
10183      break;
10184    case BUILT_IN_GOACC_PARALLEL:
10185      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
10186	{
10187	  tree dims = NULL_TREE;
10188	  unsigned int ix;
10189
10190	  /* For serial constructs we set all dimensions to 1.  */
10191	  for (ix = GOMP_DIM_MAX; ix--;)
10192	    dims = tree_cons (NULL_TREE, integer_one_node, dims);
10193	  oacc_replace_fn_attrib (child_fn, dims);
10194	}
10195      else
10196	oacc_set_fn_attrib (child_fn, clauses, &args);
10197      tagging = true;
10198      /* FALLTHRU */
10199    case BUILT_IN_GOACC_ENTER_DATA:
10200    case BUILT_IN_GOACC_EXIT_DATA:
10201    case BUILT_IN_GOACC_UPDATE:
10202      {
10203	tree t_async = NULL_TREE;
10204
10205	/* If present, use the value specified by the respective
10206	   clause, making sure that is of the correct type.  */
10207	c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
10208	if (c)
10209	  t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10210				      integer_type_node,
10211				      OMP_CLAUSE_ASYNC_EXPR (c));
10212	else if (!tagging)
10213	  /* Default values for t_async.  */
10214	  t_async = fold_convert_loc (gimple_location (entry_stmt),
10215				      integer_type_node,
10216				      build_int_cst (integer_type_node,
10217						     GOMP_ASYNC_SYNC));
10218	if (tagging && t_async)
10219	  {
10220	    unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
10221
10222	    if (TREE_CODE (t_async) == INTEGER_CST)
10223	      {
10224		/* See if we can pack the async arg in to the tag's
10225		   operand.  */
10226		i_async = TREE_INT_CST_LOW (t_async);
10227		if (i_async < GOMP_LAUNCH_OP_MAX)
10228		  t_async = NULL_TREE;
10229		else
10230		  i_async = GOMP_LAUNCH_OP_MAX;
10231	      }
10232	    args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
10233					      i_async));
10234	  }
10235	if (t_async)
10236	  args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
10237						    NULL_TREE, true,
10238						    GSI_SAME_STMT));
10239
10240	/* Save the argument index, and ... */
10241	unsigned t_wait_idx = args.length ();
10242	unsigned num_waits = 0;
10243	c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
10244	if (!tagging || c)
10245	  /* ... push a placeholder.  */
10246	  args.safe_push (integer_zero_node);
10247
10248	for (; c; c = OMP_CLAUSE_CHAIN (c))
10249	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
10250	    {
10251	      tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
10252					   integer_type_node,
10253					   OMP_CLAUSE_WAIT_EXPR (c));
10254	      arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
10255					      GSI_SAME_STMT);
10256	      args.safe_push (arg);
10257	      num_waits++;
10258	    }
10259
10260	if (!tagging || num_waits)
10261	  {
10262	    tree len;
10263
10264	    /* Now that we know the number, update the placeholder.  */
10265	    if (tagging)
10266	      len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
10267	    else
10268	      len = build_int_cst (integer_type_node, num_waits);
10269	    len = fold_convert_loc (gimple_location (entry_stmt),
10270				    unsigned_type_node, len);
10271	    args[t_wait_idx] = len;
10272	  }
10273      }
10274      break;
10275    default:
10276      gcc_unreachable ();
10277    }
10278  if (tagging)
10279    /*  Push terminal marker - zero.  */
10280    args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
10281
10282  g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
10283  gimple_set_location (g, gimple_location (entry_stmt));
10284  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
10285  if (!offloaded)
10286    {
10287      g = gsi_stmt (gsi);
10288      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
10289      gsi_remove (&gsi, true);
10290    }
10291}
10292
10293/* Expand the parallel region tree rooted at REGION.  Expansion
10294   proceeds in depth-first order.  Innermost regions are expanded
10295   first.  This way, parallel regions that require a new function to
10296   be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
10297   internal dependencies in their body.  */
10298
10299static void
10300expand_omp (struct omp_region *region)
10301{
10302  omp_any_child_fn_dumped = false;
10303  while (region)
10304    {
10305      location_t saved_location;
10306      gimple *inner_stmt = NULL;
10307
10308      /* First, determine whether this is a combined parallel+workshare
10309	 region.  */
10310      if (region->type == GIMPLE_OMP_PARALLEL)
10311	determine_parallel_type (region);
10312
10313      if (region->type == GIMPLE_OMP_FOR
10314	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
10315	inner_stmt = last_stmt (region->inner->entry);
10316
10317      if (region->inner)
10318	expand_omp (region->inner);
10319
10320      saved_location = input_location;
10321      if (gimple_has_location (last_stmt (region->entry)))
10322	input_location = gimple_location (last_stmt (region->entry));
10323
10324      switch (region->type)
10325	{
10326	case GIMPLE_OMP_PARALLEL:
10327	case GIMPLE_OMP_TASK:
10328	  expand_omp_taskreg (region);
10329	  break;
10330
10331	case GIMPLE_OMP_FOR:
10332	  expand_omp_for (region, inner_stmt);
10333	  break;
10334
10335	case GIMPLE_OMP_SECTIONS:
10336	  expand_omp_sections (region);
10337	  break;
10338
10339	case GIMPLE_OMP_SECTION:
10340	  /* Individual omp sections are handled together with their
10341	     parent GIMPLE_OMP_SECTIONS region.  */
10342	  break;
10343
10344	case GIMPLE_OMP_SINGLE:
10345	case GIMPLE_OMP_SCOPE:
10346	  expand_omp_single (region);
10347	  break;
10348
10349	case GIMPLE_OMP_ORDERED:
10350	  {
10351	    gomp_ordered *ord_stmt
10352	      = as_a <gomp_ordered *> (last_stmt (region->entry));
10353	    if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
10354				 OMP_CLAUSE_DEPEND))
10355	      {
10356		/* We'll expand these when expanding corresponding
10357		   worksharing region with ordered(n) clause.  */
10358		gcc_assert (region->outer
10359			    && region->outer->type == GIMPLE_OMP_FOR);
10360		region->ord_stmt = ord_stmt;
10361		break;
10362	      }
10363	  }
10364	  /* FALLTHRU */
10365	case GIMPLE_OMP_MASTER:
10366	case GIMPLE_OMP_MASKED:
10367	case GIMPLE_OMP_TASKGROUP:
10368	case GIMPLE_OMP_CRITICAL:
10369	case GIMPLE_OMP_TEAMS:
10370	  expand_omp_synch (region);
10371	  break;
10372
10373	case GIMPLE_OMP_ATOMIC_LOAD:
10374	  expand_omp_atomic (region);
10375	  break;
10376
10377	case GIMPLE_OMP_TARGET:
10378	  expand_omp_target (region);
10379	  break;
10380
10381	default:
10382	  gcc_unreachable ();
10383	}
10384
10385      input_location = saved_location;
10386      region = region->next;
10387    }
10388  if (omp_any_child_fn_dumped)
10389    {
10390      if (dump_file)
10391	dump_function_header (dump_file, current_function_decl, dump_flags);
10392      omp_any_child_fn_dumped = false;
10393    }
10394}
10395
10396/* Helper for build_omp_regions.  Scan the dominator tree starting at
10397   block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
10398   true, the function ends once a single tree is built (otherwise, whole
10399   forest of OMP constructs may be built).  */
10400
10401static void
10402build_omp_regions_1 (basic_block bb, struct omp_region *parent,
10403		     bool single_tree)
10404{
10405  gimple_stmt_iterator gsi;
10406  gimple *stmt;
10407  basic_block son;
10408
10409  gsi = gsi_last_nondebug_bb (bb);
10410  if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
10411    {
10412      struct omp_region *region;
10413      enum gimple_code code;
10414
10415      stmt = gsi_stmt (gsi);
10416      code = gimple_code (stmt);
10417      if (code == GIMPLE_OMP_RETURN)
10418	{
10419	  /* STMT is the return point out of region PARENT.  Mark it
10420	     as the exit point and make PARENT the immediately
10421	     enclosing region.  */
10422	  gcc_assert (parent);
10423	  region = parent;
10424	  region->exit = bb;
10425	  parent = parent->outer;
10426	}
10427      else if (code == GIMPLE_OMP_ATOMIC_STORE)
10428	{
10429	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
10430	     GIMPLE_OMP_RETURN, but matches with
10431	     GIMPLE_OMP_ATOMIC_LOAD.  */
10432	  gcc_assert (parent);
10433	  gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10434	  region = parent;
10435	  region->exit = bb;
10436	  parent = parent->outer;
10437	}
10438      else if (code == GIMPLE_OMP_CONTINUE)
10439	{
10440	  gcc_assert (parent);
10441	  parent->cont = bb;
10442	}
10443      else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10444	{
10445	  /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10446	     GIMPLE_OMP_SECTIONS, and we do nothing for it.  */
10447	}
10448      else
10449	{
10450	  region = new_omp_region (bb, code, parent);
10451	  /* Otherwise...  */
10452	  if (code == GIMPLE_OMP_TARGET)
10453	    {
10454	      switch (gimple_omp_target_kind (stmt))
10455		{
10456		case GF_OMP_TARGET_KIND_REGION:
10457		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10458		case GF_OMP_TARGET_KIND_OACC_KERNELS:
10459		case GF_OMP_TARGET_KIND_OACC_SERIAL:
10460		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10461		case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10462		  break;
10463		case GF_OMP_TARGET_KIND_UPDATE:
10464		case GF_OMP_TARGET_KIND_ENTER_DATA:
10465		case GF_OMP_TARGET_KIND_EXIT_DATA:
10466		case GF_OMP_TARGET_KIND_DATA:
10467		case GF_OMP_TARGET_KIND_OACC_DATA:
10468		case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10469		case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10470		case GF_OMP_TARGET_KIND_OACC_UPDATE:
10471		case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10472		case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10473		case GF_OMP_TARGET_KIND_OACC_DECLARE:
10474		  /* ..., other than for those stand-alone directives...
10475		     To be precise, target data isn't stand-alone, but
10476		     gimplifier put the end API call into try finally block
10477		     for it, so omp expansion can treat it as such.  */
10478		  region = NULL;
10479		  break;
10480		default:
10481		  gcc_unreachable ();
10482		}
10483	    }
10484	  else if (code == GIMPLE_OMP_ORDERED
10485		   && omp_find_clause (gimple_omp_ordered_clauses
10486					 (as_a <gomp_ordered *> (stmt)),
10487				       OMP_CLAUSE_DEPEND))
10488	    /* #pragma omp ordered depend is also just a stand-alone
10489	       directive.  */
10490	    region = NULL;
10491	  else if (code == GIMPLE_OMP_TASK
10492		   && gimple_omp_task_taskwait_p (stmt))
10493	    /* #pragma omp taskwait depend(...) is a stand-alone directive.  */
10494	    region = NULL;
10495	  else if (code == GIMPLE_OMP_TASKGROUP)
10496	    /* #pragma omp taskgroup isn't a stand-alone directive, but
10497	       gimplifier put the end API call into try finall block
10498	       for it, so omp expansion can treat it as such.  */
10499	    region = NULL;
10500	  /* ..., this directive becomes the parent for a new region.  */
10501	  if (region)
10502	    parent = region;
10503	}
10504    }
10505
10506  if (single_tree && !parent)
10507    return;
10508
10509  for (son = first_dom_son (CDI_DOMINATORS, bb);
10510       son;
10511       son = next_dom_son (CDI_DOMINATORS, son))
10512    build_omp_regions_1 (son, parent, single_tree);
10513}
10514
10515/* Builds the tree of OMP regions rooted at ROOT, storing it to
10516   root_omp_region.  */
10517
10518static void
10519build_omp_regions_root (basic_block root)
10520{
10521  gcc_assert (root_omp_region == NULL);
10522  build_omp_regions_1 (root, NULL, true);
10523  gcc_assert (root_omp_region != NULL);
10524}
10525
10526/* Expands omp construct (and its subconstructs) starting in HEAD.  */
10527
10528void
10529omp_expand_local (basic_block head)
10530{
10531  build_omp_regions_root (head);
10532  if (dump_file && (dump_flags & TDF_DETAILS))
10533    {
10534      fprintf (dump_file, "\nOMP region tree\n\n");
10535      dump_omp_region (dump_file, root_omp_region, 0);
10536      fprintf (dump_file, "\n");
10537    }
10538
10539  remove_exit_barriers (root_omp_region);
10540  expand_omp (root_omp_region);
10541
10542  omp_free_regions ();
10543}
10544
10545/* Scan the CFG and build a tree of OMP regions.  Return the root of
10546   the OMP region tree.  */
10547
10548static void
10549build_omp_regions (void)
10550{
10551  gcc_assert (root_omp_region == NULL);
10552  calculate_dominance_info (CDI_DOMINATORS);
10553  build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10554}
10555
10556/* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
10557
10558static unsigned int
10559execute_expand_omp (void)
10560{
10561  build_omp_regions ();
10562
10563  if (!root_omp_region)
10564    return 0;
10565
10566  if (dump_file)
10567    {
10568      fprintf (dump_file, "\nOMP region tree\n\n");
10569      dump_omp_region (dump_file, root_omp_region, 0);
10570      fprintf (dump_file, "\n");
10571    }
10572
10573  remove_exit_barriers (root_omp_region);
10574
10575  expand_omp (root_omp_region);
10576
10577  if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10578    verify_loop_structure ();
10579  cleanup_tree_cfg ();
10580
10581  omp_free_regions ();
10582
10583  return 0;
10584}
10585
10586/* OMP expansion -- the default pass, run before creation of SSA form.  */
10587
10588namespace {
10589
10590const pass_data pass_data_expand_omp =
10591{
10592  GIMPLE_PASS, /* type */
10593  "ompexp", /* name */
10594  OPTGROUP_OMP, /* optinfo_flags */
10595  TV_NONE, /* tv_id */
10596  PROP_gimple_any, /* properties_required */
10597  PROP_gimple_eomp, /* properties_provided */
10598  0, /* properties_destroyed */
10599  0, /* todo_flags_start */
10600  0, /* todo_flags_finish */
10601};
10602
10603class pass_expand_omp : public gimple_opt_pass
10604{
10605public:
10606  pass_expand_omp (gcc::context *ctxt)
10607    : gimple_opt_pass (pass_data_expand_omp, ctxt)
10608  {}
10609
10610  /* opt_pass methods: */
10611  virtual unsigned int execute (function *)
10612    {
10613      bool gate = ((flag_openacc != 0 || flag_openmp != 0
10614		    || flag_openmp_simd != 0)
10615		   && !seen_error ());
10616
10617      /* This pass always runs, to provide PROP_gimple_eomp.
10618	 But often, there is nothing to do.  */
10619      if (!gate)
10620	return 0;
10621
10622      return execute_expand_omp ();
10623    }
10624
10625}; // class pass_expand_omp
10626
10627} // anon namespace
10628
10629gimple_opt_pass *
10630make_pass_expand_omp (gcc::context *ctxt)
10631{
10632  return new pass_expand_omp (ctxt);
10633}
10634
10635namespace {
10636
10637const pass_data pass_data_expand_omp_ssa =
10638{
10639  GIMPLE_PASS, /* type */
10640  "ompexpssa", /* name */
10641  OPTGROUP_OMP, /* optinfo_flags */
10642  TV_NONE, /* tv_id */
10643  PROP_cfg | PROP_ssa, /* properties_required */
10644  PROP_gimple_eomp, /* properties_provided */
10645  0, /* properties_destroyed */
10646  0, /* todo_flags_start */
10647  TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10648};
10649
10650class pass_expand_omp_ssa : public gimple_opt_pass
10651{
10652public:
10653  pass_expand_omp_ssa (gcc::context *ctxt)
10654    : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10655  {}
10656
10657  /* opt_pass methods: */
10658  virtual bool gate (function *fun)
10659    {
10660      return !(fun->curr_properties & PROP_gimple_eomp);
10661    }
10662  virtual unsigned int execute (function *) { return execute_expand_omp (); }
10663  opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10664
10665}; // class pass_expand_omp_ssa
10666
10667} // anon namespace
10668
10669gimple_opt_pass *
10670make_pass_expand_omp_ssa (gcc::context *ctxt)
10671{
10672  return new pass_expand_omp_ssa (ctxt);
10673}
10674
10675/* Called from tree-cfg.cc::make_edges to create cfg edges for all relevant
10676   GIMPLE_* codes.  */
10677
10678bool
10679omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10680		       int *region_idx)
10681{
10682  gimple *last = last_stmt (bb);
10683  enum gimple_code code = gimple_code (last);
10684  struct omp_region *cur_region = *region;
10685  bool fallthru = false;
10686
10687  switch (code)
10688    {
10689    case GIMPLE_OMP_PARALLEL:
10690    case GIMPLE_OMP_FOR:
10691    case GIMPLE_OMP_SINGLE:
10692    case GIMPLE_OMP_TEAMS:
10693    case GIMPLE_OMP_MASTER:
10694    case GIMPLE_OMP_MASKED:
10695    case GIMPLE_OMP_SCOPE:
10696    case GIMPLE_OMP_CRITICAL:
10697    case GIMPLE_OMP_SECTION:
10698      cur_region = new_omp_region (bb, code, cur_region);
10699      fallthru = true;
10700      break;
10701
10702    case GIMPLE_OMP_TASKGROUP:
10703      cur_region = new_omp_region (bb, code, cur_region);
10704      fallthru = true;
10705      cur_region = cur_region->outer;
10706      break;
10707
10708    case GIMPLE_OMP_TASK:
10709      cur_region = new_omp_region (bb, code, cur_region);
10710      fallthru = true;
10711      if (gimple_omp_task_taskwait_p (last))
10712	cur_region = cur_region->outer;
10713      break;
10714
10715    case GIMPLE_OMP_ORDERED:
10716      cur_region = new_omp_region (bb, code, cur_region);
10717      fallthru = true;
10718      if (omp_find_clause (gimple_omp_ordered_clauses
10719			     (as_a <gomp_ordered *> (last)),
10720			   OMP_CLAUSE_DEPEND))
10721	cur_region = cur_region->outer;
10722      break;
10723
10724    case GIMPLE_OMP_TARGET:
10725      cur_region = new_omp_region (bb, code, cur_region);
10726      fallthru = true;
10727      switch (gimple_omp_target_kind (last))
10728	{
10729	case GF_OMP_TARGET_KIND_REGION:
10730	case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10731	case GF_OMP_TARGET_KIND_OACC_KERNELS:
10732	case GF_OMP_TARGET_KIND_OACC_SERIAL:
10733	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10734	case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10735	  break;
10736	case GF_OMP_TARGET_KIND_UPDATE:
10737	case GF_OMP_TARGET_KIND_ENTER_DATA:
10738	case GF_OMP_TARGET_KIND_EXIT_DATA:
10739	case GF_OMP_TARGET_KIND_DATA:
10740	case GF_OMP_TARGET_KIND_OACC_DATA:
10741	case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10742	case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10743	case GF_OMP_TARGET_KIND_OACC_UPDATE:
10744	case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
10745	case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
10746	case GF_OMP_TARGET_KIND_OACC_DECLARE:
10747	  cur_region = cur_region->outer;
10748	  break;
10749	default:
10750	  gcc_unreachable ();
10751	}
10752      break;
10753
10754    case GIMPLE_OMP_SECTIONS:
10755      cur_region = new_omp_region (bb, code, cur_region);
10756      fallthru = true;
10757      break;
10758
10759    case GIMPLE_OMP_SECTIONS_SWITCH:
10760      fallthru = false;
10761      break;
10762
10763    case GIMPLE_OMP_ATOMIC_LOAD:
10764    case GIMPLE_OMP_ATOMIC_STORE:
10765       fallthru = true;
10766       break;
10767
10768    case GIMPLE_OMP_RETURN:
10769      /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10770	 somewhere other than the next block.  This will be
10771	 created later.  */
10772      cur_region->exit = bb;
10773      if (cur_region->type == GIMPLE_OMP_TASK)
10774	/* Add an edge corresponding to not scheduling the task
10775	   immediately.  */
10776	make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10777      fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10778      cur_region = cur_region->outer;
10779      break;
10780
10781    case GIMPLE_OMP_CONTINUE:
10782      cur_region->cont = bb;
10783      switch (cur_region->type)
10784	{
10785	case GIMPLE_OMP_FOR:
10786	  /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10787	     succs edges as abnormal to prevent splitting
10788	     them.  */
10789	  single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10790	  /* Make the loopback edge.  */
10791	  make_edge (bb, single_succ (cur_region->entry),
10792		     EDGE_ABNORMAL);
10793
10794	  /* Create an edge from GIMPLE_OMP_FOR to exit, which
10795	     corresponds to the case that the body of the loop
10796	     is not executed at all.  */
10797	  make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10798	  make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10799	  fallthru = false;
10800	  break;
10801
10802	case GIMPLE_OMP_SECTIONS:
10803	  /* Wire up the edges into and out of the nested sections.  */
10804	  {
10805	    basic_block switch_bb = single_succ (cur_region->entry);
10806
10807	    struct omp_region *i;
10808	    for (i = cur_region->inner; i ; i = i->next)
10809	      {
10810		gcc_assert (i->type == GIMPLE_OMP_SECTION);
10811		make_edge (switch_bb, i->entry, 0);
10812		make_edge (i->exit, bb, EDGE_FALLTHRU);
10813	      }
10814
10815	    /* Make the loopback edge to the block with
10816	       GIMPLE_OMP_SECTIONS_SWITCH.  */
10817	    make_edge (bb, switch_bb, 0);
10818
10819	    /* Make the edge from the switch to exit.  */
10820	    make_edge (switch_bb, bb->next_bb, 0);
10821	    fallthru = false;
10822	  }
10823	  break;
10824
10825	case GIMPLE_OMP_TASK:
10826	  fallthru = true;
10827	  break;
10828
10829	default:
10830	  gcc_unreachable ();
10831	}
10832      break;
10833
10834    default:
10835      gcc_unreachable ();
10836    }
10837
10838  if (*region != cur_region)
10839    {
10840      *region = cur_region;
10841      if (cur_region)
10842	*region_idx = cur_region->entry->index;
10843      else
10844	*region_idx = 0;
10845    }
10846
10847  return fallthru;
10848}
10849