/* Loop autoparallelization.
   Copyright (C) 2006-2020 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-general.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "tree-ssa-alias.h"
#include "tree-eh.h"
#include "gomp-constants.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "attribs.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether
   its iterations are independent, and if that is the case (and some
   additional conditions regarding profitability and correctness are
   satisfied), we add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let
   the omp expansion machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
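
/* For example (a sketch in OpenMP pragma form; the pass emits the
   corresponding GIMPLE_OMP_* codes directly rather than pragmas), a loop

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   whose iterations are independent is conceptually rewritten as

     #pragma omp parallel num_threads(4)
     #pragma omp for schedule(static)
     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   after which the omp expansion machinery outlines the loop body into a
   new function and spawns the threads.  */
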
/*
  Reduction handling:
  currently we use code inspired by vect_force_simple_reduction to detect
  reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # Collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit_bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/

/* Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

static void
report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
}

/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
   operation.  Return true if the results of DEF_STMT_INFO are something
   that can be accumulated by such a reduction.  */

static bool
parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
{
  return (is_gimple_assign (def_stmt_info->stmt)
	  || is_gimple_call (def_stmt_info->stmt)
	  || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
	  || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
	      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
	      && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
}

/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */

static bool
parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
			   gimple *first_stmt)
{
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple *loop_use_stmt = NULL;
  stmt_vec_info use_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  auto_vec<stmt_vec_info, 8> reduc_chain;
  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;

	  /* Check if we got back to the reduction phi.  */
	  if (use_stmt == phi)
	    {
	      loop_use_stmt = use_stmt;
	      found = true;
	      break;
	    }

	  if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	    {
	      loop_use_stmt = use_stmt;
	      nloop_uses++;
	    }
	  else
	    n_out_of_loop_uses++;

	  /* There can be either a single use in the loop or two uses in
	     phi nodes.  */
	  if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
	    return false;
	}

      if (found)
	break;

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
	return false;

      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
	return false;

      if (!is_gimple_assign (loop_use_stmt)
	  || code != gimple_assign_rhs_code (loop_use_stmt)
	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
	return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
      reduc_chain.safe_push (use_stmt_info);

      lhs = gimple_assign_lhs (loop_use_stmt);
      size++;
    }

  if (!found || loop_use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand the second
     operand.  */
  lhs = PHI_RESULT (phi);
  for (unsigned i = 0; i < reduc_chain.length (); ++i)
    {
      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
      if (gimple_assign_rhs2 (next_stmt) == lhs)
	{
	  tree op = gimple_assign_rhs1 (next_stmt);
	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

	  /* Check that the other def is either defined in the loop
	     ("vect_internal_def"), or it's an induction (defined by a
	     loop-header phi-node).  */
	  if (def_stmt_info
	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
	      && parloops_valid_reduction_input_p (def_stmt_info))
	    {
	      lhs = gimple_assign_lhs (next_stmt);
	      continue;
	    }

	  return false;
	}
      else
	{
	  tree op = gimple_assign_rhs2 (next_stmt);
	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

	  /* Check that the other def is either defined in the loop
	     ("vect_internal_def"), or it's an induction (defined by a
	     loop-header phi-node).  */
	  if (def_stmt_info
	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
	      && parloops_valid_reduction_input_p (def_stmt_info))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
				 next_stmt);

	      swap_ssa_operands (next_stmt,
				 gimple_assign_rhs1_ptr (next_stmt),
				 gimple_assign_rhs2_ptr (next_stmt));
	      update_stmt (next_stmt);
	    }
	  else
	    return false;
	}

      lhs = gimple_assign_lhs (next_stmt);
    }

  /* Build up the actual chain.  */
  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
    {
      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
    }
  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;

  /* Save the chain for further analysis in SLP detection.  */
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
  REDUC_GROUP_SIZE (reduc_chain[0]) = size;

  return true;
}

/* Return true if we need an in-order reduction for operation CODE
   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
   overflow must wrap.  */

static bool
parloops_needs_fold_left_reduction_p (tree type, tree_code code,
				      bool need_wrapping_integral_overflow)
{
  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type))
    switch (code)
      {
      case MIN_EXPR:
      case MAX_EXPR:
	return false;

      default:
	return !flag_associative_math;
      }

  if (INTEGRAL_TYPE_P (type))
    {
      if (!operation_no_trapping_overflow (type, code))
	return true;
      if (need_wrapping_integral_overflow
	  && !TYPE_OVERFLOW_WRAPS (type)
	  && operation_can_overflow (code))
	return true;
      return false;
    }

  if (SAT_FIXED_POINT_TYPE_P (type))
    return true;

  return false;
}
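
/* For example, reassociating a double-precision sum can change its value:
   (1e20 + -1e20) + 3.14 is 3.14, but 1e20 + (-1e20 + 3.14) is 0.0, because
   3.14 is absorbed when added to -1e20.  A floating-point "sum += x[i]"
   must therefore stay in order unless -fassociative-math is in effect.
   Similarly, reassociating additions on a trapping signed type can
   introduce an intermediate overflow that the original order avoided, and
   saturating fixed-point arithmetic is not associative at all.  */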

/* Function parloops_is_simple_reduction

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   or

   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

     a1 = phi < a0, a2 >
     inner loop (def of a3)
     a2 = phi < a3 >

   (4) Detect condition expressions, i.e.:
     for (int i = 0; i < N; i++)
       if (a[i] < val)
	ret_val = a[i];

*/

static stmt_vec_info
parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
			      bool *double_reduc,
			      bool need_wrapping_integral_overflow,
			      enum vect_reduction_type *v_reduc_type)
{
  gphi *phi = as_a <gphi *> (phi_info->stmt);
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
  gimple *phi_use_stmt = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  tree type;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool phi_def;

  *double_reduc = false;
  *v_reduc_type = TREE_CODE_REDUCTION;

  tree phi_name = PHI_RESULT (phi);
  /* ???  If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (phi_name))
    return NULL;
  unsigned nphi_def_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "intermediate value used outside loop.\n");

	  return NULL;
	}

      nphi_def_loop_uses++;
      phi_use_stmt = use_stmt;
    }

  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction: not ssa_name: %T\n", loop_arg);
      return NULL;
    }

  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
  if (!def_stmt_info
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
    return NULL;

  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction: unhandled reduction operation: %G",
			 def_stmt_info->stmt);
      return NULL;
    }

  unsigned nlatch_def_loop_uses = 0;
  auto_vec<gphi *, 3> lcphis;
  bool inner_loop_of_double_reduc = false;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	nlatch_def_loop_uses++;
      else
	{
	  /* We can have more than one loop-closed PHI.  */
	  lcphis.safe_push (as_a <gphi *> (use_stmt));
	  if (nested_in_vect_loop
	      && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
		  == vect_double_reduction_def))
	    inner_loop_of_double_reduc = true;
	}
    }

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop, we cannot handle uses of the
     reduction value.  */
  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction used in loop.\n");
      return NULL;
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
	  || TREE_CODE (op1) != SSA_NAME)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported phi node definition.\n");

	  return NULL;
	}

      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (def1)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
	  && loop->inner
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
	  && is_gimple_assign (def1)
	  && is_a <gphi *> (phi_use_stmt)
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt,
			     "detected double reduction: ");

	  *double_reduc = true;
	  return def_stmt_info;
	}

      return NULL;
    }

  /* If we are vectorizing an inner reduction we are executing that
     in the original order only in case we are not dealing with a
     double reduction.  */
  bool check_reduction = true;
  if (flow_loop_nested_p (vect_loop, loop))
    {
      gphi *lcphi;
      unsigned i;
      check_reduction = false;
      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
	  {
	    gimple *use_stmt = USE_STMT (use_p);
	    if (is_gimple_debug (use_stmt))
	      continue;
	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
	      check_reduction = true;
	  }
    }

  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
  code = orig_code = gimple_assign_rhs_code (def_stmt);

  if (nested_in_vect_loop && !check_reduction)
    {
      /* FIXME: Even for non-reductions code generation is funneled
	 through vectorizable_reduction for the stmt defining the
	 PHI latch value.  So we have to artificially restrict ourselves
	 for the supported operations.  */
      switch (get_gimple_rhs_class (code))
	{
	case GIMPLE_BINARY_RHS:
	case GIMPLE_TERNARY_RHS:
	  break;
	default:
	  /* Not supported by vectorizable_reduction.  */
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			     "nested cycle: not handled operation: ");
	  return NULL;
	}
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
      return def_stmt_info;
    }

  /* We can handle "res -= x[i]", which is non-associative, by simply
     rewriting it into "res += -x[i]".  Avoid changing the gimple
     instruction for the first simple tests and only do this if we're
     allowed to change code at all.  */
  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
    code = PLUS_EXPR;

  if (code == COND_EXPR)
    {
      if (! nested_in_vect_loop)
	*v_reduc_type = COND_REDUCTION;

      op3 = gimple_assign_rhs1 (def_stmt);
      if (COMPARISON_CLASS_P (op3))
	{
	  op4 = TREE_OPERAND (op3, 1);
	  op3 = TREE_OPERAND (op3, 0);
	}
      if (op3 == phi_name || op4 == phi_name)
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			     "reduction: condition depends on previous"
			     " iteration: ");
	  return NULL;
	}

      op1 = gimple_assign_rhs2 (def_stmt);
      op2 = gimple_assign_rhs3 (def_stmt);
    }
  else if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: not commutative/associative: ");
      return NULL;
    }
  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
    }
  else
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: not handled operation: ");
      return NULL;
    }

  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: both uses not ssa_names: ");

      return NULL;
    }

  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type, TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "reduction: multiple types: operation type: "
			   "%T, operands types: %T,%T",
			   type, TREE_TYPE (op1), TREE_TYPE (op2));
	  if (op3)
	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));

	  if (op4)
	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
	  dump_printf (MSG_NOTE, "\n");
	}

      return NULL;
    }

  /* Check whether it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
  if (check_reduction
      && *v_reduc_type == TREE_CODE_REDUCTION
      && parloops_needs_fold_left_reduction_p (type, code,
					       need_wrapping_integral_overflow))
    *v_reduc_type = FOLD_LEFT_REDUCTION;

  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  stmt_vec_info def1_info = loop_info->lookup_def (op1);
  stmt_vec_info def2_info = loop_info->lookup_def (op2);
  if (code != COND_EXPR && !def1_info && !def2_info)
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt,
			 "reduction: no defs for operands: ");
      return NULL;
    }

  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2_info
      && def2_info->stmt == phi
      && (code == COND_EXPR
	  || !def1_info
	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
	  || parloops_valid_reduction_input_p (def1_info)))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
      return def_stmt_info;
    }

  if (def1_info
      && def1_info->stmt == phi
      && (code == COND_EXPR
	  || !def2_info
	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
	  || parloops_valid_reduction_input_p (def2_info)))
    {
      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
	{
	  /* Check if we can swap operands (just for simplicity - so that
	     the rest of the code can assume that the reduction variable
	     is always the last (second) argument).  */
	  if (code == COND_EXPR)
	    {
	      /* Swap cond_expr by inverting the condition.  */
	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
	      enum tree_code invert_code = ERROR_MARK;
	      enum tree_code cond_code = TREE_CODE (cond_expr);

	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}
	      if (invert_code != ERROR_MARK)
		{
		  TREE_SET_CODE (cond_expr, invert_code);
		  swap_ssa_operands (def_stmt,
				     gimple_assign_rhs2_ptr (def_stmt),
				     gimple_assign_rhs3_ptr (def_stmt));
		}
	      else
		{
		  if (dump_enabled_p ())
		    report_ploop_op (MSG_NOTE, def_stmt,
				     "detected reduction: cannot swap operands "
				     "for cond_expr");
		  return NULL;
		}
	    }
	  else
	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
			       gimple_assign_rhs2_ptr (def_stmt));

	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt,
			     "detected reduction: need to swap operands: ");
	}
      else
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
	}

      return def_stmt_info;
    }

  /* Try to find SLP reduction chain.  */
  if (! nested_in_vect_loop
      && code != COND_EXPR
      && orig_code != MINUS_EXPR
      && parloops_is_slp_reduction (loop_info, phi, def_stmt))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt,
			 "reduction: detected reduction chain: ");

      return def_stmt_info;
    }

  /* Look for the expression computing loop_arg from loop PHI result.  */
  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
    return def_stmt_info;

  if (dump_enabled_p ())
    {
      report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
		       "reduction: unknown pattern: ");
    }

  return NULL;
}

/* Wrapper around parloops_is_simple_reduction, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */

stmt_vec_info
parloops_force_simple_reduction (loop_vec_info loop_info,
				 stmt_vec_info phi_info,
				 bool *double_reduc,
				 bool need_wrapping_integral_overflow)
{
  enum vect_reduction_type v_reduc_type;
  stmt_vec_info def_info
    = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
				    need_wrapping_integral_overflow,
				    &v_reduc_type);
  if (def_info)
    {
      STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (phi_info) = def_info;
      STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (def_info) = phi_info;
    }
  return def_info;
}

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD param_parloops_min_per_thread

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* The name of the field in the parloop data structure intended for reduction.  */
  tree reduc_addr;		/* The address of the reduction variable for
				   openacc reductions.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;

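/* Return the reduction info from REDUCTION_LIST for the reduction phi PHI,
   or NULL if PHI is not a known reduction phi.  The table is keyed on
   gimple_uid (PHI), which the pass keeps equal to the REDUC_VERSION of the
   corresponding entry; a uid of 0 or -1 means PHI is not in the table.  */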
static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->is_empty () || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */
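
/* For example, for a single loop (NB_LOOPS == 1) and the reversal
   transformation T = [-1], a dependence with distance vector d = (1) gives
   T.d = (-1), which is not lexicographically positive, so the reversal is
   rejected; if all distance vectors are (0), the test succeeds.
   loop_parallel_p below relies on exactly this: a loop that can legally
   be reversed has no cross-iteration dependences.  */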

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */
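
/* For instance, the iterations of

     for (i = 0; i < n; i++)
       a[i] = a[i] + c;

   carry no cross-iteration dependence and can run in parallel, whereas in

     for (i = 1; i < n; i++)
       a[i] = a[i-1] + c;

   each iteration reads the value stored by the previous one (a distance-1
   dependence), so the loop must stay sequential.  */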

static bool
loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (class loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */
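
/* For example, for a local "int x[10]" and OBJ == "x[i]", the base decl is
   first canonicalized to a MEM_REF, after which the access is based on
   "xp_1" with the statement "xp_1 = &x" emitted on edge ENTRY; the address
   of the whole access is then computed right before GSI.  Later accesses
   based on "x" reuse xp_1 through DECL_ADDRESS.  (xp_1 is an illustrative
   name.)  */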

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}

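/* Return the SSA name holding the result of the reduction statement STMT,
   which is either a phi node or an assignment.  */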
static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
	  ? gimple_phi_result (stmt)
	  : gimple_assign_lhs (stmt));
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, class loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc.).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

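/* Data passed to eliminate_local_variables_1 through walk_tree: the entry
   edge of the region, the shared DECL_ADDRESS map, the iterator before
   which statements may be inserted (NULL when processing a debug bind),
   and flags recording whether the expression was CHANGED and whether a
   debug bind value must be RESET.  */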
struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
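      /* For instance, "&x" for a local decl "x" is a function invariant
	 and may appear directly as a gimple operand, whereas "&a[i_1]"
	 depends on i_1 and is only valid as the rhs of an assignment such
	 as "p_2 = &a[i_1]".  (The names here are illustrative.)  */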
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */
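
/* For instance, for a local variable "x" used in the region as

     p_1 = &x;
     y_2 = x + 1;

   the address "&x" is computed once on the region entry edge, as
   "xp_3 = &x", and the statements become "p_1 = xp_3" and
   "y_2 = *xp_3 + 1".  (xp_3 is an illustrative name.)  */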

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    {
	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
		has_debug_stmt = true;
	    }
	  else
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}

/* Debug-statement variant of separate_decls_in_region_stmt: rewrites the
   variable and the ssa name uses of debug statement STMT in terms of the
   duplicates stored in NAME_COPIES and DECL_COPIES.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    elt.version = SSA_NAME_VERSION (name);
    slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
    if (!slot)
      {
	gimple_debug_bind_reset_value (stmt);
	update_stmt (stmt);
	break;
      }

    SET_USE (use, (*slot)->new_name);
  }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

1733/* Callback for htab_traverse.  A local result is the intermediate result
1734   computed by a single
1735   thread, or the initial value in case no iteration was executed.
1736   This function creates a phi node reflecting these values.
1737   The phi's result will be stored in NEW_PHI field of the
1738   reduction's data structure.  */
1739
1740int
1741create_phi_for_local_result (reduction_info **slot, class loop *loop)
1742{
1743  struct reduction_info *const reduc = *slot;
1744  edge e;
1745  gphi *new_phi;
1746  basic_block store_bb, continue_bb;
1747  tree local_res;
1748  location_t locus;
1749
1750  /* STORE_BB is the block where the phi
1751     should be stored.  It is the destination of the loop exit.
1752     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
1753  continue_bb = single_pred (loop->latch);
1754  store_bb = FALLTHRU_EDGE (continue_bb)->dest;
1755
1756  /* STORE_BB has two predecessors.  One coming from  the loop
1757     (the reduction's result is computed at the loop),
1758     and another coming from a block preceding the loop,
1759     when no iterations
1760     are executed (the initial value should be taken).  */
1761  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
1762    e = EDGE_PRED (store_bb, 1);
1763  else
1764    e = EDGE_PRED (store_bb, 0);
1765  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1766  local_res = copy_ssa_name (lhs);
1767  locus = gimple_location (reduc->reduc_stmt);
1768  new_phi = create_phi_node (local_res, store_bb);
1769  add_phi_arg (new_phi, reduc->init, e, locus);
1770  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
1771  reduc->new_phi = new_phi;
1772
1773  return 1;
1774}
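
/* For a sum reduction, the phi created above looks like (a sketch; the
   names are invented):

     <bb store_bb>:
     # sum_local_8 = PHI <sum_init (no-iteration path), sum_b_5 (loop path)>

   where sum_init is reduc->init, the neutral element of the operation
   (e.g. 0 for PLUS_EXPR), used when the thread ran no iterations, and
   sum_b_5 is the result of the reduction statement.  */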

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.  CLSN_DATA annotates the place in memory
   the atomic operation relates to, and the basic block it needs to be
   generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  if (reduc->reduc_addr == NULL_TREE)
    {
      load_struct = build_simple_mem_ref (clsn_data->load);
      t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

      addr = build_addr (t);
    }
  else
    {
      /* Set the address for the atomic store.  */
      addr = reduc->reduc_addr;

      /* Remove the non-atomic store '*addr = sum'.  */
      tree res = PHI_RESULT (reduc->keep_res);
      use_operand_p use_p;
      gimple *stmt;
      bool single_use_p = single_imm_use (res, &use_p, &stmt);
      gcc_assert (single_use_p);
      replace_uses_by (gimple_vdef (stmt),
		       gimple_vuse (stmt));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
    }

  /* Insert the atomic operation at the join point of the threads.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr,
				       OMP_MEMORY_ORDER_RELAXED);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gimple *store = gimple_build_omp_atomic_store (name,
						 OMP_MEMORY_ORDER_RELAXED);
  gsi_insert_after (&gsi, store, GSI_NEW_STMT);
  return 1;
}
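
/* For a sum reduction, the sequence generated above roughly corresponds to
   (a sketch; the names are invented):

     D.1_10 = atomic load from *addr;		<-- GIMPLE_OMP_ATOMIC_LOAD
     D.2_11 = D.1_10 + sum_local_8;
     atomic store of D.2_11 to *addr;		<-- GIMPLE_OMP_ATOMIC_STORE

   where sum_local_8 is the phi result created by create_phi_for_local_result
   and addr points either to the reduction's field in the shared structure
   or to reduc->reduc_addr.  */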

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in LOOP.  LD_ST_DATA describes
   the shared data structure the data is stored to and loaded from.  */
static void
create_call_for_reduction (class loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
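
/* E.g., assuming the reduction result was kept in the exit phi

     sum_z_12 = PHI <...>

   the phi is removed and replaced by a load from the shared structure
   (a sketch; the names are invented):

     sum_z_12 = .paral_data_load.13_14->sum_field;

   so the uses of sum_z_12 after the loop now see the combined result.  */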

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the reductions for which the loads should be
   generated.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple *stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Stores the initial value of the reduction
   described in SLOT (kept in its INITIAL_VALUE field) into the reduction's
   field of the shared data structure, so that the threads' partial results
   can later be combined with it.  The store information is passed in
   CLSN_DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
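
/* E.g. for a sum reduction with the user-level initialization 'sum = 1',
   this emits in STORE_BB (a sketch; the field name is invented):

     .paral_data_store.sum_field = 1;

   The threads then accumulate their local results onto this field
   atomically in create_call_for_reduction_1, and the final value is read
   back by create_loads_for_reductions.  */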

/* Callback for htab_traverse.  Creates a load from a field of LOAD in
   LOAD_BB and a store to a field of STORE in STORE_BB for the ssa name and
   its duplicate specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
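
/* For a loop invariant n_7 with duplicate n_8, in terms of the example in
   the comment of separate_decls_in_region below, this generates (a sketch;
   the field name is invented):

     .paral_data_store.n_field = n_7;		<-- in STORE_BB (bb0)
     n_8 = .paral_data_load_9->n_field;		<-- in LOAD_BB (bb1)  */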

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes) to a structure created for this
   purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.is_empty () && reduction_list->is_empty ())
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop-carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && !reduction_list->is_empty ())
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && !reduction_list->is_empty ())
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}

/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
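
/* The created decl corresponds roughly to (a sketch; the exact private
   name is target-dependent, produced by ASM_FORMAT_PRIVATE_NAME):

     static void foo.$loopfn.0 (void *.paral_data_param);

   for a parent function foo; the body is filled in later, when the loop is
   outlined by the omp expansion machinery.  */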

/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple *use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}

/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); in the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>


   In unified diff format:

      <bb preheader>:
      ...
-     goto <bb header>
+     goto <bb newheader>

      <bb header>:
-     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
-     sum_a = PHI <sum_init (preheader), sum_b (latch)>
+     ivtmp_a = PHI <ivtmp_c (latch)>
+     sum_a = PHI <sum_c (latch)>
      ...
      use (ivtmp_a)
      ...
      sum_b = sum_a + sum_update
      ...
-     if (ivtmp_a < n)
-       goto <bb latch>;
+     goto <bb latch>;
+
+     <bb newheader>:
+     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
+     sum_c = PHI <sum_init (preheader), sum_b (latch)>
+     if (ivtmp_c < n + 1)
+       goto <bb header>;
      else
	goto <bb exit>;

      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
-     goto <bb header>
+     goto <bb newheader>

+     <bb newexit>:
+     sum_y = PHI <sum_c (newheader)>

      <bb exit>:
-     sum_z = PHI <sum_b (cond[1]), ...>
+     sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled
   more or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */

static void
transform_to_exit_first_loop_alt (class loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal form after the transformation, which is required
     by create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  basic_block new_exit_block = NULL;
  if (!single_pred_p (exit->dest))
    {
      /* Create a new empty exit block, in between the new loop header and
	 the old exit block.  The function separate_decls_in_region needs
	 this block to insert code that is active on loop exit, but not on
	 any other path.  */
      new_exit_block = split_edge (exit);
    }

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      gphi *nphi = NULL;
      tree res_z = PHI_RESULT (phi);
      tree res_c;

      if (new_exit_block != NULL)
	{
	  /* Now that we have a new exit block, duplicate the phi of the old
	     exit block in the new exit block to preserve loop-closed ssa.  */
	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
	  tree res_y = copy_ssa_name (res_z, phi);
	  nphi = create_phi_node (res_y, new_exit_block);
	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
	}
      else
	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (virtual_operand_p (res_z))
	continue;

      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = (nphi != NULL
			 ? nphi
			 : phi);
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}

/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations
   of the loop.  REDUCTION_LIST describes the reductions in LOOP.  Return
   true if the transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (class loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains no phis.  */
  if (phi_nodes (loop->latch) != NULL)
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out whether we can trigger this, whether it's worth
	     handling optimally, and whether we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
  if (nit_max >= type_max)
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
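
/* For instance, if the original source was 'for (i = 0; i < n; i++)', the
   number of latch executions NIT is typically computed as n - 1 (a sketch;
   the names are invented):

     nit_5 = n_3 + -1;
     ...
     if (iv_1 < nit_5)

   and the search above then picks n_3 as ALT_BOUND instead of emitting a
   new 'nit_5 + 1' computation in the preheader.  */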

/* Moves the exit condition of LOOP to the beginning of its header.  NIT is
   the number of iterations of the loop.  REDUCTION_LIST describes the
   reductions in LOOP.  */

static void
transform_to_exit_first_loop (class loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of a reduction.  If it is, keep the phi at the
	 reduction's keep_res field.  The PHI_RESULT of this phi is the
	 resulting value of the reduction variable when exiting the loop.  */

      if (!reduction_list->is_empty ())
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to the number of iterations according
     to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}

/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads, which can be 0
   if that number is to be determined later.  */

static void
create_parallel_loop (class loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc,
		      bool oacc_kernels_p)
{
  gimple_stmt_iterator gsi;
  basic_block for_bb, ex_bb, continue_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple *omp_return_stmt1, *omp_return_stmt2;
  gimple *phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  if (oacc_kernels_p)
    {
      gcc_checking_assert (lookup_attribute ("oacc kernels",
					     DECL_ATTRIBUTES (cfun->decl)));
      /* Indicate to later processing that this is a parallelized OpenACC
	 kernels construct.  */
      DECL_ATTRIBUTES (cfun->decl)
	= tree_cons (get_identifier ("oacc kernels parallelized"),
		     NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
    }
  else
    {
      /* Prepare the GIMPLE_OMP_PARALLEL statement.  */

      basic_block bb = loop_preheader_edge (loop)->src;
      basic_block paral_bb = single_pred (bb);
      gsi = gsi_last_bb (paral_bb);

      gcc_checking_assert (n_threads != 0);
      t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
      OMP_CLAUSE_NUM_THREADS_EXPR (t)
	= build_int_cst (integer_type_node, n_threads);
      omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
      gimple_set_location (omp_par_stmt, loc);

      gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

      /* Initialize NEW_DATA.  */
      if (data)
	{
	  gassign *assign_stmt;

	  gsi = gsi_after_labels (bb);

	  param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
	  assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

	  assign_stmt = gimple_build_assign (new_data,
					     fold_convert (TREE_TYPE (new_data), param));
	  gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
	}

      /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
      bb = split_loop_exit_edge (single_dom_exit (loop));
      gsi = gsi_last_bb (bb);
      omp_return_stmt1 = gimple_build_omp_return (false);
      gimple_set_location (omp_return_stmt1, loc);
      gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
    }

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  /* FIXME: What is the probability?  */
  guard->probability = profile_probability::guessed_never ();
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      location_t locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop,
	 this value is not modified in the loop, and we're done with this
	 phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	{
	  locus = gimple_phi_arg_location_from_edge (phi, exit);
	  add_phi_arg (phi, def, guard, locus);
	  add_phi_arg (phi, def, end, locus);
	  continue;
	}

      gphi *stmt = as_a <gphi *> (def_stmt);
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  if (oacc_kernels_p)
    /* Parallelized OpenACC kernels constructs use gang parallelism.  See also
       omp-offload.c:execute_oacc_device_lower.  */
    t = build_omp_clause (loc, OMP_CLAUSE_GANG);
  else
    {
      t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
      int chunk_size = param_parloops_chunk_size;
      switch (param_parloops_schedule)
	{
	case PARLOOPS_SCHEDULE_STATIC:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
	  break;
	case PARLOOPS_SCHEDULE_DYNAMIC:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
	  break;
	case PARLOOPS_SCHEDULE_GUIDED:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
	  break;
	case PARLOOPS_SCHEDULE_AUTO:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
	  chunk_size = 0;
	  break;
	case PARLOOPS_SCHEDULE_RUNTIME:
	  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
	  chunk_size = 0;
	  break;
	default:
	  gcc_unreachable ();
	}
      if (chunk_size != 0)
	OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
	  = build_int_cst (integer_type_node, chunk_size);
    }

  for_stmt = gimple_build_omp_for (NULL,
				   (oacc_kernels_p
				    ? GF_OMP_FOR_KIND_OACC_LOOP
				    : GF_OMP_FOR_KIND_FOR),
				   t, 1, NULL);

  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  continue_bb = single_pred (loop->latch);
  gsi = gsi_last_bb (continue_bb);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above, dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}

/* Return the number of phis in BB.  If COUNT_VIRTUAL_P is false, don't
   count the virtual phi.  */

static unsigned int
num_phis (basic_block bb, bool count_virtual_p)
{
  unsigned int nr_phis = 0;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
	continue;

      nr_phis++;
    }

  return nr_phis;
}

/* Generates code to execute the iterations of LOOP in N_THREADS threads in
   parallel; N_THREADS can be 0 if the number of threads is to be determined
   later.

   NITER describes the number of iterations of LOOP.
   REDUCTION_LIST describes the reductions present in LOOP.  */

static void
gen_parallel_loop (class loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, class tree_niter_desc *niter,
		   bool oacc_kernels_p)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread = 2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (!oacc_kernels_p)
    {
      if (loop->inner)
	m_p_thread = 2;
      else
	m_p_thread = MIN_PER_THREAD;

      gcc_checking_assert (n_threads != 0);
      many_iterations_cond
	= fold_build2 (GE_EXPR, boolean_type_node,
		       nit, build_int_cst (type, m_p_thread * n_threads - 1));

      many_iterations_cond
	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
		       many_iterations_cond);
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      if (!is_gimple_condexpr (many_iterations_cond))
	{
	  many_iterations_cond
	    = force_gimple_operand (many_iterations_cond, &stmts,
				    true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
					      stmts);
	}

      initialize_original_copy_tables ();

      /* We assume that the loop usually iterates a lot.  */
      loop_version (loop, many_iterations_cond, NULL,
		    profile_probability::likely (),
		    profile_probability::unlikely (),
		    profile_probability::likely (),
		    profile_probability::unlikely (), true);
      update_ssa (TODO_update_ssa);
      free_original_copy_tables ();
    }

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);
  if (num_phis (loop->header, false) != reduction_list->elements () + 1)
    {
      /* The call to canonicalize_loop_ivs above failed to "base all the
	 induction variables in LOOP on a single control one".  Do damage
	 control.  */
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block cond_bb = single_pred (preheader);
      gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
      gimple_cond_make_true (cond);
      update_stmt (cond);
      /* We've gotten rid of the duplicate loop created by loop_version, but
	 we can't undo whatever canonicalize_loop_ivs has done.
	 TODO: Fix this properly by ensuring that the call to
	 canonicalize_loop_ivs succeeds.  */
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
		 " aborting transformation\n", loop->num);
      return;
    }

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that the latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move
     the entry of the loop directly before the exit check and increase the
     number of iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      if (oacc_kernels_p)
	n_threads = 1;

      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets
	 disabled to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (!reduction_list->is_empty ())
    reduction_list->traverse <class loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  /* This rewrites the body in terms of new variables.  This has already
     been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
  if (!oacc_kernels_p)
    {
      eliminate_local_variables (entry, exit);
      /* In the old loop, move all variables non-local to the loop to a
	 structure and back, and create separate decls for the variables used
	 in the loop.  */
      separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
				&new_arg_struct, &clsn_data);
    }
  else
    {
      arg_struct = NULL_TREE;
      new_arg_struct = NULL_TREE;
      clsn_data.load = NULL_TREE;
      clsn_data.load_bb = exit->dest;
      clsn_data.store = NULL_TREE;
      clsn_data.store_bb = NULL;
    }

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
			n_threads, loc, oacc_kernels_p);
  if (!reduction_list->is_empty ())
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  free_numbers_of_iterations_estimates (cfun);
}

/* Returns true when LOOP contains vector phi nodes.  */

static bool
loop_has_vector_phi_nodes (class loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  gphi_iterator gsi;
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
	goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Create a reduction_info struct, initialize it with REDUC_STMT and PHI,
   and insert it into REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple *reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;
  enum tree_code reduction_code;

  gcc_assert (reduc_stmt);

  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
    {
      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      reduction_code = gimple_assign_rhs_code (def1);
    }
  else
    reduction_code = gimple_assign_rhs_code (reduc_stmt);
  /* Check for an OpenMP-supported reduction operation.  */
  switch (reduction_code)
    {
    case PLUS_EXPR:
    case MULT_EXPR:
    case MAX_EXPR:
    case MIN_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
    case TRUTH_OR_EXPR:
    case TRUTH_XOR_EXPR:
    case TRUTH_AND_EXPR:
      break;
    default:
      return;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction.  Reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = reduction_code;
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
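
/* A typical single reduction recorded here looks like (a sketch; the names
   are invented):

     <bb header>:
     # sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     sum_b = sum_a + something_4;

   where REDUC_STMT is 'sum_b = sum_a + something_4', PHI is the header phi,
   and PLUS_EXPR becomes the reduction_code.  For a double reduction,
   REDUC_STMT is itself a phi, and the code is taken from the defining
   statement of its first argument instead.  */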

/* Callback for htab_traverse.  Sets gimple_uid of reduc_phi stmts.  */

int
set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
{
  struct reduction_info *const red = *slot;
  gimple_set_uid (red->reduc_phi, red->reduc_version);
  return 1;
}

/* Return true if the type of reduction performed by STMT_INFO is suitable
   for this pass.  */

static bool
valid_reduction_p (stmt_vec_info stmt_info)
{
  /* Parallelization would reassociate the operation, which isn't
     allowed for in-order reductions.  */
  vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
  return reduc_type != FOLD_LEFT_REDUCTION;
}
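
/* E.g. a floating-point accumulation like

     float sum = 0.f;
     for (int i = 0; i < n; i++)
       sum += a[i];

   compiled without reassociation enabled (e.g. without -ffast-math) must
   preserve the order of the additions, is typically detected as a
   FOLD_LEFT_REDUCTION, and is therefore rejected here.  */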
3289
3290/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  auto_vec<gphi *, 4> double_reduc_phis;
  auto_vec<gimple *, 4> double_reduc_stmts;

  vec_info_shared shared;
  simple_loop_info = vect_analyze_loop_form (loop, &shared);
  if (simple_loop_info == NULL)
    goto gather_done;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      stmt_vec_info reduc_stmt_info
	= parloops_force_simple_reduction (simple_loop_info,
					   simple_loop_info->lookup_stmt (phi),
					   &double_reduc, true);
      if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
	continue;

      if (double_reduc)
	{
	  if (loop->inner->inner != NULL)
	    continue;

	  double_reduc_phis.safe_push (phi);
	  double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
	  continue;
	}

      build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
    }
  delete simple_loop_info;

  if (!double_reduc_phis.is_empty ())
    {
      vec_info_shared shared;
      simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
      if (simple_loop_info)
	{
	  gphi *phi;
	  unsigned int i;

	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
	    {
	      affine_iv iv;
	      tree res = PHI_RESULT (phi);
	      bool double_reduc;

	      use_operand_p use_p;
	      gimple *inner_stmt;
	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	      gcc_assert (single_use_p);
	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
		continue;
	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			     &iv, true))
		continue;

	      stmt_vec_info inner_phi_info
		= simple_loop_info->lookup_stmt (inner_phi);
	      stmt_vec_info inner_reduc_stmt_info
		= parloops_force_simple_reduction (simple_loop_info,
						   inner_phi_info,
						   &double_reduc, true);
	      gcc_assert (!double_reduc);
	      if (!inner_reduc_stmt_info
		  || !valid_reduction_p (inner_reduc_stmt_info))
		continue;

	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
	    }
	  delete simple_loop_info;
	}
    }

 gather_done:
  if (reduction_list->is_empty ())
    return;

  /* Since gimple_uid is used by the vectorizer between the call to
     vect_analyze_loop_form and the deletion of simple_loop_info, the
     gimple_uid of the reduc_phi stmts can only be set now.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}

/* Try to initialize NITER for the code generation part.  */

static bool
try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
{
  edge exit = single_dom_exit (loop);

  gcc_assert (exit);

  /* We need to know the number of iterations, and there must be no uses of
     values defined inside the loop outside of it, unless the values are
     loop invariants.  */
  if (!number_of_iterations_exit (loop, exit, niter, false))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: number of iterations not known\n");
      return false;
    }

  return true;
}
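
/* For instance (a hypothetical example), for

     for (i = 0; i < n; i++)
       a[i] = 0;

   number_of_iterations_exit can determine the iteration count N, whereas
   for a loop like

     while (*p++ != 0)
       ;

   the count depends on the data and try_get_loop_niter fails.  */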

/* Return the default def of the first function argument.  */

static tree
get_omp_data_i_param (void)
{
  tree decl = DECL_ARGUMENTS (cfun->decl);
  gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
  return ssa_default_def (cfun, decl);
}
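
/* In the outlined functions this pass deals with, that single argument is
   the .omp_data_i pointer to the shared data block, as in the pattern
   sketched for find_reduc_addr below.  */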

/* For PHI in loop header of LOOP, look for pattern:

   <bb preheader>
   .omp_data_i = &.omp_data_arr;
   addr = .omp_data_i->sum;
   sum_a = *addr;

   <bb header>:
   sum_b = PHI <sum_a (preheader), sum_c (latch)>

   and return addr.  Otherwise, return NULL_TREE.  */

static tree
find_reduc_addr (class loop *loop, gphi *phi)
{
  edge e = loop_preheader_edge (loop);
  tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
  gimple *stmt = SSA_NAME_DEF_STMT (arg);
  if (!gimple_assign_single_p (stmt))
    return NULL_TREE;
  tree memref = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (memref) != MEM_REF)
    return NULL_TREE;
  tree addr = TREE_OPERAND (memref, 0);

  gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
  if (!gimple_assign_single_p (stmt2))
    return NULL_TREE;
  tree compref = gimple_assign_rhs1 (stmt2);
  if (TREE_CODE (compref) != COMPONENT_REF)
    return NULL_TREE;
  tree addr2 = TREE_OPERAND (compref, 0);
  if (TREE_CODE (addr2) != MEM_REF)
    return NULL_TREE;
  addr2 = TREE_OPERAND (addr2, 0);
  if (TREE_CODE (addr2) != SSA_NAME
      || addr2 != get_omp_data_i_param ())
    return NULL_TREE;

  return addr;
}

/* Try to initialize REDUCTION_LIST for the code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list,
			   bool oacc_kernels_p)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  /* Try to get rid of exit phis.  */
  final_value_replacement_loop (loop);

  gather_scalar_reductions (loop, reduction_list);

  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: exit PHI argument invariant.\n");
	      return false;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is part of reduction pattern:\n");
	    }
	  if (reduction_list->is_empty ())
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (red->keep_res != NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: reduction has multiple exit phis.\n");
	      return false;
	    }
	  red->keep_res = phi;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  if (oacc_kernels_p)
    {
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  tree def = PHI_RESULT (phi);
	  affine_iv iv;

	  if (!virtual_operand_p (def)
	      && !simple_iv (loop, loop, def, &iv, true))
	    {
	      tree addr = find_reduc_addr (loop, phi);
	      if (addr == NULL_TREE)
		return false;
	      struct reduction_info *red = reduction_phi (reduction_list, phi);
	      red->reduc_addr = addr;
	    }
	}
    }

  return true;
}
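
/* A hypothetical loop that fails the biv/reduction test above:

     for (i = 0; i < n; i++)
       {
	 x = x * x + a[i];
	 b[i] = x;
       }

   Here X is neither a simple induction variable nor a recognized
   reduction, so its header phi has no reduction_info entry and
   try_create_reduction_list reports a scalar dependency between
   iterations.  */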

/* Return true if LOOP contains phis with ADDR_EXPR in args.  */

static bool
loop_has_phi_with_address_arg (class loop *loop)
{
  basic_block *bbs = get_loop_body (loop);
  bool res = false;

  unsigned i, j;
  gphi_iterator gsi;
  for (i = 0; i < loop->num_nodes; i++)
    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	for (j = 0; j < gimple_phi_num_args (phi); j++)
	  {
	    tree arg = gimple_phi_arg_def (phi, j);
	    if (TREE_CODE (arg) == ADDR_EXPR)
	      {
		/* This should be handled by eliminate_local_variables, but that
		   function currently ignores phis.  */
		res = true;
		goto end;
	      }
	  }
      }
 end:
  free (bbs);

  return res;
}
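
/* An assumed example of a phi this rejects:

     p_1 = PHI <&local (preheader), p_2 (latch)>

   The ADDR_EXPR argument would have to be rewritten into an access through
   a pointer before outlining, which eliminate_local_variables does not do
   for phis.  */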

/* Return true if memory ref REF (corresponding to the stmt at GSI in
   REGION_BBS[I]) conflicts with the statements in REGION_BBS[I] after GSI,
   or with the statements in REGION_BBS[I + n].  REF_IS_STORE indicates
   whether REF is a store.  Ignore conflicts with SKIP_STMT.  */

static bool
ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
			   bool ref_is_store, vec<basic_block> region_bbs,
			   unsigned int i, gimple *skip_stmt)
{
  basic_block bb = region_bbs[i];
  gsi_next (&gsi);

  while (true)
    {
      for (; !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (stmt == skip_stmt)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "skipping reduction store: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      continue;
	    }

	  if (!gimple_vdef (stmt)
	      && !gimple_vuse (stmt))
	    continue;

	  if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;

	  if (ref_is_store)
	    {
	      if (ref_maybe_used_by_stmt_p (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	  else
	    {
	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	}
      i++;
      if (i == region_bbs.length ())
	break;
      bb = region_bbs[i];
      gsi = gsi_start_bb (bb);
    }

  return false;
}

/* Return true if the bbs in REGION_BBS but not in IN_LOOP_BBS can be
   executed in parallel with the REGION_BBS containing the loop.  Return
   the stores of reduction results in REDUCTION_STORES.  */

static bool
oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
		      reduction_info_table_type *reduction_list,
		      bitmap reduction_stores)
{
  tree omp_data_i = get_omp_data_i_param ();

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple *skip_stmt = NULL;

	  if (is_gimple_debug (stmt)
	      || gimple_code (stmt) == GIMPLE_COND)
	    continue;

	  ao_ref ref;
	  bool ref_is_store = false;
	  if (gimple_assign_load_p (stmt))
	    {
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree base = get_base_address (rhs);
	      if (TREE_CODE (base) == MEM_REF
		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
		continue;

	      tree lhs = gimple_assign_lhs (stmt);
	      if (TREE_CODE (lhs) == SSA_NAME
		  && has_single_use (lhs))
		{
		  use_operand_p use_p;
		  gimple *use_stmt;
		  struct reduction_info *red;
		  single_imm_use (lhs, &use_p, &use_stmt);
		  if (gimple_code (use_stmt) == GIMPLE_PHI
		      && (red = reduction_phi (reduction_list, use_stmt)))
		    {
		      tree val = PHI_RESULT (red->keep_res);
		      if (has_single_use (val))
			{
			  single_imm_use (val, &use_p, &use_stmt);
			  if (gimple_store_p (use_stmt))
			    {
			      unsigned int id
				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
			      bitmap_set_bit (reduction_stores, id);
			      skip_stmt = use_stmt;
			      if (dump_file)
				{
				  fprintf (dump_file, "found reduction load: ");
				  print_gimple_stmt (dump_file, stmt, 0);
				}
			    }
			}
		    }
		}

	      ao_ref_init (&ref, rhs);
	    }
	  else if (gimple_store_p (stmt))
	    {
	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
	      ref_is_store = true;
	    }
	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
	    continue;
	  else if (!gimple_has_side_effects (stmt)
		   && !gimple_could_trap_p (stmt)
		   && !stmt_could_throw_p (cfun, stmt)
		   && !gimple_vdef (stmt)
		   && !gimple_vuse (stmt))
	    continue;
	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
	    continue;
	  else if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;
	  else
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }

	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
					 i, skip_stmt))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }
	}
    }

  return true;
}

/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
   gang_pos == 0, except when the stores are REDUCTION_STORES.  Return true
   if any changes were made.  */
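
/* Sketch of the transformation (illustrative, not literal GIMPLE): a store
   in the entry/exit part such as

     *addr = val;

   becomes

     gang_pos = GOACC_DIM_POS (GOMP_DIM_GANG);
     ...
     if (gang_pos == 0)
       *addr = val;

   with a virtual phi merging the vdef on the two paths, so that only
   gang 0 executes the store.  */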

static bool
oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
			     bitmap reduction_stores)
{
  tree gang_pos = NULL_TREE;
  bool changed = false;

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (!gimple_store_p (stmt))
	    {
	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  if (bitmap_bit_p (reduction_stores,
			    SSA_NAME_VERSION (gimple_vdef (stmt))))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file,
			   "skipped reduction store for single-gang"
			   " neutering: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}

	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  changed = true;

	  if (gang_pos == NULL_TREE)
	    {
	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
	      gcall *gang_single
		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
	      gang_pos = make_ssa_name (integer_type_node);
	      gimple_call_set_lhs (gang_single, gang_pos);
	      gimple_stmt_iterator start
		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
	      gimple_set_vuse (gang_single, vuse);
	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "found store that needs single-gang neutering: ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  {
	    /* Split block before store.  */
	    gimple_stmt_iterator gsi2 = gsi;
	    gsi_prev (&gsi2);
	    edge e;
	    if (gsi_end_p (gsi2))
	      {
		e = split_block_after_labels (bb);
		gsi2 = gsi_last_bb (bb);
	      }
	    else
	      e = split_block (bb, gsi_stmt (gsi2));
	    basic_block bb2 = e->dest;

	    /* Split block after store.  */
	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
	    basic_block bb3 = e2->dest;

	    gimple *cond
	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
				   NULL_TREE, NULL_TREE);
	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);

	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
	    /* FIXME: What is the probability?  */
	    e3->probability = profile_probability::guessed_never ();
	    e->flags = EDGE_TRUE_VALUE;

	    tree vdef = gimple_vdef (stmt);
	    tree vuse = gimple_vuse (stmt);

	    tree phi_res = copy_ssa_name (vdef);
	    gphi *new_phi = create_phi_node (phi_res, bb3);
	    replace_uses_by (vdef, phi_res);
	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);

	    /* Update gsi to point to next stmt.  */
	    bb = bb3;
	    gsi = gsi_start_bb (bb);
	  }
	}
    }

  return changed;
}

/* Return true if the statements before and after the LOOP can be executed in
   parallel with the function containing the loop.  Resolve conflicting stores
   outside LOOP by guarding them such that only a single gang executes them.  */

static bool
oacc_entry_exit_ok (class loop *loop,
		    reduction_info_table_type *reduction_list)
{
  basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
  vec<basic_block> region_bbs
    = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));

  bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
  bitmap_clear (in_loop_bbs);
  for (unsigned int i = 0; i < loop->num_nodes; i++)
    bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);

  bitmap reduction_stores = BITMAP_ALLOC (NULL);
  bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
				   reduction_stores);

  if (res)
    {
      bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
						  reduction_stores);
      if (changed)
	{
	  free_dominance_info (CDI_DOMINATORS);
	  calculate_dominance_info (CDI_DOMINATORS);
	}
    }

  region_bbs.release ();
  free (loop_bbs);

  BITMAP_FREE (in_loop_bbs);
  BITMAP_FREE (reduction_stores);

  return res;
}

/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

static bool
parallelize_loops (bool oacc_kernels_p)
{
  unsigned n_threads;
  bool changed = false;
  class loop *loop;
  class loop *skip_loop = NULL;
  class tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (!oacc_kernels_p
      && parallelized_function_p (cfun->decl))
    return false;

  /* Do not parallelize loops in offloaded functions.  */
  if (!oacc_kernels_p
      && oacc_get_fn_attrib (cfun->decl) != NULL)
    return false;

  if (cfun->has_nonlocal_label)
    return false;

  /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
     the argument to -ftree-parallelize-loops.  */
  if (oacc_kernels_p)
    n_threads = 0;
  else
    n_threads = flag_tree_parallelize_loops;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (!loop->in_oacc_kernels_region
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      reduction_list.empty ();

      if (oacc_kernels_p)
	{
	  if (!loop->in_oacc_kernels_region)
	    continue;

	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
	  if (loop->inner)
	    skip_loop = loop->inner;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Trying loop %d with header bb %d in oacc kernels"
		     " region\n", loop->num, loop->header->index);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
	}

      if (!single_dom_exit (loop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      estimated = estimated_loop_iterations_int (loop);
      if (estimated == -1)
	estimated = get_likely_max_loop_iterations_int (loop);
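      /* For example (assuming MIN_PER_THREAD is 100, as defined earlier in
	 this file), with -ftree-parallelize-loops=4 an innermost loop is
	 only considered when its estimated iteration count is at least
	 4 * 100 - 1 = 399; for a loop containing an inner loop the factor
	 is 2 instead of MIN_PER_THREAD.  */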
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && !oacc_kernels_p
	  && ((estimated != -1
	       && (estimated
		   < ((HOST_WIDE_INT) n_threads
		      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
	continue;

      if (loop_has_phi_with_address_arg (loop))
	continue;

      if (!loop->can_be_parallel
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      if (oacc_kernels_p
	  && !oacc_entry_exit_ok (loop, &reduction_list))
	{
	  if (dump_file)
	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
	  continue;
	}

      changed = true;
      skip_loop = loop->inner;

      if (dump_enabled_p ())
	{
	  dump_user_location_t loop_loc = find_loop_location (loop);
	  if (loop->inner)
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			     "parallelizing outer loop %d\n", loop->num);
	  else
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			     "parallelizing inner loop %d\n", loop->num);
	}

      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc, oacc_kernels_p);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}

/* Parallelization.  */
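
/* The pass is gated on -ftree-parallelize-loops=N with N greater than 1,
   or, for the variant operating on OpenACC kernels regions, on -fopenacc;
   see the gate method below.  */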

namespace {

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
      oacc_kernels_p (false)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    if (oacc_kernels_p)
      return flag_openacc;
    else
      return flag_tree_parallelize_loops > 1;
  }
  virtual unsigned int execute (function *);
  opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      oacc_kernels_p = param;
    }

 private:
  bool oacc_kernels_p;
}; // class pass_parallelize_loops

unsigned
pass_parallelize_loops::execute (function *fun)
{
  tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
  if (nthreads == NULL_TREE)
    return 0;

  bool in_loop_pipeline = scev_initialized_p ();
  if (!in_loop_pipeline)
    loop_optimizer_init (LOOPS_NORMAL
			 | LOOPS_HAVE_RECORDED_EXITS);

  if (number_of_loops (fun) <= 1)
    return 0;

  if (!in_loop_pipeline)
    {
      rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
      scev_initialize ();
    }

  unsigned int todo = 0;
  if (parallelize_loops (oacc_kernels_p))
    {
      fun->curr_properties &= ~(PROP_gimple_eomp);

      checking_verify_loop_structure ();

      todo |= TODO_update_ssa;
    }

  if (!in_loop_pipeline)
    {
      scev_finalize ();
      loop_optimizer_finalize ();
    }

  return todo;
}

} // anon namespace

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}