1/* Loop autoparallelization.
2   Copyright (C) 2006, 2007, 2008, 2009, 2010
3   Free Software Foundation, Inc.
4   Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5   Zdenek Dvorak <dvorakz@suse.cz>.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3.  If not see
21<http://www.gnu.org/licenses/>.  */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "tree.h"
28#include "rtl.h"
29#include "tree-flow.h"
30#include "cfgloop.h"
31#include "ggc.h"
32#include "tree-data-ref.h"
33#include "diagnostic.h"
34#include "tree-pass.h"
35#include "tree-scalar-evolution.h"
36#include "hashtab.h"
37#include "langhooks.h"
38#include "tree-vectorizer.h"
39
40/* This pass tries to distribute iterations of loops into several threads.
41   The implementation is straightforward -- for each loop we test whether its
42   iterations are independent, and if it is the case (and some additional
43   conditions regarding profitability and correctness are satisfied), we
44   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45   machinery do its job.
46
   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
49   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50      variable and that the exit test is at the start of the loop body
51   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52      variables by accesses through pointers, and breaking up ssa chains
53      by storing the values incoming to the parallelized loop to a structure
54      passed to the new function as an argument (something similar is done
55      in omp gimplification, unfortunately only a small part of the code
56      can be shared).
57
58   TODO:
59   -- if there are several parallelizable loops in a function, it may be
60      possible to generate the threads just once (using synchronization to
61      ensure that cross-loop dependences are obeyed).
62   -- handling of common scalar dependence patterns (accumulation, ...)
63   -- handling of non-innermost loops  */
64
65/*
66  Reduction handling:
67  currently we use vect_is_simple_reduction() to detect reduction patterns.
68  The code transformation will be introduced by an example.
69
70
71parloop
72{
73  int sum=1;
74
75  for (i = 0; i < N; i++)
76   {
77    x[i] = i + 3;
78    sum+=x[i];
79   }
80}
81
82gimple-like code:
83header_bb:
84
85  # sum_29 = PHI <sum_11(5), 1(3)>
86  # i_28 = PHI <i_12(5), 0(3)>
87  D.1795_8 = i_28 + 3;
88  x[i_28] = D.1795_8;
89  sum_11 = D.1795_8 + sum_29;
90  i_12 = i_28 + 1;
91  if (N_6(D) > i_12)
92    goto header_bb;
93
94
95exit_bb:
96
97  # sum_21 = PHI <sum_11(4)>
98  printf (&"%d"[0], sum_21);
99
100
101after reduction transformation (only relevant parts):
102
103parloop
104{
105
106....
107
108
109  # Storing the initial value given by the user.  #
110
111  .paral_data_store.32.sum.27 = 1;
112
113  #pragma omp parallel num_threads(4)
114
115  #pragma omp for schedule(static)
116
117  # The neutral element corresponding to the particular
118  reduction's operation, e.g. 0 for PLUS_EXPR,
119  1 for MULT_EXPR, etc. replaces the user's initial value.  #
120
121  # sum.27_29 = PHI <sum.27_11, 0>
122
123  sum.27_11 = D.1827_8 + sum.27_29;
124
125  GIMPLE_OMP_CONTINUE
126
127  # Adding this reduction phi is done at create_phi_for_local_result() #
128  # sum.27_56 = PHI <sum.27_11, 0>
129  GIMPLE_OMP_RETURN
130
131  # Creating the atomic operation is done at
132  create_call_for_reduction_1()  #
133
134  #pragma omp atomic_load
135  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136  D.1840_60 = sum.27_56 + D.1839_59;
137  #pragma omp atomic_store (D.1840_60);
138
139  GIMPLE_OMP_RETURN
140
141 # collecting the result after the join of the threads is done at
142  create_loads_for_reductions().
143  The value computed by the threads is loaded from the
144  shared struct.  #
145
146
147  .paral_data_load.33_52 = &.paral_data_store.32;
148  sum_37 =  .paral_data_load.33_52->sum.27;
149  sum_43 = D.1795_41 + sum_37;
150
151  exit bb:
152  # sum_21 = PHI <sum_43, sum_26>
153  printf (&"%d"[0], sum_21);
154
155...
156
157}
158
159*/
160
161/* Minimal number of iterations of a loop that should be executed in each
162   thread.  */
163#define MIN_PER_THREAD 100
164
/* Element of the hashtable, representing a
   reduction in the current loop.  Keyed by REDUC_PHI (pointer hash).  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop. */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/*  the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
182
183/* Equality and hash functions for hashtab code.  */
184
185static int
186reduction_info_eq (const void *aa, const void *bb)
187{
188  const struct reduction_info *a = (const struct reduction_info *) aa;
189  const struct reduction_info *b = (const struct reduction_info *) bb;
190
191  return (a->reduc_phi == b->reduc_phi);
192}
193
194static hashval_t
195reduction_info_hash (const void *aa)
196{
197  const struct reduction_info *a = (const struct reduction_info *) aa;
198
199  return htab_hash_pointer (a->reduc_phi);
200}
201
202static struct reduction_info *
203reduction_phi (htab_t reduction_list, gimple phi)
204{
205  struct reduction_info tmpred, *red;
206
207  if (htab_elements (reduction_list) == 0)
208    return NULL;
209
210  tmpred.reduc_phi = phi;
211  red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
212
213  return red;
214}
215
/* Element of hashtable of names to copy.  Keyed by the SSA name
   version number.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
225
226/* Equality and hash functions for hashtab code.  */
227
228static int
229name_to_copy_elt_eq (const void *aa, const void *bb)
230{
231  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232  const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
233
234  return a->version == b->version;
235}
236
237static hashval_t
238name_to_copy_elt_hash (const void *aa)
239{
240  const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
241
242  return (hashval_t) a->version;
243}
244
245
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */
249
250static bool
251loop_parallel_p (struct loop *loop)
252{
253  VEC (ddr_p, heap) * dependence_relations;
254  VEC (data_reference_p, heap) *datarefs;
255  lambda_trans_matrix trans;
256  bool ret = false;
257
258  if (dump_file && (dump_flags & TDF_DETAILS))
259  {
260    fprintf (dump_file, "Considering loop %d\n", loop->num);
261    if (!loop->inner)
262      fprintf (dump_file, "loop is innermost\n");
263    else
264      fprintf (dump_file, "loop NOT innermost\n");
265   }
266
267  /* Check for problems with dependences.  If the loop can be reversed,
268     the iterations are independent.  */
269  datarefs = VEC_alloc (data_reference_p, heap, 10);
270  dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
271  compute_data_dependences_for_loop (loop, true, &datarefs,
272				     &dependence_relations);
273  if (dump_file && (dump_flags & TDF_DETAILS))
274    dump_data_dependence_relations (dump_file, dependence_relations);
275
276  trans = lambda_trans_matrix_new (1, 1);
277  LTM_MATRIX (trans)[0][0] = -1;
278
279  if (lambda_transform_legal_p (trans, 1, dependence_relations))
280    {
281      ret = true;
282      if (dump_file && (dump_flags & TDF_DETAILS))
283	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
284    }
285  else if (dump_file && (dump_flags & TDF_DETAILS))
286    fprintf (dump_file,
287	     "  FAILED: data dependencies exist across iterations\n");
288
289  free_dependence_relations (dependence_relations);
290  free_data_refs (datarefs);
291
292  return ret;
293}
294
295/* Return true when LOOP contains basic blocks marked with the
296   BB_IRREDUCIBLE_LOOP flag.  */
297
298static inline bool
299loop_has_blocks_with_irreducible_flag (struct loop *loop)
300{
301  unsigned i;
302  basic_block *bbs = get_loop_body_in_dom_order (loop);
303  bool res = true;
304
305  for (i = 0; i < loop->num_nodes; i++)
306    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
307      goto end;
308
309  res = false;
310 end:
311  free (bbs);
312  return res;
313}
314
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  GSI may be NULL, in which case no new statements may
   be emitted and NULL is returned when the address is not already
   available in DECL_ADDRESS (used for debug statements).  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip component references to find the base variable.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;
  uid = DECL_UID (*var_p);

  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* Address of the base not taken yet: emit NAME = &BASE on the
	 entry edge and remember NAME in DECL_ADDRESS for reuse.  */
      if (gsi == NULL)
	return NULL;
      addr = build_addr (*var_p, current_function_decl);
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  if (gsi == NULL)
    {
      /* Debug-statement path: rewrite the reference without emitting
	 any new statements.  */
      if (var_p != &obj)
	{
	  *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
	  name = build_fold_addr_expr_with_type (obj, type);
	}
      return fold_convert (type, name);
    }
  if (var_p != &obj)
    {
      /* OBJ is a component reference: rebuild its address on top of the
	 pointer to the base, gimplifying before GSI if needed.  */
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
      name = force_gimple_operand (build_addr (obj, current_function_decl),
				   &stmts, true, NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  if (TREE_TYPE (name) != type)
    {
      /* Convert the address to the requested pointer type.  */
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
391
392/* Callback for htab_traverse.  Create the initialization statement
393   for reduction described in SLOT, and place it at the preheader of
394   the loop described in DATA.  */
395
396static int
397initialize_reductions (void **slot, void *data)
398{
399  tree init, c;
400  tree bvar, type, arg;
401  edge e;
402
403  struct reduction_info *const reduc = (struct reduction_info *) *slot;
404  struct loop *loop = (struct loop *) data;
405
406  /* Create initialization in preheader:
407     reduction_variable = initialization value of reduction.  */
408
409  /* In the phi node at the header, replace the argument coming
410     from the preheader with the reduction initialization value.  */
411
412  /* Create a new variable to initialize the reduction.  */
413  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
414  bvar = create_tmp_var (type, "reduction");
415  add_referenced_var (bvar);
416
417  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
418			OMP_CLAUSE_REDUCTION);
419  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
420  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
421
422  init = omp_reduction_init (c, TREE_TYPE (bvar));
423  reduc->init = init;
424
425  /* Replace the argument representing the initialization value
426     with the initialization value for the reduction (neutral
427     element for the particular operation, e.g. 0 for PLUS_EXPR,
428     1 for MULT_EXPR, etc).
429     Keep the old value in a new variable "reduction_initial",
430     that will be taken in consideration after the parallel
431     computing is done.  */
432
433  e = loop_preheader_edge (loop);
434  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
435  /* Create new variable to hold the initial value.  */
436
437  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
438	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
439  reduc->initial_value = arg;
440  return 1;
441}
442
/* Data passed to the eliminate_local_variables_1 callback through
   walk_tree / walk_gimple_op.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be the first member: the callback
				   casts the info pointer back to elv_data.  */
  edge entry;			/* Entry edge of the SESE region processed.  */
  htab_t decl_address;		/* Maps decl uids to the ssa names holding
				   their addresses.  */
  gimple_stmt_iterator *gsi;	/* Where helper statements may be inserted;
				   NULL when processing debug binds.  */
  bool changed;			/* Set when the walked expression changed.  */
  bool reset;			/* Set when a debug bind value must be
				   dropped (no code may be emitted).  */
};
452
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only local (non-external) variables need to be rewritten.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct use of the variable by an indirect
	 reference through its address.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Debug bind and the address is not yet available: request
	     that the bind value be reset instead.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* See the corresponding comment in the DECL_P branch.  */
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Do not descend into non-expression nodes.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
529
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				htab_t decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* A NULL GSI tells the callback that no new statements may be
	 generated; only already-computed addresses can be reused.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  /* Some reference could not be rewritten; drop the bind value.  */
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
568
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug statements, populating
     DECL_ADDRESS.  Debug binds are postponed so that they can reuse the
     addresses recorded here without emitting any new code.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_debug_bind_p (gsi_stmt (gsi)))
	  has_debug_stmt = true;
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  /* Second pass: rewrite the debug binds, if any were seen.  */
  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}
613
614/* Returns true if expression EXPR is not defined between ENTRY and
615   EXIT, i.e. if all its operands are defined outside of the region.  */
616
617static bool
618expr_invariant_in_region_p (edge entry, edge exit, tree expr)
619{
620  basic_block entry_bb = entry->src;
621  basic_block exit_bb = exit->dest;
622  basic_block def_bb;
623
624  if (is_gimple_min_invariant (expr))
625    return true;
626
627  if (TREE_CODE (expr) == SSA_NAME)
628    {
629      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
630      if (def_bb
631	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
632	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
633	return false;
634
635      return true;
636    }
637
638  return false;
639}
640
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only allocate a slot when we are allowed to create a copy;
     otherwise just check whether one already exists.  */
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base decl of the name, unless done before.  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* The name itself is kept; only its base decl is replaced below.  */
      gcc_assert (!slot);
      copy = name;
    }

  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}
715
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  mark_virtual_ops_for_renaming (stmt);

  /* Defs are never copied (they are defined inside the region); only
     their base decls are replaced.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  /* Uses whose definitions lie outside the region get duplicated.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}
756
/* Rewrites the debug bind statement STMT for the split-off region:
   the bound variable is replaced with its duplicate from DECL_COPIES,
   and the ssa names used in the bind value are replaced with their
   duplicates from NAME_COPIES.  If a used name has no duplicate, the
   bind value is reset.  Returns true when the statement cannot be
   adjusted at all (the bound variable is a DEBUG_EXPR_DECL or has no
   duplicate), in which case the caller should discard it.  */
763
static bool
separate_decls_in_region_debug_bind (gimple stmt,
				     htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  var = gimple_debug_bind_get_var (stmt);
  /* Debug expression decls are not duplicated; nothing to remap.  */
  if (TREE_CODE (var) == DEBUG_EXPR_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Rebind the statement to the duplicated decl.  */
  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    elt.version = SSA_NAME_VERSION (name);
    slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
    if (!slot)
      {
	/* A used name has no duplicate, so the bind value cannot be
	   expressed in the new function; drop it.  */
	gimple_debug_bind_reset_value (stmt);
	update_stmt (stmt);
	break;
      }

    SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
  }

  return false;
}
805
806/* Callback for htab_traverse.  Adds a field corresponding to the reduction
807   specified in SLOT. The type is passed in DATA.  */
808
809static int
810add_field_for_reduction (void **slot, void *data)
811{
812
813  struct reduction_info *const red = (struct reduction_info *) *slot;
814  tree const type = (tree) data;
815  tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
816  tree field = build_decl (gimple_location (red->reduc_stmt),
817			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
818
819  insert_field_into_struct (type, field);
820
821  red->field = field;
822
823  return 1;
824}
825
826/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
827   described in SLOT. The type is passed in DATA.  */
828
829static int
830add_field_for_name (void **slot, void *data)
831{
832  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
833  tree type = (tree) data;
834  tree name = ssa_name (elt->version);
835  tree var = SSA_NAME_VAR (name);
836  tree field = build_decl (DECL_SOURCE_LOCATION (var),
837			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
838
839  insert_field_into_struct (type, field);
840  elt->field = field;
841
842  return 1;
843}
844
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from  the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The local result reuses the base variable of the reduction value.  */
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* Initial value on the no-iteration edge, computed value on the loop
     exit edge.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
890
/* Describes the shared structure used to pass values between the
   threads: the variable it is stored in, the pointer it is loaded
   through, and the blocks in which the store/load code is emitted.  */

struct clsn_data
{
  tree store;			/* The structure variable the data is
				   stored to.  */
  tree load;			/* The pointer through which the data is
				   loaded (assigned &STORE).  */

  basic_block store_bb;		/* Block where the stores are generated.  */
  basic_block load_bb;		/* Block where the loads are generated.  */
};
899
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of the reduction field in the shared structure; this is
     the memory the atomic operation updates.  */
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Emit: tmp_load = GIMPLE_OMP_ATOMIC_LOAD (&field).  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result ...  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ... and store it back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
954
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First collect each thread's local result into a phi node.  */
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* Then emit the atomic update of the shared field per reduction.  */
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
968
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  tree name;
  tree x;

  /* Load the reduction field of the shared structure into the name
     that held the reduction result at the loop exit.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* NAME is now defined by the load, so remove the original exit phi
     to keep a single definition.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1006
1007/* Load the reduction result that was stored in LD_ST_DATA.
1008   REDUCTION_LIST describes the list of reductions that the
1009   loads should be generated for.  */
1010static void
1011create_final_loads_for_reduction (htab_t reduction_list,
1012				  struct clsn_data *ld_st_data)
1013{
1014  gimple_stmt_iterator gsi;
1015  tree t;
1016  gimple stmt;
1017
1018  gsi = gsi_after_labels (ld_st_data->load_bb);
1019  t = build_fold_addr_expr (ld_st_data->store);
1020  stmt = gimple_build_assign (ld_st_data->load, t);
1021
1022  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1023  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1024
1025  htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1026
1027}
1028
1029/* Callback for htab_traverse.  Store the neutral value for the
1030  particular reduction's operation, e.g. 0 for PLUS_EXPR,
1031  1 for MULT_EXPR, etc. into the reduction field.
1032  The reduction is specified in SLOT. The store information is
1033  passed in DATA.  */
1034
1035static int
1036create_stores_for_reduction (void **slot, void *data)
1037{
1038  struct reduction_info *const red = (struct reduction_info *) *slot;
1039  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1040  tree t;
1041  gimple stmt;
1042  gimple_stmt_iterator gsi;
1043  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1044
1045  gsi = gsi_last_bb (clsn_data->store_bb);
1046  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1047  stmt = gimple_build_assign (t, red->initial_value);
1048  mark_virtual_ops_for_renaming (stmt);
1049  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1050
1051  return 1;
1052}
1053
1054/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1055   store to a field of STORE in STORE_BB for the ssa name and its duplicate
1056   specified in SLOT.  */
1057
1058static int
1059create_loads_and_stores_for_name (void **slot, void *data)
1060{
1061  struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1062  struct clsn_data *const clsn_data = (struct clsn_data *) data;
1063  tree t;
1064  gimple stmt;
1065  gimple_stmt_iterator gsi;
1066  tree type = TREE_TYPE (elt->new_name);
1067  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1068  tree load_struct;
1069
1070  gsi = gsi_last_bb (clsn_data->store_bb);
1071  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1072  stmt = gimple_build_assign (t, ssa_name (elt->version));
1073  mark_virtual_ops_for_renaming (stmt);
1074  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1075
1076  gsi = gsi_last_bb (clsn_data->load_bb);
1077  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1078  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1079  stmt = gimple_build_assign (elt->new_name, t);
1080  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1081  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1082
1083  return 1;
1084}
1085
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  /* BB0 (the predecessor) will hold the stores into the structure,
     BB1 (the freshly split block) the loads from it.  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  /* Map from SSA name versions used in the region to their copies.  */
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  /* Map from decl uids to the duplicated decls.  */
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  /* ENTRY was split above; the region is now entered through the single
     successor edge of the new block.  */
  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Create copies for every name/decl defined outside the region but
     used inside it.  Debug statements are only noted here and handled
     in a second pass below.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (gimple_debug_bind_p (stmt))
		{
		  /* A debug bind that cannot be rewritten in terms of
		     the copied names/decls is dropped.  */
		  if (separate_decls_in_region_debug_bind (stmt,
							   name_copies,
							   decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (BUILTINS_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
                         type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
                        ld_st_data);
	  /* A distinct pointer SSA name is needed after the join of the
	     threads; it is defined by create_final_loads_for_reduction.  */
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1253
1254/* Bitmap containing uids of functions created by parallelization.  We cannot
1255   allocate it from the default obstack, as it must live across compilation
1256   of several functions; we make it gc allocated instead.  */
1257
1258static GTY(()) bitmap parallelized_functions;
1259
1260/* Returns true if FN was created by create_loop_fn.  */
1261
1262static bool
1263parallelized_function_p (tree fn)
1264{
1265  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1266    return false;
1267
1268  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1269}
1270
1271/* Creates and returns an empty function that will receive the body of
1272   a parallelized loop.  */
1273
1274static tree
1275create_loop_fn (void)
1276{
1277  char buf[100];
1278  char *tname;
1279  tree decl, type, name, t;
1280  struct function *act_cfun = cfun;
1281  static unsigned loopfn_num;
1282
1283  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1284  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1285  clean_symbol_name (tname);
1286  name = get_identifier (tname);
1287  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1288
1289  decl = build_decl (BUILTINS_LOCATION,
1290		     FUNCTION_DECL, name, type);
1291  if (!parallelized_functions)
1292    parallelized_functions = BITMAP_GGC_ALLOC ();
1293  bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1294
1295  TREE_STATIC (decl) = 1;
1296  TREE_USED (decl) = 1;
1297  DECL_ARTIFICIAL (decl) = 1;
1298  DECL_IGNORED_P (decl) = 0;
1299  TREE_PUBLIC (decl) = 0;
1300  DECL_UNINLINABLE (decl) = 1;
1301  DECL_EXTERNAL (decl) = 0;
1302  DECL_CONTEXT (decl) = NULL_TREE;
1303  DECL_INITIAL (decl) = make_node (BLOCK);
1304
1305  t = build_decl (BUILTINS_LOCATION,
1306		  RESULT_DECL, NULL_TREE, void_type_node);
1307  DECL_ARTIFICIAL (t) = 1;
1308  DECL_IGNORED_P (t) = 1;
1309  DECL_RESULT (decl) = t;
1310
1311  t = build_decl (BUILTINS_LOCATION,
1312		  PARM_DECL, get_identifier (".paral_data_param"),
1313		  ptr_type_node);
1314  DECL_ARTIFICIAL (t) = 1;
1315  DECL_ARG_TYPE (t) = ptr_type_node;
1316  DECL_CONTEXT (t) = decl;
1317  TREE_USED (t) = 1;
1318  DECL_ARGUMENTS (decl) = t;
1319
1320  allocate_struct_function (decl, false);
1321
1322  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1323     it.  */
1324  set_cfun (act_cfun);
1325
1326  return decl;
1327}
1328
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  /* Split the header after its labels: the (now empty) first part stays
     the loop header, the original contents live on in ORIG_HEADER.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit condition compares the control variable against NIT.  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh name and re-create the old result in
	 a new phi in ORIG_HEADER, so existing uses keep their reaching
	 definition.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* The exit test must now compare the fresh name.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }
  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks before the latch; they form the part of the last
     iteration to be duplicated past the exit.  */
  for (n = 0; bbs[n] != loop->latch; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */

  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      exit = single_dom_exit (loop);

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Any remaining copied-out reg must be the control variable.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 =  gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit_1),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1445
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA, via the function's
	 parameter.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR replaces the preheader value of the control variable; it is
     set by the GIMPLE_OMP_FOR below.  */
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment of the control variable is replaced by the
     GIMPLE_OMP_CONTINUE emitted below; remove the original statement.  */
  gsi = gsi_last_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD bypasses the loop from FOR_BB; END is the fallthru from the
     latch into the exit block.  Fill the exit phis with the values from
     the corresponding edges of the original header phis.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);

  return paral_bb;
}
1572
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* Gimplify the iteration count so it can be used in the guard below.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* Build the guard: !MAY_BE_ZERO && NITER >= MIN_PER_THREAD * N_THREADS.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
					new_arg_struct, n_threads);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1725
1726/* Returns true when LOOP contains vector phi nodes.  */
1727
1728static bool
1729loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1730{
1731  unsigned i;
1732  basic_block *bbs = get_loop_body_in_dom_order (loop);
1733  gimple_stmt_iterator gsi;
1734  bool res = true;
1735
1736  for (i = 0; i < loop->num_nodes; i++)
1737    for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1738      if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1739	goto end;
1740
1741  res = false;
1742 end:
1743  free (bbs);
1744  return res;
1745}
1746
1747/* Create a reduction_info struct, initialize it with REDUC_STMT
1748   and PHI, insert it to the REDUCTION_LIST.  */
1749
1750static void
1751build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1752{
1753  PTR *slot;
1754  struct reduction_info *new_reduction;
1755
1756  gcc_assert (reduc_stmt);
1757
1758  if (dump_file && (dump_flags & TDF_DETAILS))
1759    {
1760      fprintf (dump_file,
1761	       "Detected reduction. reduction stmt is: \n");
1762      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1763      fprintf (dump_file, "\n");
1764    }
1765
1766  new_reduction = XCNEW (struct reduction_info);
1767
1768  new_reduction->reduc_stmt = reduc_stmt;
1769  new_reduction->reduc_phi = phi;
1770  new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1771  slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1772  *slot = new_reduction;
1773}
1774
1775/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
1776
1777static void
1778gather_scalar_reductions (loop_p loop, htab_t reduction_list)
1779{
1780  gimple_stmt_iterator gsi;
1781  loop_vec_info simple_loop_info;
1782
1783  vect_dump = NULL;
1784  simple_loop_info = vect_analyze_loop_form (loop);
1785
1786  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1787    {
1788      gimple phi = gsi_stmt (gsi);
1789      affine_iv iv;
1790      tree res = PHI_RESULT (phi);
1791      bool double_reduc;
1792
1793      if (!is_gimple_reg (res))
1794	continue;
1795
1796      if (!simple_iv (loop, loop, res, &iv, true)
1797	&& simple_loop_info)
1798	{
1799           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
1800	   if (reduc_stmt && !double_reduc)
1801              build_new_reduction (reduction_list, reduc_stmt, phi);
1802        }
1803    }
1804    destroy_loop_vec_info (simple_loop_info, true);
1805}
1806
1807/* Try to initialize NITER for code generation part.  */
1808
1809static bool
1810try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1811{
1812  edge exit = single_dom_exit (loop);
1813
1814  gcc_assert (exit);
1815
1816  /* We need to know # of iterations, and there should be no uses of values
1817     defined inside loop outside of it, unless the values are invariants of
1818     the loop.  */
1819  if (!number_of_iterations_exit (loop, exit, niter, false))
1820    {
1821      if (dump_file && (dump_flags & TDF_DETAILS))
1822	fprintf (dump_file, "  FAILED: number of iterations not known\n");
1823      return false;
1824    }
1825
1826  return true;
1827}
1828
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every scalar value live after the loop must be part of a detected
     reduction; otherwise the loop cannot be parallelized.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern:  \n");
	    }
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the phi inside the loop through which VAL is used;
	     that is the phi the reduction was recorded under.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* A header phi that is neither a simple induction variable nor a
	 recorded reduction represents a cross-iteration dependence.  */
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
1924
1925/* Detect parallel loops and generate parallel code using libgomp
1926   primitives.  Returns true if some loop was parallelized, false
1927   otherwise.  */
1928
1929bool
1930parallelize_loops (void)
1931{
1932  unsigned n_threads = flag_tree_parallelize_loops;
1933  bool changed = false;
1934  struct loop *loop;
1935  struct tree_niter_desc niter_desc;
1936  loop_iterator li;
1937  htab_t reduction_list;
1938  HOST_WIDE_INT estimated;
1939  LOC loop_loc;
1940
1941  /* Do not parallelize loops in the functions created by parallelization.  */
1942  if (parallelized_function_p (cfun->decl))
1943    return false;
1944  if (cfun->has_nonlocal_label)
1945    return false;
1946
1947  reduction_list = htab_create (10, reduction_info_hash,
1948				     reduction_info_eq, free);
1949  init_stmt_vec_info_vec ();
1950
1951  FOR_EACH_LOOP (li, loop, 0)
1952    {
1953      htab_empty (reduction_list);
1954      if (dump_file && (dump_flags & TDF_DETAILS))
1955      {
1956        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
1957	if (loop->inner)
1958	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
1959	else
1960	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
1961      }
1962
1963      /* If we use autopar in graphite pass, we use its marked dependency
1964      checking results.  */
1965      if (flag_loop_parallelize_all && !loop->can_be_parallel)
1966      {
1967        if (dump_file && (dump_flags & TDF_DETAILS))
1968	   fprintf (dump_file, "loop is not parallel according to graphite\n");
1969	continue;
1970      }
1971
1972      if (!single_dom_exit (loop))
1973      {
1974
1975        if (dump_file && (dump_flags & TDF_DETAILS))
1976	  fprintf (dump_file, "loop is !single_dom_exit\n");
1977
1978	continue;
1979      }
1980
1981      if (/* And of course, the loop must be parallelizable.  */
1982	  !can_duplicate_loop_p (loop)
1983	  || loop_has_blocks_with_irreducible_flag (loop)
1984	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
1985	  /* FIXME: the check for vector phi nodes could be removed.  */
1986	  || loop_has_vector_phi_nodes (loop))
1987	continue;
1988      estimated = estimated_loop_iterations_int (loop, false);
1989      /* FIXME: Bypass this check as graphite doesn't update the
1990      count and frequency correctly now.  */
1991      if (!flag_loop_parallelize_all
1992	  && ((estimated !=-1
1993	     && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
1994	      /* Do not bother with loops in cold areas.  */
1995	      || optimize_loop_nest_for_size_p (loop)))
1996	continue;
1997
1998      if (!try_get_loop_niter (loop, &niter_desc))
1999	continue;
2000
2001      if (!try_create_reduction_list (loop, reduction_list))
2002	continue;
2003
2004      if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
2005	continue;
2006
2007      changed = true;
2008      if (dump_file && (dump_flags & TDF_DETAILS))
2009      {
2010	if (loop->inner)
2011	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2012	else
2013	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2014	loop_loc = find_loop_location (loop);
2015	if (loop_loc != UNKNOWN_LOC)
2016	  fprintf (dump_file, "\nloop at %s:%d: ",
2017		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2018      }
2019      gen_parallel_loop (loop, reduction_list,
2020			 n_threads, &niter_desc);
2021      verify_flow_info ();
2022      verify_dominators (CDI_DOMINATORS);
2023      verify_loop_structure ();
2024      verify_loop_closed_ssa ();
2025    }
2026
2027  free_stmt_vec_info_vec ();
2028  htab_delete (reduction_list);
2029
2030  /* Parallelization will cause new function calls to be inserted through
2031     which local variables will escape.  Reset the points-to solutions
2032     for ESCAPED and CALLUSED.  */
2033  if (changed)
2034    {
2035      pt_solution_reset (&cfun->gimple_df->escaped);
2036      pt_solution_reset (&cfun->gimple_df->callused);
2037    }
2038
2039  return changed;
2040}
2041
2042#include "gt-tree-parloops.h"
2043