1/* Predictive commoning.
2   Copyright (C) 2005-2022 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING3.  If not see
18<http://www.gnu.org/licenses/>.  */
19
20/* This file implements the predictive commoning optimization.  Predictive
21   commoning can be viewed as CSE around a loop, and with some improvements,
22   as generalized strength reduction-- i.e., reusing values computed in
23   earlier iterations of a loop in the later ones.  So far, the pass only
24   handles the most useful case, that is, reusing values of memory references.
25   If you think this is all just a special case of PRE, you are sort of right;
26   however, concentrating on loops is simpler, and makes it possible to
27   incorporate data dependence analysis to detect the opportunities, perform
28   loop unrolling to avoid copies together with renaming immediately,
29   and if needed, we could also take register pressure into account.
30
31   Let us demonstrate what is done on an example:
32
33   for (i = 0; i < 100; i++)
34     {
35       a[i+2] = a[i] + a[i+1];
36       b[10] = b[10] + i;
37       c[i] = c[99 - i];
38       d[i] = d[i + 1];
39     }
40
41   1) We find data references in the loop, and split them to mutually
42      independent groups (i.e., we find components of a data dependence
43      graph).  We ignore read-read dependences whose distance is not constant.
44      (TODO -- we could also ignore antidependences).  In this example, we
45      find the following groups:
46
47      a[i]{read}, a[i+1]{read}, a[i+2]{write}
48      b[10]{read}, b[10]{write}
49      c[99 - i]{read}, c[i]{write}
50      d[i + 1]{read}, d[i]{write}
51
52   2) Inside each of the group, we verify several conditions:
53      a) all the references must differ in indices only, and the indices
54	 must all have the same step
55      b) the references must dominate loop latch (and thus, they must be
56	 ordered by dominance relation).
57      c) the distance of the indices must be a small multiple of the step
58      We are then able to compute the difference of the references (# of
59      iterations before they point to the same place as the first of them).
60      Also, in case there are writes in the loop, we split the groups into
61      chains whose head is the write whose values are used by the reads in
62      the same chain.  The chains are then processed independently,
63      making the further transformations simpler.  Also, the shorter chains
64      need the same number of registers, but may require lower unrolling
65      factor in order to get rid of the copies on the loop latch.
66
67      In our example, we get the following chains (the chain for c is invalid).
68
69      a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70      b[10]{read,+0}, b[10]{write,+0}
71      d[i + 1]{read,+0}, d[i]{write,+1}
72
73   3) For each read, we determine the read or write whose value it reuses,
74      together with the distance of this reuse.  I.e. we take the last
75      reference before it with distance 0, or the last of the references
76      with the smallest positive distance to the read.  Then, we remove
77      the references that are not used in any of these chains, discard the
78      empty groups, and propagate all the links so that they point to the
79      single root reference of the chain (adjusting their distance
80      appropriately).  Some extra care needs to be taken for references with
81      step 0.  In our example (the numbers indicate the distance of the
82      reuse),
83
84      a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85      b[10] --> (*) 1, b[10] (*)
86
87   4) The chains are combined together if possible.  If the corresponding
88      elements of two chains are always combined together with the same
89      operator, we remember just the result of this combination, instead
90      of remembering the values separately.  We may need to perform
91      reassociation to enable combining, for example
92
93      e[i] + f[i+1] + e[i+1] + f[i]
94
95      can be reassociated as
96
97      (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99      and we can combine the chains for e and f into one chain.
100
101   5) For each root reference (end of the chain) R, let N be maximum distance
102      of a reference reusing its value.  Variables R0 up to RN are created,
103      together with phi nodes that transfer values from R1 .. RN to
104      R0 .. R(N-1).
105      Initial values are loaded to R0..R(N-1) (in case not all references
106      must necessarily be accessed and they may trap, we may fail here;
107      TODO sometimes, the loads could be guarded by a check for the number
108      of iterations).  Values loaded/stored in roots are also copied to
109      RN.  Other reads are replaced with the appropriate variable Ri.
110      Everything is put to SSA form.
111
112      As a small improvement, if R0 is dead after the root (i.e., all uses of
113      the value with the maximum distance dominate the root), we can avoid
114      creating RN and use R0 instead of it.
115
116      In our example, we get (only the parts concerning a and b are shown):
117      for (i = 0; i < 100; i++)
118	{
119	  f = phi (a[0], s);
120	  s = phi (a[1], f);
121	  x = phi (b[10], x);
122
123	  f = f + s;
124	  a[i+2] = f;
125	  x = x + i;
126	  b[10] = x;
127	}
128
129   6) Factor F for unrolling is determined as the smallest common multiple of
130      (N + 1) for each root reference (N for references for that we avoided
131      creating RN).  If F and the loop is small enough, loop is unrolled F
132      times.  The stores to RN (R0) in the copies of the loop body are
133      periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134      be coalesced and the copies can be eliminated.
135
136      TODO -- copy propagation and other optimizations may change the live
137      ranges of the temporary registers and prevent them from being coalesced;
138      this may increase the register pressure.
139
140      In our case, F = 2 and the (main loop of the) result is
141
142      for (i = 0; i < ...; i += 2)
143        {
144          f = phi (a[0], f);
145          s = phi (a[1], s);
146          x = phi (b[10], x);
147
148          f = f + s;
149          a[i+2] = f;
150          x = x + i;
151          b[10] = x;
152
153          s = s + f;
154          a[i+3] = s;
155          x = x + i;
156          b[10] = x;
157       }
158
159   Apart from predictive commoning on Load-Load and Store-Load chains, we
160   also support Store-Store chains -- stores killed by other store can be
161   eliminated.  Given below example:
162
163     for (i = 0; i < n; i++)
164       {
165	 a[i] = 1;
166	 a[i+2] = 2;
167       }
168
169   It can be replaced with:
170
171     t0 = a[0];
172     t1 = a[1];
173     for (i = 0; i < n; i++)
174       {
175	 a[i] = 1;
176	 t2 = 2;
177	 t0 = t1;
178	 t1 = t2;
179       }
180     a[n] = t0;
181     a[n+1] = t1;
182
183   If the loop runs more than 1 iterations, it can be further simplified into:
184
185     for (i = 0; i < n; i++)
186       {
187	 a[i] = 1;
188       }
189     a[n] = 2;
190     a[n+1] = 2;
191
192   The interesting part is this can be viewed either as general store motion
193   or general dead store elimination in either intra/inter-iterations way.
194
195   With trivial effort, we also support load inside Store-Store chains if the
196   load is dominated by a store statement in the same iteration of loop.  You
197   can see this as a restricted Store-Mixed-Load-Store chain.
198
199   TODO: For now, we don't support store-store chains in multi-exit loops.  We
200   force to not unroll in case of store-store chain even if other chains might
201   ask for unroll.
202
203   Predictive commoning can be generalized for arbitrary computations (not
204   just memory loads), and also nontrivial transfer functions (e.g., replacing
205   i * i with ii_last + 2 * i + 1), to generalize strength reduction.  */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
210#include "backend.h"
211#include "rtl.h"
212#include "tree.h"
213#include "gimple.h"
214#include "predict.h"
215#include "tree-pass.h"
216#include "ssa.h"
217#include "gimple-pretty-print.h"
218#include "alias.h"
219#include "fold-const.h"
220#include "cfgloop.h"
221#include "tree-eh.h"
222#include "gimplify.h"
223#include "gimple-iterator.h"
224#include "gimplify-me.h"
225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
231#include "tree-ssa.h"
232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
234#include "tree-affine.h"
235#include "builtins.h"
236#include "opts.h"
237
238/* The maximum number of iterations between the considered memory
239   references.  */
240
241#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242
243/* Data references (or phi nodes that carry data reference values across
244   loop iterations).  */
245
246typedef class dref_d
247{
248public:
249  /* The reference itself.  */
250  struct data_reference *ref;
251
252  /* The statement in that the reference appears.  */
253  gimple *stmt;
254
255  /* In case that STMT is a phi node, this field is set to the SSA name
256     defined by it in replace_phis_by_defined_names (in order to avoid
257     pointing to phi node that got reallocated in the meantime).  */
258  tree name_defined_by_phi;
259
260  /* Distance of the reference from the root of the chain (in number of
261     iterations of the loop).  */
262  unsigned distance;
263
264  /* Number of iterations offset from the first reference in the component.  */
265  widest_int offset;
266
267  /* Number of the reference in a component, in dominance ordering.  */
268  unsigned pos;
269
270  /* True if the memory reference is always accessed when the loop is
271     entered.  */
272  unsigned always_accessed : 1;
273} *dref;
274
275
276/* Type of the chain of the references.  */
277
278enum chain_type
279{
280  /* The addresses of the references in the chain are constant.  */
281  CT_INVARIANT,
282
283  /* There are only loads in the chain.  */
284  CT_LOAD,
285
286  /* Root of the chain is store, the rest are loads.  */
287  CT_STORE_LOAD,
288
289  /* There are only stores in the chain.  */
290  CT_STORE_STORE,
291
292  /* A combination of two chains.  */
293  CT_COMBINATION
294};
295
296/* Chains of data references.  */
297
298typedef struct chain
299{
300  chain (chain_type t) : type (t), op (ERROR_MARK), rslt_type (NULL_TREE),
301    ch1 (NULL), ch2 (NULL), length (0), init_seq (NULL), fini_seq (NULL),
302    has_max_use_after (false), all_always_accessed (false), combined (false),
303    inv_store_elimination (false) {}
304
305  /* Type of the chain.  */
306  enum chain_type type;
307
308  /* For combination chains, the operator and the two chains that are
309     combined, and the type of the result.  */
310  enum tree_code op;
311  tree rslt_type;
312  struct chain *ch1, *ch2;
313
314  /* The references in the chain.  */
315  auto_vec<dref> refs;
316
317  /* The maximum distance of the reference in the chain from the root.  */
318  unsigned length;
319
320  /* The variables used to copy the value throughout iterations.  */
321  auto_vec<tree> vars;
322
323  /* Initializers for the variables.  */
324  auto_vec<tree> inits;
325
326  /* Finalizers for the eliminated stores.  */
327  auto_vec<tree> finis;
328
329  /* gimple stmts intializing the initial variables of the chain.  */
330  gimple_seq init_seq;
331
332  /* gimple stmts finalizing the eliminated stores of the chain.  */
333  gimple_seq fini_seq;
334
335  /* True if there is a use of a variable with the maximal distance
336     that comes after the root in the loop.  */
337  unsigned has_max_use_after : 1;
338
339  /* True if all the memory references in the chain are always accessed.  */
340  unsigned all_always_accessed : 1;
341
342  /* True if this chain was combined together with some other chain.  */
343  unsigned combined : 1;
344
345  /* True if this is store elimination chain and eliminated stores store
346     loop invariant value into memory.  */
347  unsigned inv_store_elimination : 1;
348} *chain_p;
349
350
351/* Describes the knowledge about the step of the memory references in
352   the component.  */
353
354enum ref_step_type
355{
356  /* The step is zero.  */
357  RS_INVARIANT,
358
359  /* The step is nonzero.  */
360  RS_NONZERO,
361
362  /* The step may or may not be nonzero.  */
363  RS_ANY
364};
365
366/* Components of the data dependence graph.  */
367
368struct component
369{
370  component (bool es) : comp_step (RS_ANY), eliminate_store_p (es),
371    next (NULL) {}
372
373  /* The references in the component.  */
374  auto_vec<dref> refs;
375
376  /* What we know about the step of the references in the component.  */
377  enum ref_step_type comp_step;
378
379  /* True if all references in component are stores and we try to do
380     intra/inter loop iteration dead store elimination.  */
381  bool eliminate_store_p;
382
383  /* Next component in the list.  */
384  struct component *next;
385};
386
387/* A class to encapsulate the global states used for predictive
388   commoning work on top of one given LOOP.  */
389
390class pcom_worker
391{
392public:
393  pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {}
394
395  ~pcom_worker ()
396  {
397    free_data_refs (m_datarefs);
398    free_dependence_relations (m_dependences);
399    free_affine_expand_cache (&m_cache);
400    release_chains ();
401  }
402
403  pcom_worker (const pcom_worker &) = delete;
404  pcom_worker &operator= (const pcom_worker &) = delete;
405
406  /* Performs predictive commoning.  */
407  unsigned tree_predictive_commoning_loop (bool allow_unroll_p);
408
409  /* Perform the predictive commoning optimization for chains, make this
410     public for being called in callback execute_pred_commoning_cbck.  */
411  void execute_pred_commoning (bitmap tmp_vars);
412
413private:
414  /* The pointer to the given loop.  */
415  loop_p m_loop;
416
417  /* All data references.  */
418  auto_vec<data_reference_p, 10> m_datarefs;
419
420  /* All data dependences.  */
421  auto_vec<ddr_p, 10> m_dependences;
422
423  /* All chains.  */
424  auto_vec<chain_p> m_chains;
425
426  /* Bitmap of ssa names defined by looparound phi nodes covered by chains.  */
427  auto_bitmap m_looparound_phis;
428
429  typedef hash_map<tree, name_expansion *> tree_expand_map_t;
430  /* Cache used by tree_to_aff_combination_expand.  */
431  tree_expand_map_t *m_cache;
432
433  /* Splits dependence graph to components.  */
434  struct component *split_data_refs_to_components ();
435
436  /* Check the conditions on references inside each of components COMPS,
437     and remove the unsuitable components from the list.  */
438  struct component *filter_suitable_components (struct component *comps);
439
440  /* Find roots of the values and determine distances in components COMPS,
441     and separates the references to chains.  */
442  void determine_roots (struct component *comps);
443
444  /* Prepare initializers for chains, and free chains that cannot
445     be used because the initializers might trap.  */
446  void prepare_initializers ();
447
448  /* Generates finalizer memory reference for chains.  Returns true if
449     finalizer code generation for chains breaks loop closed ssa form.  */
450  bool prepare_finalizers ();
451
452  /* Try to combine the chains.  */
453  void try_combine_chains ();
454
455  /* Frees CHAINS.  */
456  void release_chains ();
457
458  /* Frees a chain CHAIN.  */
459  void release_chain (chain_p chain);
460
461  /* Prepare initializers for CHAIN.  Returns false if this is impossible
462     because one of these initializers may trap, true otherwise.  */
463  bool prepare_initializers_chain (chain_p chain);
464
465  /* Generates finalizer memory references for CHAIN.  Returns true
466     if finalizer code for CHAIN can be generated, otherwise false.  */
467  bool prepare_finalizers_chain (chain_p chain);
468
469  /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
470  void aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset);
471
472  /* Determines number of iterations of the innermost enclosing loop before
473     B refers to exactly the same location as A and stores it to OFF.  */
474  bool determine_offset (struct data_reference *a, struct data_reference *b,
475			 poly_widest_int *off);
476
477  /* Returns true if the component COMP satisfies the conditions
478     described in 2) at the beginning of this file.  */
479  bool suitable_component_p (struct component *comp);
480
481  /* Returns true if REF is a valid initializer for ROOT with given
482     DISTANCE (in iterations of the innermost enclosing loop).  */
483  bool valid_initializer_p (struct data_reference *ref, unsigned distance,
484			    struct data_reference *root);
485
486  /* Finds looparound phi node of loop that copies the value of REF.  */
487  gphi *find_looparound_phi (dref ref, dref root);
488
489  /* Find roots of the values and determine distances in the component
490     COMP.  The references are redistributed into chains.  */
491  void determine_roots_comp (struct component *comp);
492
493  /* For references in CHAIN that are copied around the loop, add the
494     results of such copies to the chain.  */
495  void add_looparound_copies (chain_p chain);
496
497  /* Returns the single statement in that NAME is used, excepting
498     the looparound phi nodes contained in one of the chains.  */
499  gimple *single_nonlooparound_use (tree name);
500
501  /* Remove statement STMT, as well as the chain of assignments in that
502     it is used.  */
503  void remove_stmt (gimple *stmt);
504
505  /* Perform the predictive commoning optimization for a chain CHAIN.  */
506  void execute_pred_commoning_chain (chain_p chain, bitmap tmp_vars);
507
508  /* Returns the modify statement that uses NAME.  */
509  gimple *find_use_stmt (tree *name);
510
511  /* If the operation used in STMT is associative and commutative, go
512     through the tree of the same operations and returns its root.  */
513  gimple *find_associative_operation_root (gimple *stmt, unsigned *distance);
514
515  /* Returns the common statement in that NAME1 and NAME2 have a use.  */
516  gimple *find_common_use_stmt (tree *name1, tree *name2);
517
518  /* Checks whether R1 and R2 are combined together using CODE, with the
519     result in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order
520     R2 CODE R1 if it is true.  */
521  bool combinable_refs_p (dref r1, dref r2, enum tree_code *code, bool *swap,
522			  tree *rslt_type);
523
524  /* Reassociates the expression in that NAME1 and NAME2 are used so that
525     they are combined in a single statement, and returns this statement.  */
526  gimple *reassociate_to_the_same_stmt (tree name1, tree name2);
527
528  /* Returns the statement that combines references R1 and R2.  */
529  gimple *stmt_combining_refs (dref r1, dref r2);
530
531  /* Tries to combine chains CH1 and CH2 together.  */
532  chain_p combine_chains (chain_p ch1, chain_p ch2);
533};
534
535/* Dumps data reference REF to FILE.  */
536
537extern void dump_dref (FILE *, dref);
538void
539dump_dref (FILE *file, dref ref)
540{
541  if (ref->ref)
542    {
543      fprintf (file, "    ");
544      print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
545      fprintf (file, " (id %u%s)\n", ref->pos,
546	       DR_IS_READ (ref->ref) ? "" : ", write");
547
548      fprintf (file, "      offset ");
549      print_decs (ref->offset, file);
550      fprintf (file, "\n");
551
552      fprintf (file, "      distance %u\n", ref->distance);
553    }
554  else
555    {
556      if (gimple_code (ref->stmt) == GIMPLE_PHI)
557	fprintf (file, "    looparound ref\n");
558      else
559	fprintf (file, "    combination ref\n");
560      fprintf (file, "      in statement ");
561      print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
562      fprintf (file, "\n");
563      fprintf (file, "      distance %u\n", ref->distance);
564    }
565
566}
567
568/* Dumps CHAIN to FILE.  */
569
570extern void dump_chain (FILE *, chain_p);
571void
572dump_chain (FILE *file, chain_p chain)
573{
574  dref a;
575  const char *chain_type;
576  unsigned i;
577  tree var;
578
579  switch (chain->type)
580    {
581    case CT_INVARIANT:
582      chain_type = "Load motion";
583      break;
584
585    case CT_LOAD:
586      chain_type = "Loads-only";
587      break;
588
589    case CT_STORE_LOAD:
590      chain_type = "Store-loads";
591      break;
592
593    case CT_STORE_STORE:
594      chain_type = "Store-stores";
595      break;
596
597    case CT_COMBINATION:
598      chain_type = "Combination";
599      break;
600
601    default:
602      gcc_unreachable ();
603    }
604
605  fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
606	   chain->combined ? " (combined)" : "");
607  if (chain->type != CT_INVARIANT)
608    fprintf (file, "  max distance %u%s\n", chain->length,
609	     chain->has_max_use_after ? "" : ", may reuse first");
610
611  if (chain->type == CT_COMBINATION)
612    {
613      fprintf (file, "  equal to %p %s %p in type ",
614	       (void *) chain->ch1, op_symbol_code (chain->op),
615	       (void *) chain->ch2);
616      print_generic_expr (file, chain->rslt_type, TDF_SLIM);
617      fprintf (file, "\n");
618    }
619
620  if (chain->vars.exists ())
621    {
622      fprintf (file, "  vars");
623      FOR_EACH_VEC_ELT (chain->vars, i, var)
624	{
625	  fprintf (file, " ");
626	  print_generic_expr (file, var, TDF_SLIM);
627	}
628      fprintf (file, "\n");
629    }
630
631  if (chain->inits.exists ())
632    {
633      fprintf (file, "  inits");
634      FOR_EACH_VEC_ELT (chain->inits, i, var)
635	{
636	  fprintf (file, " ");
637	  print_generic_expr (file, var, TDF_SLIM);
638	}
639      fprintf (file, "\n");
640    }
641
642  fprintf (file, "  references:\n");
643  FOR_EACH_VEC_ELT (chain->refs, i, a)
644    dump_dref (file, a);
645
646  fprintf (file, "\n");
647}
648
649/* Dumps CHAINS to FILE.  */
650
651void
652dump_chains (FILE *file, const vec<chain_p> &chains)
653{
654  chain_p chain;
655  unsigned i;
656
657  FOR_EACH_VEC_ELT (chains, i, chain)
658    dump_chain (file, chain);
659}
660
661/* Dumps COMP to FILE.  */
662
663extern void dump_component (FILE *, struct component *);
664void
665dump_component (FILE *file, struct component *comp)
666{
667  dref a;
668  unsigned i;
669
670  fprintf (file, "Component%s:\n",
671	   comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
672  FOR_EACH_VEC_ELT (comp->refs, i, a)
673    dump_dref (file, a);
674  fprintf (file, "\n");
675}
676
677/* Dumps COMPS to FILE.  */
678
679extern void dump_components (FILE *, struct component *);
680void
681dump_components (FILE *file, struct component *comps)
682{
683  struct component *comp;
684
685  for (comp = comps; comp; comp = comp->next)
686    dump_component (file, comp);
687}
688
689/* Frees a chain CHAIN.  */
690
691void
692pcom_worker::release_chain (chain_p chain)
693{
694  dref ref;
695  unsigned i;
696
697  if (chain == NULL)
698    return;
699
700  FOR_EACH_VEC_ELT (chain->refs, i, ref)
701    free (ref);
702
703  if (chain->init_seq)
704    gimple_seq_discard (chain->init_seq);
705
706  if (chain->fini_seq)
707    gimple_seq_discard (chain->fini_seq);
708
709  delete chain;
710}
711
712/* Frees CHAINS.  */
713
714void
715pcom_worker::release_chains ()
716{
717  unsigned i;
718  chain_p chain;
719
720  FOR_EACH_VEC_ELT (m_chains, i, chain)
721    release_chain (chain);
722}
723
724/* Frees list of components COMPS.  */
725
726static void
727release_components (struct component *comps)
728{
729  struct component *act, *next;
730
731  for (act = comps; act; act = next)
732    {
733      next = act->next;
734      delete act;
735    }
736}
737
738/* Finds a root of tree given by FATHERS containing A, and performs path
739   shortening.  */
740
741static unsigned
742component_of (vec<unsigned> &fathers, unsigned a)
743{
744  unsigned root, n;
745
746  for (root = a; root != fathers[root]; root = fathers[root])
747    continue;
748
749  for (; a != root; a = n)
750    {
751      n = fathers[a];
752      fathers[a] = root;
753    }
754
755  return root;
756}
757
758/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
759   components, A and B are components to merge.  */
760
761static void
762merge_comps (vec<unsigned> &fathers, vec<unsigned> &sizes,
763	     unsigned a, unsigned b)
764{
765  unsigned ca = component_of (fathers, a);
766  unsigned cb = component_of (fathers, b);
767
768  if (ca == cb)
769    return;
770
771  if (sizes[ca] < sizes[cb])
772    {
773      sizes[cb] += sizes[ca];
774      fathers[ca] = cb;
775    }
776  else
777    {
778      sizes[ca] += sizes[cb];
779      fathers[cb] = ca;
780    }
781}
782
783/* Returns true if A is a reference that is suitable for predictive commoning
784   in the innermost loop that contains it.  REF_STEP is set according to the
785   step of the reference A.  */
786
787static bool
788suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
789{
790  tree ref = DR_REF (a), step = DR_STEP (a);
791
792  if (!step
793      || TREE_THIS_VOLATILE (ref)
794      || !is_gimple_reg_type (TREE_TYPE (ref))
795      || tree_could_throw_p (ref))
796    return false;
797
798  if (integer_zerop (step))
799    *ref_step = RS_INVARIANT;
800  else if (integer_nonzerop (step))
801    *ref_step = RS_NONZERO;
802  else
803    *ref_step = RS_ANY;
804
805  return true;
806}
807
808/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET.  */
809
810void
811pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
812					aff_tree *offset)
813{
814  tree type = TREE_TYPE (DR_OFFSET (dr));
815  aff_tree delta;
816
817  tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache);
818  aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
819  aff_combination_add (offset, &delta);
820}
821
822/* Determines number of iterations of the innermost enclosing loop before B
823   refers to exactly the same location as A and stores it to OFF.  If A and
824   B do not have the same step, they never meet, or anything else fails,
825   returns false, otherwise returns true.  Both A and B are assumed to
826   satisfy suitable_reference_p.  */
827
828bool
829pcom_worker::determine_offset (struct data_reference *a,
830			       struct data_reference *b, poly_widest_int *off)
831{
832  aff_tree diff, baseb, step;
833  tree typea, typeb;
834
835  /* Check that both the references access the location in the same type.  */
836  typea = TREE_TYPE (DR_REF (a));
837  typeb = TREE_TYPE (DR_REF (b));
838  if (!useless_type_conversion_p (typeb, typea))
839    return false;
840
841  /* Check whether the base address and the step of both references is the
842     same.  */
843  if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
844      || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
845    return false;
846
847  if (integer_zerop (DR_STEP (a)))
848    {
849      /* If the references have loop invariant address, check that they access
850	 exactly the same location.  */
851      *off = 0;
852      return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
853	      && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
854    }
855
856  /* Compare the offsets of the addresses, and check whether the difference
857     is a multiple of step.  */
858  aff_combination_dr_offset (a, &diff);
859  aff_combination_dr_offset (b, &baseb);
860  aff_combination_scale (&baseb, -1);
861  aff_combination_add (&diff, &baseb);
862
863  tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
864				  &step, &m_cache);
865  return aff_combination_constant_multiple_p (&diff, &step, off);
866}
867
868/* Returns the last basic block in LOOP for that we are sure that
869   it is executed whenever the loop is entered.  */
870
871static basic_block
872last_always_executed_block (class loop *loop)
873{
874  unsigned i;
875  auto_vec<edge> exits = get_loop_exit_edges (loop);
876  edge ex;
877  basic_block last = loop->latch;
878
879  FOR_EACH_VEC_ELT (exits, i, ex)
880    last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
881
882  return last;
883}
884
885/* Splits dependence graph on DATAREFS described by DEPENDENCES to
886   components.  */
887
888struct component *
889pcom_worker::split_data_refs_to_components ()
890{
891  unsigned i, n = m_datarefs.length ();
892  unsigned ca, ia, ib, bad;
893  struct data_reference *dr, *dra, *drb;
894  struct data_dependence_relation *ddr;
895  struct component *comp_list = NULL, *comp;
896  dref dataref;
897  /* Don't do store elimination if loop has multiple exit edges.  */
898  bool eliminate_store_p = single_exit (m_loop) != NULL;
899  basic_block last_always_executed = last_always_executed_block (m_loop);
900  auto_bitmap no_store_store_comps;
901  auto_vec<unsigned> comp_father (n + 1);
902  auto_vec<unsigned> comp_size (n + 1);
903  comp_father.quick_grow (n + 1);
904  comp_size.quick_grow (n + 1);
905
906  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
907    {
908      if (!DR_REF (dr))
909	  /* A fake reference for call or asm_expr that may clobber memory;
910	     just fail.  */
911	  return NULL;
912      /* predcom pass isn't prepared to handle calls with data references.  */
913      if (is_gimple_call (DR_STMT (dr)))
914	return NULL;
915      dr->aux = (void *) (size_t) i;
916      comp_father[i] = i;
917      comp_size[i] = 1;
918    }
919
920  /* A component reserved for the "bad" data references.  */
921  comp_father[n] = n;
922  comp_size[n] = 1;
923
924  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
925    {
926      enum ref_step_type dummy;
927
928      if (!suitable_reference_p (dr, &dummy))
929	{
930	  ia = (unsigned) (size_t) dr->aux;
931	  merge_comps (comp_father, comp_size, n, ia);
932	}
933    }
934
935  FOR_EACH_VEC_ELT (m_dependences, i, ddr)
936    {
937      poly_widest_int dummy_off;
938
939      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
940	continue;
941
942      dra = DDR_A (ddr);
943      drb = DDR_B (ddr);
944
945      /* Don't do store elimination if there is any unknown dependence for
946	 any store data reference.  */
947      if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
948	  && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
949	      || DDR_NUM_DIST_VECTS (ddr) == 0))
950	eliminate_store_p = false;
951
952      ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
953      ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
954      if (ia == ib)
955	continue;
956
957      bad = component_of (comp_father, n);
958
959      /* If both A and B are reads, we may ignore unsuitable dependences.  */
960      if (DR_IS_READ (dra) && DR_IS_READ (drb))
961	{
962	  if (ia == bad || ib == bad
963	      || !determine_offset (dra, drb, &dummy_off))
964	    continue;
965	}
966      /* If A is read and B write or vice versa and there is unsuitable
967	 dependence, instead of merging both components into a component
968	 that will certainly not pass suitable_component_p, just put the
969	 read into bad component, perhaps at least the write together with
970	 all the other data refs in it's component will be optimizable.  */
971      else if (DR_IS_READ (dra) && ib != bad)
972	{
973	  if (ia == bad)
974	    {
975	      bitmap_set_bit (no_store_store_comps, ib);
976	      continue;
977	    }
978	  else if (!determine_offset (dra, drb, &dummy_off))
979	    {
980	      bitmap_set_bit (no_store_store_comps, ib);
981	      merge_comps (comp_father, comp_size, bad, ia);
982	      continue;
983	    }
984	}
985      else if (DR_IS_READ (drb) && ia != bad)
986	{
987	  if (ib == bad)
988	    {
989	      bitmap_set_bit (no_store_store_comps, ia);
990	      continue;
991	    }
992	  else if (!determine_offset (dra, drb, &dummy_off))
993	    {
994	      bitmap_set_bit (no_store_store_comps, ia);
995	      merge_comps (comp_father, comp_size, bad, ib);
996	      continue;
997	    }
998	}
999      else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
1000	       && ia != bad && ib != bad
1001	       && !determine_offset (dra, drb, &dummy_off))
1002	{
1003	  merge_comps (comp_father, comp_size, bad, ia);
1004	  merge_comps (comp_father, comp_size, bad, ib);
1005	  continue;
1006	}
1007
1008      merge_comps (comp_father, comp_size, ia, ib);
1009    }
1010
1011  if (eliminate_store_p)
1012    {
1013      tree niters = number_of_latch_executions (m_loop);
1014
1015      /* Don't do store elimination if niters info is unknown because stores
1016	 in the last iteration can't be eliminated and we need to recover it
1017	 after loop.  */
1018      eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
1019    }
1020
1021  auto_vec<struct component *> comps;
1022  comps.safe_grow_cleared (n, true);
1023  bad = component_of (comp_father, n);
1024  FOR_EACH_VEC_ELT (m_datarefs, i, dr)
1025    {
1026      ia = (unsigned) (size_t) dr->aux;
1027      ca = component_of (comp_father, ia);
1028      if (ca == bad)
1029	continue;
1030
1031      comp = comps[ca];
1032      if (!comp)
1033	{
1034	  comp = new component (eliminate_store_p);
1035	  comp->refs.reserve_exact (comp_size[ca]);
1036	  comps[ca] = comp;
1037	}
1038
1039      dataref = XCNEW (class dref_d);
1040      dataref->ref = dr;
1041      dataref->stmt = DR_STMT (dr);
1042      dataref->offset = 0;
1043      dataref->distance = 0;
1044
1045      dataref->always_accessed
1046	      = dominated_by_p (CDI_DOMINATORS, last_always_executed,
1047				gimple_bb (dataref->stmt));
1048      dataref->pos = comp->refs.length ();
1049      comp->refs.quick_push (dataref);
1050    }
1051
1052  if (eliminate_store_p)
1053    {
1054      bitmap_iterator bi;
1055      EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
1056	{
1057	  ca = component_of (comp_father, ia);
1058	  if (ca != bad)
1059	    comps[ca]->eliminate_store_p = false;
1060	}
1061    }
1062
1063  for (i = 0; i < n; i++)
1064    {
1065      comp = comps[i];
1066      if (comp)
1067	{
1068	  comp->next = comp_list;
1069	  comp_list = comp;
1070	}
1071    }
1072  return comp_list;
1073}
1074
1075/* Returns true if the component COMP satisfies the conditions
1076   described in 2) at the beginning of this file.  */
1077
1078bool
1079pcom_worker::suitable_component_p (struct component *comp)
1080{
1081  unsigned i;
1082  dref a, first;
1083  basic_block ba, bp = m_loop->header;
1084  bool ok, has_write = false;
1085
1086  FOR_EACH_VEC_ELT (comp->refs, i, a)
1087    {
1088      ba = gimple_bb (a->stmt);
1089
1090      if (!just_once_each_iteration_p (m_loop, ba))
1091	return false;
1092
1093      gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
1094      bp = ba;
1095
1096      if (DR_IS_WRITE (a->ref))
1097	has_write = true;
1098    }
1099
1100  first = comp->refs[0];
1101  ok = suitable_reference_p (first->ref, &comp->comp_step);
1102  gcc_assert (ok);
1103  first->offset = 0;
1104
1105  for (i = 1; comp->refs.iterate (i, &a); i++)
1106    {
1107      /* Polynomial offsets are no use, since we need to know the
1108	 gap between iteration numbers at compile time.  */
1109      poly_widest_int offset;
1110      if (!determine_offset (first->ref, a->ref, &offset)
1111	  || !offset.is_constant (&a->offset))
1112	return false;
1113
1114      enum ref_step_type a_step;
1115      gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
1116			   && a_step == comp->comp_step);
1117    }
1118
1119  /* If there is a write inside the component, we must know whether the
1120     step is nonzero or not -- we would not otherwise be able to recognize
1121     whether the value accessed by reads comes from the OFFSET-th iteration
1122     or the previous one.  */
1123  if (has_write && comp->comp_step == RS_ANY)
1124    return false;
1125
1126  return true;
1127}
1128
1129/* Check the conditions on references inside each of components COMPS,
1130   and remove the unsuitable components from the list.  The new list
1131   of components is returned.  The conditions are described in 2) at
1132   the beginning of this file.  */
1133
1134struct component *
1135pcom_worker::filter_suitable_components (struct component *comps)
1136{
1137  struct component **comp, *act;
1138
1139  for (comp = &comps; *comp; )
1140    {
1141      act = *comp;
1142      if (suitable_component_p (act))
1143	comp = &act->next;
1144      else
1145	{
1146	  dref ref;
1147	  unsigned i;
1148
1149	  *comp = act->next;
1150	  FOR_EACH_VEC_ELT (act->refs, i, ref)
1151	    free (ref);
1152	  delete act;
1153	}
1154    }
1155
1156  return comps;
1157}
1158
1159/* Compares two drefs A and B by their offset and position.  Callback for
1160   qsort.  */
1161
1162static int
1163order_drefs (const void *a, const void *b)
1164{
1165  const dref *const da = (const dref *) a;
1166  const dref *const db = (const dref *) b;
1167  int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1168
1169  if (offcmp != 0)
1170    return offcmp;
1171
1172  return (*da)->pos - (*db)->pos;
1173}
1174
1175/* Compares two drefs A and B by their position.  Callback for qsort.  */
1176
1177static int
1178order_drefs_by_pos (const void *a, const void *b)
1179{
1180  const dref *const da = (const dref *) a;
1181  const dref *const db = (const dref *) b;
1182
1183  return (*da)->pos - (*db)->pos;
1184}
1185
1186/* Returns root of the CHAIN.  */
1187
1188static inline dref
1189get_chain_root (chain_p chain)
1190{
1191  return chain->refs[0];
1192}
1193
1194/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1195   exist.  */
1196
1197static inline dref
1198get_chain_last_write_at (chain_p chain, unsigned distance)
1199{
1200  for (unsigned i = chain->refs.length (); i > 0; i--)
1201    if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1202	&& distance == chain->refs[i - 1]->distance)
1203      return chain->refs[i - 1];
1204
1205  return NULL;
1206}
1207
1208/* Given CHAIN, returns the last write ref with the same distance before load
1209   at index LOAD_IDX, or NULL if it doesn't exist.  */
1210
1211static inline dref
1212get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1213{
1214  gcc_assert (load_idx < chain->refs.length ());
1215
1216  unsigned distance = chain->refs[load_idx]->distance;
1217
1218  for (unsigned i = load_idx; i > 0; i--)
1219    if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1220	&& distance == chain->refs[i - 1]->distance)
1221      return chain->refs[i - 1];
1222
1223  return NULL;
1224}
1225
1226/* Adds REF to the chain CHAIN.  */
1227
1228static void
1229add_ref_to_chain (chain_p chain, dref ref)
1230{
1231  dref root = get_chain_root (chain);
1232
1233  gcc_assert (wi::les_p (root->offset, ref->offset));
1234  widest_int dist = ref->offset - root->offset;
1235  gcc_assert (wi::fits_uhwi_p (dist));
1236
1237  chain->refs.safe_push (ref);
1238
1239  ref->distance = dist.to_uhwi ();
1240
1241  if (ref->distance >= chain->length)
1242    {
1243      chain->length = ref->distance;
1244      chain->has_max_use_after = false;
1245    }
1246
1247  /* Promote this chain to CT_STORE_STORE if it has multiple stores.  */
1248  if (DR_IS_WRITE (ref->ref))
1249    chain->type = CT_STORE_STORE;
1250
1251  /* Don't set the flag for store-store chain since there is no use.  */
1252  if (chain->type != CT_STORE_STORE
1253      && ref->distance == chain->length
1254      && ref->pos > root->pos)
1255    chain->has_max_use_after = true;
1256
1257  chain->all_always_accessed &= ref->always_accessed;
1258}
1259
1260/* Returns the chain for invariant component COMP.  */
1261
1262static chain_p
1263make_invariant_chain (struct component *comp)
1264{
1265  chain_p chain = new struct chain (CT_INVARIANT);
1266  unsigned i;
1267  dref ref;
1268
1269  chain->all_always_accessed = true;
1270
1271  FOR_EACH_VEC_ELT (comp->refs, i, ref)
1272    {
1273      chain->refs.safe_push (ref);
1274      chain->all_always_accessed &= ref->always_accessed;
1275    }
1276
1277  chain->inits = vNULL;
1278  chain->finis = vNULL;
1279
1280  return chain;
1281}
1282
1283/* Make a new chain of type TYPE rooted at REF.  */
1284
1285static chain_p
1286make_rooted_chain (dref ref, enum chain_type type)
1287{
1288  chain_p chain = new struct chain (type);
1289
1290  chain->refs.safe_push (ref);
1291  chain->all_always_accessed = ref->always_accessed;
1292  ref->distance = 0;
1293
1294  chain->inits = vNULL;
1295  chain->finis = vNULL;
1296
1297  return chain;
1298}
1299
1300/* Returns true if CHAIN is not trivial.  */
1301
1302static bool
1303nontrivial_chain_p (chain_p chain)
1304{
1305  return chain != NULL && chain->refs.length () > 1;
1306}
1307
1308/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1309   is no such name.  */
1310
1311static tree
1312name_for_ref (dref ref)
1313{
1314  tree name;
1315
1316  if (is_gimple_assign (ref->stmt))
1317    {
1318      if (!ref->ref || DR_IS_READ (ref->ref))
1319	name = gimple_assign_lhs (ref->stmt);
1320      else
1321	name = gimple_assign_rhs1 (ref->stmt);
1322    }
1323  else
1324    name = PHI_RESULT (ref->stmt);
1325
1326  return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1327}
1328
1329/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1330   iterations of the innermost enclosing loop).  */
1331
1332bool
1333pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
1334				  struct data_reference *root)
1335{
1336  aff_tree diff, base, step;
1337  poly_widest_int off;
1338
1339  /* Both REF and ROOT must be accessing the same object.  */
1340  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1341    return false;
1342
1343  /* The initializer is defined outside of loop, hence its address must be
1344     invariant inside the loop.  */
1345  gcc_assert (integer_zerop (DR_STEP (ref)));
1346
1347  /* If the address of the reference is invariant, initializer must access
1348     exactly the same location.  */
1349  if (integer_zerop (DR_STEP (root)))
1350    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1351	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1352
1353  /* Verify that this index of REF is equal to the root's index at
1354     -DISTANCE-th iteration.  */
1355  aff_combination_dr_offset (root, &diff);
1356  aff_combination_dr_offset (ref, &base);
1357  aff_combination_scale (&base, -1);
1358  aff_combination_add (&diff, &base);
1359
1360  tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1361				  &step, &m_cache);
1362  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1363    return false;
1364
1365  if (maybe_ne (off, distance))
1366    return false;
1367
1368  return true;
1369}
1370
1371/* Finds looparound phi node of loop that copies the value of REF, and if its
1372   initial value is correct (equal to initial value of REF shifted by one
1373   iteration), returns the phi node.  Otherwise, NULL_TREE is returned.  ROOT
1374   is the root of the current chain.  */
1375
1376gphi *
1377pcom_worker::find_looparound_phi (dref ref, dref root)
1378{
1379  tree name, init, init_ref;
1380  gimple *init_stmt;
1381  edge latch = loop_latch_edge (m_loop);
1382  struct data_reference init_dr;
1383  gphi_iterator psi;
1384
1385  if (is_gimple_assign (ref->stmt))
1386    {
1387      if (DR_IS_READ (ref->ref))
1388	name = gimple_assign_lhs (ref->stmt);
1389      else
1390	name = gimple_assign_rhs1 (ref->stmt);
1391    }
1392  else
1393    name = PHI_RESULT (ref->stmt);
1394  if (!name)
1395    return NULL;
1396
1397  tree entry_vuse = NULL_TREE;
1398  gphi *phi = NULL;
1399  for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (psi); gsi_next (&psi))
1400    {
1401      gphi *p = psi.phi ();
1402      if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
1403	phi = p;
1404      else if (virtual_operand_p (gimple_phi_result (p)))
1405	entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (m_loop));
1406      if (phi && entry_vuse)
1407	break;
1408    }
1409  if (!phi || !entry_vuse)
1410    return NULL;
1411
1412  init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
1413  if (TREE_CODE (init) != SSA_NAME)
1414    return NULL;
1415  init_stmt = SSA_NAME_DEF_STMT (init);
1416  if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1417    return NULL;
1418  gcc_assert (gimple_assign_lhs (init_stmt) == init);
1419
1420  init_ref = gimple_assign_rhs1 (init_stmt);
1421  if (!REFERENCE_CLASS_P (init_ref)
1422      && !DECL_P (init_ref))
1423    return NULL;
1424
1425  /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1426     loop enclosing PHI).  */
1427  memset (&init_dr, 0, sizeof (struct data_reference));
1428  DR_REF (&init_dr) = init_ref;
1429  DR_STMT (&init_dr) = phi;
1430  if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop,
1431			     init_stmt))
1432    return NULL;
1433
1434  if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1435    return NULL;
1436
1437  /* Make sure nothing clobbers the location we re-use the initial value
1438     from.  */
1439  if (entry_vuse != gimple_vuse (init_stmt))
1440    {
1441      ao_ref ref;
1442      ao_ref_init (&ref, init_ref);
1443      unsigned limit = param_sccvn_max_alias_queries_per_access;
1444      tree vdef = entry_vuse;
1445      do
1446	{
1447	  gimple *def = SSA_NAME_DEF_STMT (vdef);
1448	  if (limit-- == 0 || gimple_code (def) == GIMPLE_PHI)
1449	    return NULL;
1450	  if (stmt_may_clobber_ref_p_1 (def, &ref))
1451	    /* When the stmt is an assign to init_ref we could in theory
1452	       use its RHS for the initial value of the looparound PHI
1453	       we replace in prepare_initializers_chain, but we have
1454	       no convenient place to store this info at the moment.  */
1455	    return NULL;
1456	  vdef = gimple_vuse (def);
1457	}
1458      while (vdef != gimple_vuse (init_stmt));
1459    }
1460
1461  return phi;
1462}
1463
1464/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */
1465
1466static void
1467insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1468{
1469  dref nw = XCNEW (class dref_d), aref;
1470  unsigned i;
1471
1472  nw->stmt = phi;
1473  nw->distance = ref->distance + 1;
1474  nw->always_accessed = 1;
1475
1476  FOR_EACH_VEC_ELT (chain->refs, i, aref)
1477    if (aref->distance >= nw->distance)
1478      break;
1479  chain->refs.safe_insert (i, nw);
1480
1481  if (nw->distance > chain->length)
1482    {
1483      chain->length = nw->distance;
1484      chain->has_max_use_after = false;
1485    }
1486}
1487
1488/* For references in CHAIN that are copied around the loop (created previously
1489   by PRE, or by user), add the results of such copies to the chain.  This
1490   enables us to remove the copies by unrolling, and may need less registers
1491   (also, it may allow us to combine chains together).  */
1492
1493void
1494pcom_worker::add_looparound_copies (chain_p chain)
1495{
1496  unsigned i;
1497  dref ref, root = get_chain_root (chain);
1498  gphi *phi;
1499
1500  if (chain->type == CT_STORE_STORE)
1501    return;
1502
1503  FOR_EACH_VEC_ELT (chain->refs, i, ref)
1504    {
1505      phi = find_looparound_phi (ref, root);
1506      if (!phi)
1507	continue;
1508
1509      bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1510      insert_looparound_copy (chain, ref, phi);
1511    }
1512}
1513
1514/* Find roots of the values and determine distances in the component COMP.
1515   The references are redistributed into chains.  */
1516
1517void
1518pcom_worker::determine_roots_comp (struct component *comp)
1519{
1520  unsigned i;
1521  dref a;
1522  chain_p chain = NULL;
1523  widest_int last_ofs = 0;
1524  enum chain_type type;
1525
1526  /* Invariants are handled specially.  */
1527  if (comp->comp_step == RS_INVARIANT)
1528    {
1529      chain = make_invariant_chain (comp);
1530      m_chains.safe_push (chain);
1531      return;
1532    }
1533
1534  /* Trivial component.  */
1535  if (comp->refs.length () <= 1)
1536    {
1537      if (comp->refs.length () == 1)
1538	{
1539	  free (comp->refs[0]);
1540	  comp->refs.truncate (0);
1541	}
1542      return;
1543    }
1544
1545  comp->refs.qsort (order_drefs);
1546
1547  /* For Store-Store chain, we only support load if it is dominated by a
1548     store statement in the same iteration of loop.  */
1549  if (comp->eliminate_store_p)
1550    for (a = NULL, i = 0; i < comp->refs.length (); i++)
1551      {
1552	if (DR_IS_WRITE (comp->refs[i]->ref))
1553	  a = comp->refs[i];
1554	else if (a == NULL || a->offset != comp->refs[i]->offset)
1555	  {
1556	    /* If there is load that is not dominated by a store in the
1557	       same iteration of loop, clear the flag so no Store-Store
1558	       chain is generated for this component.  */
1559	    comp->eliminate_store_p = false;
1560	    break;
1561	  }
1562      }
1563
1564  /* Determine roots and create chains for components.  */
1565  FOR_EACH_VEC_ELT (comp->refs, i, a)
1566    {
1567      if (!chain
1568	  || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1569	  || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1570	  || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1571	{
1572	  if (nontrivial_chain_p (chain))
1573	    {
1574	      add_looparound_copies (chain);
1575	      m_chains.safe_push (chain);
1576	    }
1577	  else
1578	    release_chain (chain);
1579
1580	  /* Determine type of the chain.  If the root reference is a load,
1581	     this can only be a CT_LOAD chain; other chains are intialized
1582	     to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1583	     new reference is added.  */
1584	  type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1585	  chain = make_rooted_chain (a, type);
1586	  last_ofs = a->offset;
1587	  continue;
1588	}
1589
1590      add_ref_to_chain (chain, a);
1591    }
1592
1593  if (nontrivial_chain_p (chain))
1594    {
1595      add_looparound_copies (chain);
1596      m_chains.safe_push (chain);
1597    }
1598  else
1599    release_chain (chain);
1600}
1601
1602/* Find roots of the values and determine distances in components COMPS, and
1603   separates the references to chains.  */
1604
1605void
1606pcom_worker::determine_roots (struct component *comps)
1607{
1608  struct component *comp;
1609
1610  for (comp = comps; comp; comp = comp->next)
1611    determine_roots_comp (comp);
1612}
1613
1614/* Replace the reference in statement STMT with temporary variable
1615   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
1616   the reference in the statement.  IN_LHS is true if the reference
1617   is in the lhs of STMT, false if it is in rhs.  */
1618
1619static void
1620replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1621{
1622  tree val;
1623  gassign *new_stmt;
1624  gimple_stmt_iterator bsi, psi;
1625
1626  if (gimple_code (stmt) == GIMPLE_PHI)
1627    {
1628      gcc_assert (!in_lhs && !set);
1629
1630      val = PHI_RESULT (stmt);
1631      bsi = gsi_after_labels (gimple_bb (stmt));
1632      psi = gsi_for_stmt (stmt);
1633      remove_phi_node (&psi, false);
1634
1635      /* Turn the phi node into GIMPLE_ASSIGN.  */
1636      new_stmt = gimple_build_assign (val, new_tree);
1637      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1638      return;
1639    }
1640
1641  /* Since the reference is of gimple_reg type, it should only
1642     appear as lhs or rhs of modify statement.  */
1643  gcc_assert (is_gimple_assign (stmt));
1644
1645  bsi = gsi_for_stmt (stmt);
1646
1647  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
1648  if (!set)
1649    {
1650      gcc_assert (!in_lhs);
1651      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1652      stmt = gsi_stmt (bsi);
1653      update_stmt (stmt);
1654      return;
1655    }
1656
1657  if (in_lhs)
1658    {
1659      /* We have statement
1660
1661	 OLD = VAL
1662
1663	 If OLD is a memory reference, then VAL is gimple_val, and we transform
1664	 this to
1665
1666	 OLD = VAL
1667	 NEW = VAL
1668
1669	 Otherwise, we are replacing a combination chain,
1670	 VAL is the expression that performs the combination, and OLD is an
1671	 SSA name.  In this case, we transform the assignment to
1672
1673	 OLD = VAL
1674	 NEW = OLD
1675
1676	 */
1677
1678      val = gimple_assign_lhs (stmt);
1679      if (TREE_CODE (val) != SSA_NAME)
1680	{
1681	  val = gimple_assign_rhs1 (stmt);
1682	  gcc_assert (gimple_assign_single_p (stmt));
1683	  if (TREE_CLOBBER_P (val))
1684	    val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1685	  else
1686	    gcc_assert (gimple_assign_copy_p (stmt));
1687	}
1688    }
1689  else
1690    {
1691      /* VAL = OLD
1692
1693	 is transformed to
1694
1695	 VAL = OLD
1696	 NEW = VAL  */
1697
1698      val = gimple_assign_lhs (stmt);
1699    }
1700
1701  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1702  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1703}
1704
1705/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1706   of the loop it was analyzed in.  Append init stmts to STMTS.  */
1707
1708static tree
1709ref_at_iteration (data_reference_p dr, int iter,
1710		  gimple_seq *stmts, tree niters = NULL_TREE)
1711{
1712  tree off = DR_OFFSET (dr);
1713  tree coff = DR_INIT (dr);
1714  tree ref = DR_REF (dr);
1715  enum tree_code ref_code = ERROR_MARK;
1716  tree ref_type = NULL_TREE;
1717  tree ref_op1 = NULL_TREE;
1718  tree ref_op2 = NULL_TREE;
1719  tree new_offset;
1720
1721  if (iter != 0)
1722    {
1723      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1724      if (TREE_CODE (new_offset) == INTEGER_CST)
1725	coff = size_binop (PLUS_EXPR, coff, new_offset);
1726      else
1727	off = size_binop (PLUS_EXPR, off, new_offset);
1728    }
1729
1730  if (niters != NULL_TREE)
1731    {
1732      niters = fold_convert (ssizetype, niters);
1733      new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1734      if (TREE_CODE (niters) == INTEGER_CST)
1735	coff = size_binop (PLUS_EXPR, coff, new_offset);
1736      else
1737	off = size_binop (PLUS_EXPR, off, new_offset);
1738    }
1739
1740  /* While data-ref analysis punts on bit offsets it still handles
1741     bitfield accesses at byte boundaries.  Cope with that.  Note that
1742     if the bitfield object also starts at a byte-boundary we can simply
1743     replicate the COMPONENT_REF, but we have to subtract the component's
1744     byte-offset from the MEM_REF address first.
1745     Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1746     start at offset zero.  */
1747  if (TREE_CODE (ref) == COMPONENT_REF
1748      && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1749    {
1750      unsigned HOST_WIDE_INT boff;
1751      tree field = TREE_OPERAND (ref, 1);
1752      tree offset = component_ref_field_offset (ref);
1753      ref_type = TREE_TYPE (ref);
1754      boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1755      /* This can occur in Ada.  See the comment in get_bit_range.  */
1756      if (boff % BITS_PER_UNIT != 0
1757	  || !tree_fits_uhwi_p (offset))
1758	{
1759	  ref_code = BIT_FIELD_REF;
1760	  ref_op1 = DECL_SIZE (field);
1761	  ref_op2 = bitsize_zero_node;
1762	}
1763      else
1764	{
1765	  boff >>= LOG2_BITS_PER_UNIT;
1766	  boff += tree_to_uhwi (offset);
1767	  coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1768	  ref_code = COMPONENT_REF;
1769	  ref_op1 = field;
1770	  ref_op2 = TREE_OPERAND (ref, 2);
1771	  ref = TREE_OPERAND (ref, 0);
1772	}
1773    }
1774  /* We may not associate the constant offset across the pointer plus
1775     expression because that might form a pointer to before the object
1776     then.  But for some cases we can retain that to allow tree_could_trap_p
1777     to return false - see gcc.dg/tree-ssa/predcom-1.c  */
1778  tree addr, alias_ptr;
1779  if (integer_zerop  (off))
1780    {
1781      alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1782      addr = DR_BASE_ADDRESS (dr);
1783    }
1784  else
1785    {
1786      alias_ptr = build_zero_cst (reference_alias_ptr_type (ref));
1787      off = size_binop (PLUS_EXPR, off, coff);
1788      addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1789    }
1790  addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1791				 is_gimple_mem_ref_addr, NULL_TREE);
1792  tree type = build_aligned_type (TREE_TYPE (ref),
1793				  get_object_alignment (ref));
1794  ref = build2 (MEM_REF, type, addr, alias_ptr);
1795  if (ref_type)
1796    ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1797  return ref;
1798}
1799
1800/* Get the initialization expression for the INDEX-th temporary variable
1801   of CHAIN.  */
1802
1803static tree
1804get_init_expr (chain_p chain, unsigned index)
1805{
1806  if (chain->type == CT_COMBINATION)
1807    {
1808      tree e1 = get_init_expr (chain->ch1, index);
1809      tree e2 = get_init_expr (chain->ch2, index);
1810
1811      return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1812    }
1813  else
1814    return chain->inits[index];
1815}
1816
1817/* Returns a new temporary variable used for the I-th variable carrying
1818   value of REF.  The variable's uid is marked in TMP_VARS.  */
1819
1820static tree
1821predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1822{
1823  tree type = TREE_TYPE (ref);
1824  /* We never access the components of the temporary variable in predictive
1825     commoning.  */
1826  tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1827  bitmap_set_bit (tmp_vars, DECL_UID (var));
1828  return var;
1829}
1830
1831/* Creates the variables for CHAIN, as well as phi nodes for them and
1832   initialization on entry to LOOP.  Uids of the newly created
1833   temporary variables are marked in TMP_VARS.  */
1834
1835static void
1836initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
1837{
1838  unsigned i;
1839  unsigned n = chain->length;
1840  dref root = get_chain_root (chain);
1841  bool reuse_first = !chain->has_max_use_after;
1842  tree ref, init, var, next;
1843  gphi *phi;
1844  gimple_seq stmts;
1845  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1846
1847  /* If N == 0, then all the references are within the single iteration.  And
1848     since this is an nonempty chain, reuse_first cannot be true.  */
1849  gcc_assert (n > 0 || !reuse_first);
1850
1851  chain->vars.create (n + 1);
1852
1853  if (chain->type == CT_COMBINATION)
1854    ref = gimple_assign_lhs (root->stmt);
1855  else
1856    ref = DR_REF (root->ref);
1857
1858  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1859    {
1860      var = predcom_tmp_var (ref, i, tmp_vars);
1861      chain->vars.quick_push (var);
1862    }
1863  if (reuse_first)
1864    chain->vars.quick_push (chain->vars[0]);
1865
1866  FOR_EACH_VEC_ELT (chain->vars, i, var)
1867    chain->vars[i] = make_ssa_name (var);
1868
1869  for (i = 0; i < n; i++)
1870    {
1871      var = chain->vars[i];
1872      next = chain->vars[i + 1];
1873      init = get_init_expr (chain, i);
1874
1875      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1876      if (stmts)
1877	gsi_insert_seq_on_edge_immediate (entry, stmts);
1878
1879      phi = create_phi_node (var, loop->header);
1880      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1881      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1882    }
1883}
1884
1885/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1886   all stores to be eliminated store loop invariant values into memory.
1887   In this case, we can use these invariant values directly after LOOP.  */
1888
1889static bool
1890is_inv_store_elimination_chain (class loop *loop, chain_p chain)
1891{
1892  if (chain->length == 0 || chain->type != CT_STORE_STORE)
1893    return false;
1894
1895  gcc_assert (!chain->has_max_use_after);
1896
1897  /* If loop iterates for unknown times or fewer times than chain->length,
1898     we still need to setup root variable and propagate it with PHI node.  */
1899  tree niters = number_of_latch_executions (loop);
1900  if (TREE_CODE (niters) != INTEGER_CST
1901      || wi::leu_p (wi::to_wide (niters), chain->length))
1902    return false;
1903
1904  /* Check stores in chain for elimination if they only store loop invariant
1905     values.  */
1906  for (unsigned i = 0; i < chain->length; i++)
1907    {
1908      dref a = get_chain_last_write_at (chain, i);
1909      if (a == NULL)
1910	continue;
1911
1912      gimple *def_stmt, *stmt = a->stmt;
1913      if (!gimple_assign_single_p (stmt))
1914	return false;
1915
1916      tree val = gimple_assign_rhs1 (stmt);
1917      if (TREE_CLOBBER_P (val))
1918	return false;
1919
1920      if (CONSTANT_CLASS_P (val))
1921	continue;
1922
1923      if (TREE_CODE (val) != SSA_NAME)
1924	return false;
1925
1926      def_stmt = SSA_NAME_DEF_STMT (val);
1927      if (gimple_nop_p (def_stmt))
1928	continue;
1929
1930      if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1931	return false;
1932    }
1933  return true;
1934}
1935
1936/* Creates root variables for store elimination CHAIN in which stores for
1937   elimination only store loop invariant values.  In this case, we neither
1938   need to load root variables before loop nor propagate it with PHI nodes.  */
1939
1940static void
1941initialize_root_vars_store_elim_1 (chain_p chain)
1942{
1943  tree var;
1944  unsigned i, n = chain->length;
1945
1946  chain->vars.create (n);
1947  chain->vars.safe_grow_cleared (n, true);
1948
1949  /* Initialize root value for eliminated stores at each distance.  */
1950  for (i = 0; i < n; i++)
1951    {
1952      dref a = get_chain_last_write_at (chain, i);
1953      if (a == NULL)
1954	continue;
1955
1956      var = gimple_assign_rhs1 (a->stmt);
1957      chain->vars[a->distance] = var;
1958    }
1959
1960  /* We don't propagate values with PHI nodes, so manually propagate value
1961     to bubble positions.  */
1962  var = chain->vars[0];
1963  for (i = 1; i < n; i++)
1964    {
1965      if (chain->vars[i] != NULL_TREE)
1966	{
1967	  var = chain->vars[i];
1968	  continue;
1969	}
1970      chain->vars[i] = var;
1971    }
1972
1973  /* Revert the vector.  */
1974  for (i = 0; i < n / 2; i++)
1975    std::swap (chain->vars[i], chain->vars[n - i - 1]);
1976}
1977
1978/* Creates root variables for store elimination CHAIN in which stores for
1979   elimination store loop variant values.  In this case, we may need to
1980   load root variables before LOOP and propagate it with PHI nodes.  Uids
1981   of the newly created root variables are marked in TMP_VARS.  */
1982
1983static void
1984initialize_root_vars_store_elim_2 (class loop *loop,
1985				   chain_p chain, bitmap tmp_vars)
1986{
1987  unsigned i, n = chain->length;
1988  tree ref, init, var, next, val, phi_result;
1989  gimple *stmt;
1990  gimple_seq stmts;
1991
1992  chain->vars.create (n);
1993
1994  ref = DR_REF (get_chain_root (chain)->ref);
1995  for (i = 0; i < n; i++)
1996    {
1997      var = predcom_tmp_var (ref, i, tmp_vars);
1998      chain->vars.quick_push (var);
1999    }
2000
2001  FOR_EACH_VEC_ELT (chain->vars, i, var)
2002    chain->vars[i] = make_ssa_name (var);
2003
2004  /* Root values are either rhs operand of stores to be eliminated, or
2005     loaded from memory before loop.  */
2006  auto_vec<tree> vtemps;
2007  vtemps.safe_grow_cleared (n, true);
2008  for (i = 0; i < n; i++)
2009    {
2010      init = get_init_expr (chain, i);
2011      if (init == NULL_TREE)
2012	{
2013	  /* Root value is rhs operand of the store to be eliminated if
2014	     it isn't loaded from memory before loop.  */
2015	  dref a = get_chain_last_write_at (chain, i);
2016	  val = gimple_assign_rhs1 (a->stmt);
2017	  if (TREE_CLOBBER_P (val))
2018	    {
2019	      val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
2020	      gimple_assign_set_rhs1 (a->stmt, val);
2021	    }
2022
2023	  vtemps[n - i - 1] = val;
2024	}
2025      else
2026	{
2027	  edge latch = loop_latch_edge (loop);
2028	  edge entry = loop_preheader_edge (loop);
2029
2030	  /* Root value is loaded from memory before loop, we also need
2031	     to add PHI nodes to propagate the value across iterations.  */
2032	  init = force_gimple_operand (init, &stmts, true, NULL_TREE);
2033	  if (stmts)
2034	    gsi_insert_seq_on_edge_immediate (entry, stmts);
2035
2036	  next = chain->vars[n - i];
2037	  phi_result = copy_ssa_name (next);
2038	  gphi *phi = create_phi_node (phi_result, loop->header);
2039	  add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2040	  add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2041	  vtemps[n - i - 1] = phi_result;
2042	}
2043    }
2044
2045  /* Find the insertion position.  */
2046  dref last = get_chain_root (chain);
2047  for (i = 0; i < chain->refs.length (); i++)
2048    {
2049      if (chain->refs[i]->pos > last->pos)
2050	last = chain->refs[i];
2051    }
2052
2053  gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
2054
2055  /* Insert statements copying root value to root variable.  */
2056  for (i = 0; i < n; i++)
2057    {
2058      var = chain->vars[i];
2059      val = vtemps[i];
2060      stmt = gimple_build_assign (var, val);
2061      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
2062    }
2063}
2064
2065/* Generates stores for CHAIN's eliminated stores in LOOP's last
2066   (CHAIN->length - 1) iterations.  */
2067
2068static void
2069finalize_eliminated_stores (class loop *loop, chain_p chain)
2070{
2071  unsigned i, n = chain->length;
2072
2073  for (i = 0; i < n; i++)
2074    {
2075      tree var = chain->vars[i];
2076      tree fini = chain->finis[n - i - 1];
2077      gimple *stmt = gimple_build_assign (fini, var);
2078
2079      gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
2080    }
2081
2082  if (chain->fini_seq)
2083    {
2084      gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
2085      chain->fini_seq = NULL;
2086    }
2087}
2088
2089/* Initializes a variable for load motion for ROOT and prepares phi nodes and
2090   initialization on entry to LOOP if necessary.  The ssa name for the variable
2091   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
2092   around the loop is created.  Uid of the newly created temporary variable
2093   is marked in TMP_VARS.  INITS is the list containing the (single)
2094   initializer.  */
2095
2096static void
2097initialize_root_vars_lm (class loop *loop, dref root, bool written,
2098			 vec<tree> *vars, const vec<tree> &inits,
2099			 bitmap tmp_vars)
2100{
2101  unsigned i;
2102  tree ref = DR_REF (root->ref), init, var, next;
2103  gimple_seq stmts;
2104  gphi *phi;
2105  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
2106
2107  /* Find the initializer for the variable, and check that it cannot
2108     trap.  */
2109  init = inits[0];
2110
2111  vars->create (written ? 2 : 1);
2112  var = predcom_tmp_var (ref, 0, tmp_vars);
2113  vars->quick_push (var);
2114  if (written)
2115    vars->quick_push ((*vars)[0]);
2116
2117  FOR_EACH_VEC_ELT (*vars, i, var)
2118    (*vars)[i] = make_ssa_name (var);
2119
2120  var = (*vars)[0];
2121
2122  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
2123  if (stmts)
2124    gsi_insert_seq_on_edge_immediate (entry, stmts);
2125
2126  if (written)
2127    {
2128      next = (*vars)[1];
2129      phi = create_phi_node (var, loop->header);
2130      add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2131      add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2132    }
2133  else
2134    {
2135      gassign *init_stmt = gimple_build_assign (var, init);
2136      gsi_insert_on_edge_immediate (entry, init_stmt);
2137    }
2138}
2139
2140
2141/* Execute load motion for references in chain CHAIN.  Uids of the newly
2142   created temporary variables are marked in TMP_VARS.  */
2143
2144static void
2145execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
2146{
2147  auto_vec<tree> vars;
2148  dref a;
2149  unsigned n_writes = 0, ridx, i;
2150  tree var;
2151
2152  gcc_assert (chain->type == CT_INVARIANT);
2153  gcc_assert (!chain->combined);
2154  FOR_EACH_VEC_ELT (chain->refs, i, a)
2155    if (DR_IS_WRITE (a->ref))
2156      n_writes++;
2157
2158  /* If there are no reads in the loop, there is nothing to do.  */
2159  if (n_writes == chain->refs.length ())
2160    return;
2161
2162  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2163			   &vars, chain->inits, tmp_vars);
2164
2165  ridx = 0;
2166  FOR_EACH_VEC_ELT (chain->refs, i, a)
2167    {
2168      bool is_read = DR_IS_READ (a->ref);
2169
2170      if (DR_IS_WRITE (a->ref))
2171	{
2172	  n_writes--;
2173	  if (n_writes)
2174	    {
2175	      var = vars[0];
2176	      var = make_ssa_name (SSA_NAME_VAR (var));
2177	      vars[0] = var;
2178	    }
2179	  else
2180	    ridx = 1;
2181	}
2182
2183      replace_ref_with (a->stmt, vars[ridx],
2184			!is_read, !is_read);
2185    }
2186}
2187
2188/* Returns the single statement in that NAME is used, excepting
2189   the looparound phi nodes contained in one of the chains.  If there is no
2190   such statement, or more statements, NULL is returned.  */
2191
2192gimple *
2193pcom_worker::single_nonlooparound_use (tree name)
2194{
2195  use_operand_p use;
2196  imm_use_iterator it;
2197  gimple *stmt, *ret = NULL;
2198
2199  FOR_EACH_IMM_USE_FAST (use, it, name)
2200    {
2201      stmt = USE_STMT (use);
2202
2203      if (gimple_code (stmt) == GIMPLE_PHI)
2204	{
2205	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
2206	     could not be processed anyway, so just fail for them.  */
2207	  if (bitmap_bit_p (m_looparound_phis,
2208			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
2209	    continue;
2210
2211	  return NULL;
2212	}
2213      else if (is_gimple_debug (stmt))
2214	continue;
2215      else if (ret != NULL)
2216	return NULL;
2217      else
2218	ret = stmt;
2219    }
2220
2221  return ret;
2222}
2223
2224/* Remove statement STMT, as well as the chain of assignments in that it is
2225   used.  */
2226
2227void
2228pcom_worker::remove_stmt (gimple *stmt)
2229{
2230  tree name;
2231  gimple *next;
2232  gimple_stmt_iterator psi;
2233
2234  if (gimple_code (stmt) == GIMPLE_PHI)
2235    {
2236      name = PHI_RESULT (stmt);
2237      next = single_nonlooparound_use (name);
2238      reset_debug_uses (stmt);
2239      psi = gsi_for_stmt (stmt);
2240      remove_phi_node (&psi, true);
2241
2242      if (!next
2243	  || !gimple_assign_ssa_name_copy_p (next)
2244	  || gimple_assign_rhs1 (next) != name)
2245	return;
2246
2247      stmt = next;
2248    }
2249
2250  while (1)
2251    {
2252      gimple_stmt_iterator bsi;
2253
2254      bsi = gsi_for_stmt (stmt);
2255
2256      name = gimple_assign_lhs (stmt);
2257      if (TREE_CODE (name) == SSA_NAME)
2258	{
2259	  next = single_nonlooparound_use (name);
2260	  reset_debug_uses (stmt);
2261	}
2262      else
2263	{
2264	  /* This is a store to be eliminated.  */
2265	  gcc_assert (gimple_vdef (stmt) != NULL);
2266	  next = NULL;
2267	}
2268
2269      unlink_stmt_vdef (stmt);
2270      gsi_remove (&bsi, true);
2271      release_defs (stmt);
2272
2273      if (!next
2274	  || !gimple_assign_ssa_name_copy_p (next)
2275	  || gimple_assign_rhs1 (next) != name)
2276	return;
2277
2278      stmt = next;
2279    }
2280}
2281
2282/* Perform the predictive commoning optimization for a chain CHAIN.
2283   Uids of the newly created temporary variables are marked in TMP_VARS.*/
2284
2285void
2286pcom_worker::execute_pred_commoning_chain (chain_p chain,
2287			      bitmap tmp_vars)
2288{
2289  unsigned i;
2290  dref a;
2291  tree var;
2292  bool in_lhs;
2293
2294  if (chain->combined)
2295    {
2296      /* For combined chains, just remove the statements that are used to
2297	 compute the values of the expression (except for the root one).
2298	 We delay this until after all chains are processed.  */
2299    }
2300  else if (chain->type == CT_STORE_STORE)
2301    {
2302      if (chain->length > 0)
2303	{
2304	  if (chain->inv_store_elimination)
2305	    {
2306	      /* If dead stores in this chain only store loop invariant
2307		 values, we can simply record the invariant value and use
2308		 it directly after loop.  */
2309	      initialize_root_vars_store_elim_1 (chain);
2310	    }
2311	  else
2312	    {
2313	      /* If dead stores in this chain store loop variant values,
2314		 we need to set up the variables by loading from memory
2315		 before loop and propagating it with PHI nodes.  */
2316	      initialize_root_vars_store_elim_2 (m_loop, chain, tmp_vars);
2317	    }
2318
2319	  /* For inter-iteration store elimination chain, stores at each
2320	     distance in loop's last (chain->length - 1) iterations can't
2321	     be eliminated, because there is no following killing store.
2322	     We need to generate these stores after loop.  */
2323	  finalize_eliminated_stores (m_loop, chain);
2324	}
2325
2326      bool last_store_p = true;
2327      for (i = chain->refs.length (); i > 0; i--)
2328	{
2329	  a = chain->refs[i - 1];
2330	  /* Preserve the last store of the chain.  Eliminate other stores
2331	     which are killed by the last one.  */
2332	  if (DR_IS_WRITE (a->ref))
2333	    {
2334	      if (last_store_p)
2335		last_store_p = false;
2336	      else
2337		remove_stmt (a->stmt);
2338
2339	      continue;
2340	    }
2341
2342	  /* Any load in Store-Store chain must be dominated by a previous
2343	     store, we replace the load reference with rhs of the store.  */
2344	  dref b = get_chain_last_write_before_load (chain, i - 1);
2345	  gcc_assert (b != NULL);
2346	  var = gimple_assign_rhs1 (b->stmt);
2347	  replace_ref_with (a->stmt, var, false, false);
2348	}
2349    }
2350  else
2351    {
2352      /* For non-combined chains, set up the variables that hold its value.  */
2353      initialize_root_vars (m_loop, chain, tmp_vars);
2354      a = get_chain_root (chain);
2355      in_lhs = (chain->type == CT_STORE_LOAD
2356		|| chain->type == CT_COMBINATION);
2357      replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2358
2359      /* Replace the uses of the original references by these variables.  */
2360      for (i = 1; chain->refs.iterate (i, &a); i++)
2361	{
2362	  var = chain->vars[chain->length - a->distance];
2363	  replace_ref_with (a->stmt, var, false, false);
2364	}
2365    }
2366}
2367
2368/* Determines the unroll factor necessary to remove as many temporary variable
2369   copies as possible.  CHAINS is the list of chains that will be
2370   optimized.  */
2371
2372static unsigned
2373determine_unroll_factor (const vec<chain_p> &chains)
2374{
2375  chain_p chain;
2376  unsigned factor = 1, af, nfactor, i;
2377  unsigned max = param_max_unroll_times;
2378
2379  FOR_EACH_VEC_ELT (chains, i, chain)
2380    {
2381      if (chain->type == CT_INVARIANT)
2382	continue;
2383      /* For now we can't handle unrolling when eliminating stores.  */
2384      else if (chain->type == CT_STORE_STORE)
2385	return 1;
2386
2387      if (chain->combined)
2388	{
2389	  /* For combined chains, we can't handle unrolling if we replace
2390	     looparound PHIs.  */
2391	  dref a;
2392	  unsigned j;
2393	  for (j = 1; chain->refs.iterate (j, &a); j++)
2394	    if (gimple_code (a->stmt) == GIMPLE_PHI)
2395	      return 1;
2396	  continue;
2397	}
2398
2399      /* The best unroll factor for this chain is equal to the number of
2400	 temporary variables that we create for it.  */
2401      af = chain->length;
2402      if (chain->has_max_use_after)
2403	af++;
2404
2405      nfactor = factor * af / gcd (factor, af);
2406      if (nfactor <= max)
2407	factor = nfactor;
2408    }
2409
2410  return factor;
2411}
2412
2413/* Perform the predictive commoning optimization for chains.
2414   Uids of the newly created temporary variables are marked in TMP_VARS.  */
2415
2416void
2417pcom_worker::execute_pred_commoning (bitmap tmp_vars)
2418{
2419  chain_p chain;
2420  unsigned i;
2421
2422  FOR_EACH_VEC_ELT (m_chains, i, chain)
2423    {
2424      if (chain->type == CT_INVARIANT)
2425	execute_load_motion (m_loop, chain, tmp_vars);
2426      else
2427	execute_pred_commoning_chain (chain, tmp_vars);
2428    }
2429
2430  FOR_EACH_VEC_ELT (m_chains, i, chain)
2431    {
2432      if (chain->type == CT_INVARIANT)
2433	;
2434      else if (chain->combined)
2435	{
2436	  /* For combined chains, just remove the statements that are used to
2437	     compute the values of the expression (except for the root one).  */
2438	  dref a;
2439	  unsigned j;
2440	  for (j = 1; chain->refs.iterate (j, &a); j++)
2441	    remove_stmt (a->stmt);
2442	}
2443    }
2444}
2445
2446/* For each reference in CHAINS, if its defining statement is
2447   phi node, record the ssa name that is defined by it.  */
2448
2449static void
2450replace_phis_by_defined_names (vec<chain_p> &chains)
2451{
2452  chain_p chain;
2453  dref a;
2454  unsigned i, j;
2455
2456  FOR_EACH_VEC_ELT (chains, i, chain)
2457    FOR_EACH_VEC_ELT (chain->refs, j, a)
2458      {
2459	if (gimple_code (a->stmt) == GIMPLE_PHI)
2460	  {
2461	    a->name_defined_by_phi = PHI_RESULT (a->stmt);
2462	    a->stmt = NULL;
2463	  }
2464      }
2465}
2466
2467/* For each reference in CHAINS, if name_defined_by_phi is not
2468   NULL, use it to set the stmt field.  */
2469
2470static void
2471replace_names_by_phis (vec<chain_p> chains)
2472{
2473  chain_p chain;
2474  dref a;
2475  unsigned i, j;
2476
2477  FOR_EACH_VEC_ELT (chains, i, chain)
2478    FOR_EACH_VEC_ELT (chain->refs, j, a)
2479      if (a->stmt == NULL)
2480	{
2481	  a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2482	  gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2483	  a->name_defined_by_phi = NULL_TREE;
2484	}
2485}
2486
2487/* Wrapper over execute_pred_commoning, to pass it as a callback
2488   to tree_transform_and_unroll_loop.  */
2489
2490struct epcc_data
2491{
2492  vec<chain_p> chains;
2493  bitmap tmp_vars;
2494  pcom_worker *worker;
2495};
2496
2497static void
2498execute_pred_commoning_cbck (class loop *loop ATTRIBUTE_UNUSED, void *data)
2499{
2500  struct epcc_data *const dta = (struct epcc_data *) data;
2501  pcom_worker *worker = dta->worker;
2502
2503  /* Restore phi nodes that were replaced by ssa names before
2504     tree_transform_and_unroll_loop (see detailed description in
2505     tree_predictive_commoning_loop).  */
2506  replace_names_by_phis (dta->chains);
2507  worker->execute_pred_commoning (dta->tmp_vars);
2508}
2509
2510/* Base NAME and all the names in the chain of phi nodes that use it
2511   on variable VAR.  The phi nodes are recognized by being in the copies of
2512   the header of the LOOP.  */
2513
2514static void
2515base_names_in_chain_on (class loop *loop, tree name, tree var)
2516{
2517  gimple *stmt, *phi;
2518  imm_use_iterator iter;
2519
2520  replace_ssa_name_symbol (name, var);
2521
2522  while (1)
2523    {
2524      phi = NULL;
2525      FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2526	{
2527	  if (gimple_code (stmt) == GIMPLE_PHI
2528	      && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2529	    {
2530	      phi = stmt;
2531	      break;
2532	    }
2533	}
2534      if (!phi)
2535	return;
2536
2537      name = PHI_RESULT (phi);
2538      replace_ssa_name_symbol (name, var);
2539    }
2540}
2541
2542/* Given an unrolled LOOP after predictive commoning, remove the
2543   register copies arising from phi nodes by changing the base
2544   variables of SSA names.  TMP_VARS is the set of the temporary variables
2545   for those we want to perform this.  */
2546
2547static void
2548eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
2549{
2550  edge e;
2551  gphi *phi;
2552  gimple *stmt;
2553  tree name, use, var;
2554  gphi_iterator psi;
2555
2556  e = loop_latch_edge (loop);
2557  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2558    {
2559      phi = psi.phi ();
2560      name = PHI_RESULT (phi);
2561      var = SSA_NAME_VAR (name);
2562      if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2563	continue;
2564      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2565      gcc_assert (TREE_CODE (use) == SSA_NAME);
2566
2567      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
2568      stmt = SSA_NAME_DEF_STMT (use);
2569      while (gimple_code (stmt) == GIMPLE_PHI
2570	     /* In case we could not unroll the loop enough to eliminate
2571		all copies, we may reach the loop header before the defining
2572		statement (in that case, some register copies will be present
2573		in loop latch in the final code, corresponding to the newly
2574		created looparound phi nodes).  */
2575	     && gimple_bb (stmt) != loop->header)
2576	{
2577	  gcc_assert (single_pred_p (gimple_bb (stmt)));
2578	  use = PHI_ARG_DEF (stmt, 0);
2579	  stmt = SSA_NAME_DEF_STMT (use);
2580	}
2581
2582      base_names_in_chain_on (loop, use, var);
2583    }
2584}
2585
2586/* Returns true if CHAIN is suitable to be combined.  */
2587
2588static bool
2589chain_can_be_combined_p (chain_p chain)
2590{
2591  return (!chain->combined
2592	  && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2593}
2594
2595/* Returns the modify statement that uses NAME.  Skips over assignment
2596   statements, NAME is replaced with the actual name used in the returned
2597   statement.  */
2598
2599gimple *
2600pcom_worker::find_use_stmt (tree *name)
2601{
2602  gimple *stmt;
2603  tree rhs, lhs;
2604
2605  /* Skip over assignments.  */
2606  while (1)
2607    {
2608      stmt = single_nonlooparound_use (*name);
2609      if (!stmt)
2610	return NULL;
2611
2612      if (gimple_code (stmt) != GIMPLE_ASSIGN)
2613	return NULL;
2614
2615      lhs = gimple_assign_lhs (stmt);
2616      if (TREE_CODE (lhs) != SSA_NAME)
2617	return NULL;
2618
2619      if (gimple_assign_copy_p (stmt))
2620	{
2621	  rhs = gimple_assign_rhs1 (stmt);
2622	  if (rhs != *name)
2623	    return NULL;
2624
2625	  *name = lhs;
2626	}
2627      else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2628	       == GIMPLE_BINARY_RHS)
2629	return stmt;
2630      else
2631	return NULL;
2632    }
2633}
2634
2635/* Returns true if we may perform reassociation for operation CODE in TYPE.  */
2636
2637static bool
2638may_reassociate_p (tree type, enum tree_code code)
2639{
2640  if (FLOAT_TYPE_P (type)
2641      && !flag_unsafe_math_optimizations)
2642    return false;
2643
2644  return (commutative_tree_code (code)
2645	  && associative_tree_code (code));
2646}
2647
2648/* If the operation used in STMT is associative and commutative, go through the
2649   tree of the same operations and returns its root.  Distance to the root
2650   is stored in DISTANCE.  */
2651
2652gimple *
2653pcom_worker::find_associative_operation_root (gimple *stmt, unsigned *distance)
2654{
2655  tree lhs;
2656  gimple *next;
2657  enum tree_code code = gimple_assign_rhs_code (stmt);
2658  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2659  unsigned dist = 0;
2660
2661  if (!may_reassociate_p (type, code))
2662    return NULL;
2663
2664  while (1)
2665    {
2666      lhs = gimple_assign_lhs (stmt);
2667      gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2668
2669      next = find_use_stmt (&lhs);
2670      if (!next
2671	  || gimple_assign_rhs_code (next) != code)
2672	break;
2673
2674      stmt = next;
2675      dist++;
2676    }
2677
2678  if (distance)
2679    *distance = dist;
2680  return stmt;
2681}
2682
2683/* Returns the common statement in that NAME1 and NAME2 have a use.  If there
2684   is no such statement, returns NULL_TREE.  In case the operation used on
2685   NAME1 and NAME2 is associative and commutative, returns the root of the
2686   tree formed by this operation instead of the statement that uses NAME1 or
2687   NAME2.  */
2688
2689gimple *
2690pcom_worker::find_common_use_stmt (tree *name1, tree *name2)
2691{
2692  gimple *stmt1, *stmt2;
2693
2694  stmt1 = find_use_stmt (name1);
2695  if (!stmt1)
2696    return NULL;
2697
2698  stmt2 = find_use_stmt (name2);
2699  if (!stmt2)
2700    return NULL;
2701
2702  if (stmt1 == stmt2)
2703    return stmt1;
2704
2705  stmt1 = find_associative_operation_root (stmt1, NULL);
2706  if (!stmt1)
2707    return NULL;
2708  stmt2 = find_associative_operation_root (stmt2, NULL);
2709  if (!stmt2)
2710    return NULL;
2711
2712  return (stmt1 == stmt2 ? stmt1 : NULL);
2713}
2714
2715/* Checks whether R1 and R2 are combined together using CODE, with the result
2716   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2717   if it is true.  If CODE is ERROR_MARK, set these values instead.  */
2718
2719bool
2720pcom_worker::combinable_refs_p (dref r1, dref r2,
2721		   enum tree_code *code, bool *swap, tree *rslt_type)
2722{
2723  enum tree_code acode;
2724  bool aswap;
2725  tree atype;
2726  tree name1, name2;
2727  gimple *stmt;
2728
2729  name1 = name_for_ref (r1);
2730  name2 = name_for_ref (r2);
2731  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2732
2733  stmt = find_common_use_stmt (&name1, &name2);
2734
2735  if (!stmt
2736      /* A simple post-dominance check - make sure the combination
2737         is executed under the same condition as the references.  */
2738      || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2739	  && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2740    return false;
2741
2742  acode = gimple_assign_rhs_code (stmt);
2743  aswap = (!commutative_tree_code (acode)
2744	   && gimple_assign_rhs1 (stmt) != name1);
2745  atype = TREE_TYPE (gimple_assign_lhs (stmt));
2746
2747  if (*code == ERROR_MARK)
2748    {
2749      *code = acode;
2750      *swap = aswap;
2751      *rslt_type = atype;
2752      return true;
2753    }
2754
2755  return (*code == acode
2756	  && *swap == aswap
2757	  && *rslt_type == atype);
2758}
2759
2760/* Remove OP from the operation on rhs of STMT, and replace STMT with
2761   an assignment of the remaining operand.  */
2762
2763static void
2764remove_name_from_operation (gimple *stmt, tree op)
2765{
2766  tree other_op;
2767  gimple_stmt_iterator si;
2768
2769  gcc_assert (is_gimple_assign (stmt));
2770
2771  if (gimple_assign_rhs1 (stmt) == op)
2772    other_op = gimple_assign_rhs2 (stmt);
2773  else
2774    other_op = gimple_assign_rhs1 (stmt);
2775
2776  si = gsi_for_stmt (stmt);
2777  gimple_assign_set_rhs_from_tree (&si, other_op);
2778
2779  /* We should not have reallocated STMT.  */
2780  gcc_assert (gsi_stmt (si) == stmt);
2781
2782  update_stmt (stmt);
2783}
2784
2785/* Reassociates the expression in that NAME1 and NAME2 are used so that they
2786   are combined in a single statement, and returns this statement.  */
2787
2788gimple *
2789pcom_worker::reassociate_to_the_same_stmt (tree name1, tree name2)
2790{
2791  gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2792  gassign *new_stmt, *tmp_stmt;
2793  tree new_name, tmp_name, var, r1, r2;
2794  unsigned dist1, dist2;
2795  enum tree_code code;
2796  tree type = TREE_TYPE (name1);
2797  gimple_stmt_iterator bsi;
2798
2799  stmt1 = find_use_stmt (&name1);
2800  stmt2 = find_use_stmt (&name2);
2801  root1 = find_associative_operation_root (stmt1, &dist1);
2802  root2 = find_associative_operation_root (stmt2, &dist2);
2803  code = gimple_assign_rhs_code (stmt1);
2804
2805  gcc_assert (root1 && root2 && root1 == root2
2806	      && code == gimple_assign_rhs_code (stmt2));
2807
2808  /* Find the root of the nearest expression in that both NAME1 and NAME2
2809     are used.  */
2810  r1 = name1;
2811  s1 = stmt1;
2812  r2 = name2;
2813  s2 = stmt2;
2814
2815  while (dist1 > dist2)
2816    {
2817      s1 = find_use_stmt (&r1);
2818      r1 = gimple_assign_lhs (s1);
2819      dist1--;
2820    }
2821  while (dist2 > dist1)
2822    {
2823      s2 = find_use_stmt (&r2);
2824      r2 = gimple_assign_lhs (s2);
2825      dist2--;
2826    }
2827
2828  while (s1 != s2)
2829    {
2830      s1 = find_use_stmt (&r1);
2831      r1 = gimple_assign_lhs (s1);
2832      s2 = find_use_stmt (&r2);
2833      r2 = gimple_assign_lhs (s2);
2834    }
2835
2836  /* Remove NAME1 and NAME2 from the statements in that they are used
2837     currently.  */
2838  remove_name_from_operation (stmt1, name1);
2839  remove_name_from_operation (stmt2, name2);
2840
2841  /* Insert the new statement combining NAME1 and NAME2 before S1, and
2842     combine it with the rhs of S1.  */
2843  var = create_tmp_reg (type, "predreastmp");
2844  new_name = make_ssa_name (var);
2845  new_stmt = gimple_build_assign (new_name, code, name1, name2);
2846
2847  var = create_tmp_reg (type, "predreastmp");
2848  tmp_name = make_ssa_name (var);
2849
2850  /* Rhs of S1 may now be either a binary expression with operation
2851     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2852     so that name1 or name2 was removed from it).  */
2853  tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2854				  gimple_assign_rhs1 (s1),
2855				  gimple_assign_rhs2 (s1));
2856
2857  bsi = gsi_for_stmt (s1);
2858  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2859  s1 = gsi_stmt (bsi);
2860  update_stmt (s1);
2861
2862  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2863  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2864
2865  return new_stmt;
2866}
2867
2868/* Returns the statement that combines references R1 and R2.  In case R1
2869   and R2 are not used in the same statement, but they are used with an
2870   associative and commutative operation in the same expression, reassociate
2871   the expression so that they are used in the same statement.  */
2872
2873gimple *
2874pcom_worker::stmt_combining_refs (dref r1, dref r2)
2875{
2876  gimple *stmt1, *stmt2;
2877  tree name1 = name_for_ref (r1);
2878  tree name2 = name_for_ref (r2);
2879
2880  stmt1 = find_use_stmt (&name1);
2881  stmt2 = find_use_stmt (&name2);
2882  if (stmt1 == stmt2)
2883    return stmt1;
2884
2885  return reassociate_to_the_same_stmt (name1, name2);
2886}
2887
2888/* Tries to combine chains CH1 and CH2 together.  If this succeeds, the
2889   description of the new chain is returned, otherwise we return NULL.  */
2890
2891chain_p
2892pcom_worker::combine_chains (chain_p ch1, chain_p ch2)
2893{
2894  dref r1, r2, nw;
2895  enum tree_code op = ERROR_MARK;
2896  bool swap = false;
2897  chain_p new_chain;
2898  unsigned i;
2899  tree rslt_type = NULL_TREE;
2900
2901  if (ch1 == ch2)
2902    return NULL;
2903  if (ch1->length != ch2->length)
2904    return NULL;
2905
2906  if (ch1->refs.length () != ch2->refs.length ())
2907    return NULL;
2908
2909  for (i = 0; (ch1->refs.iterate (i, &r1)
2910	       && ch2->refs.iterate (i, &r2)); i++)
2911    {
2912      if (r1->distance != r2->distance)
2913	return NULL;
2914
2915      if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2916	return NULL;
2917    }
2918
2919  if (swap)
2920    std::swap (ch1, ch2);
2921
2922  new_chain = new struct chain (CT_COMBINATION);
2923  new_chain->op = op;
2924  new_chain->ch1 = ch1;
2925  new_chain->ch2 = ch2;
2926  new_chain->rslt_type = rslt_type;
2927  new_chain->length = ch1->length;
2928
2929  for (i = 0; (ch1->refs.iterate (i, &r1)
2930	       && ch2->refs.iterate (i, &r2)); i++)
2931    {
2932      nw = XCNEW (class dref_d);
2933      nw->stmt = stmt_combining_refs (r1, r2);
2934      nw->distance = r1->distance;
2935
2936      new_chain->refs.safe_push (nw);
2937    }
2938
2939  ch1->combined = true;
2940  ch2->combined = true;
2941  return new_chain;
2942}
2943
2944/* Recursively update position information of all offspring chains to ROOT
2945   chain's position information.  */
2946
2947static void
2948update_pos_for_combined_chains (chain_p root)
2949{
2950  chain_p ch1 = root->ch1, ch2 = root->ch2;
2951  dref ref, ref1, ref2;
2952  for (unsigned j = 0; (root->refs.iterate (j, &ref)
2953			&& ch1->refs.iterate (j, &ref1)
2954			&& ch2->refs.iterate (j, &ref2)); ++j)
2955    ref1->pos = ref2->pos = ref->pos;
2956
2957  if (ch1->type == CT_COMBINATION)
2958    update_pos_for_combined_chains (ch1);
2959  if (ch2->type == CT_COMBINATION)
2960    update_pos_for_combined_chains (ch2);
2961}
2962
2963/* Returns true if statement S1 dominates statement S2.  */
2964
2965static bool
2966pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2967{
2968  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2969
2970  if (!bb1 || s1 == s2)
2971    return true;
2972
2973  if (bb1 == bb2)
2974    return gimple_uid (s1) < gimple_uid (s2);
2975
2976  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2977}
2978
2979/* Try to combine the chains.  */
2980
2981void
2982pcom_worker::try_combine_chains ()
2983{
2984  unsigned i, j;
2985  chain_p ch1, ch2, cch;
2986  auto_vec<chain_p> worklist;
2987  bool combined_p = false;
2988
2989  FOR_EACH_VEC_ELT (m_chains, i, ch1)
2990    if (chain_can_be_combined_p (ch1))
2991      worklist.safe_push (ch1);
2992
2993  while (!worklist.is_empty ())
2994    {
2995      ch1 = worklist.pop ();
2996      if (!chain_can_be_combined_p (ch1))
2997	continue;
2998
2999      FOR_EACH_VEC_ELT (m_chains, j, ch2)
3000	{
3001	  if (!chain_can_be_combined_p (ch2))
3002	    continue;
3003
3004	  cch = combine_chains (ch1, ch2);
3005	  if (cch)
3006	    {
3007	      worklist.safe_push (cch);
3008	      m_chains.safe_push (cch);
3009	      combined_p = true;
3010	      break;
3011	    }
3012	}
3013    }
3014  if (!combined_p)
3015    return;
3016
3017  /* Setup UID for all statements in dominance order.  */
3018  basic_block *bbs = get_loop_body_in_dom_order (m_loop);
3019  renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes);
3020  free (bbs);
3021
3022  /* Re-association in combined chains may generate statements different to
3023     order of references of the original chain.  We need to keep references
3024     of combined chain in dominance order so that all uses will be inserted
3025     after definitions.  Note:
3026       A) This is necessary for all combined chains.
3027       B) This is only necessary for ZERO distance references because other
3028	  references inherit value from loop carried PHIs.
3029
3030     We first update position information for all combined chains.  */
3031  dref ref;
3032  for (i = 0; m_chains.iterate (i, &ch1); ++i)
3033    {
3034      if (ch1->type != CT_COMBINATION || ch1->combined)
3035	continue;
3036
3037      for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3038	ref->pos = gimple_uid (ref->stmt);
3039
3040      update_pos_for_combined_chains (ch1);
3041    }
3042  /* Then sort references according to newly updated position information.  */
3043  for (i = 0; m_chains.iterate (i, &ch1); ++i)
3044    {
3045      if (ch1->type != CT_COMBINATION && !ch1->combined)
3046	continue;
3047
3048      /* Find the first reference with non-ZERO distance.  */
3049      if (ch1->length == 0)
3050	j = ch1->refs.length();
3051      else
3052	{
3053	  for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3054	    if (ref->distance != 0)
3055	      break;
3056	}
3057
3058      /* Sort all ZERO distance references by position.  */
3059      qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
3060
3061      if (ch1->combined)
3062	continue;
3063
3064      /* For ZERO length chain, has_max_use_after must be true since root
3065	 combined stmt must dominates others.  */
3066      if (ch1->length == 0)
3067	{
3068	  ch1->has_max_use_after = true;
3069	  continue;
3070	}
3071      /* Check if there is use at max distance after root for combined chains
3072	 and set flag accordingly.  */
3073      ch1->has_max_use_after = false;
3074      gimple *root_stmt = get_chain_root (ch1)->stmt;
3075      for (j = 1; ch1->refs.iterate (j, &ref); ++j)
3076	{
3077	  if (ref->distance == ch1->length
3078	      && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
3079	    {
3080	      ch1->has_max_use_after = true;
3081	      break;
3082	    }
3083	}
3084    }
3085}
3086
3087/* Prepare initializers for store elimination CHAIN in LOOP.  Returns false
3088   if this is impossible because one of these initializers may trap, true
3089   otherwise.  */
3090
3091static bool
3092prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
3093{
3094  unsigned i, n = chain->length;
3095
3096  /* For now we can't eliminate stores if some of them are conditional
3097     executed.  */
3098  if (!chain->all_always_accessed)
3099    return false;
3100
3101  /* Nothing to intialize for intra-iteration store elimination.  */
3102  if (n == 0 && chain->type == CT_STORE_STORE)
3103    return true;
3104
3105  /* For store elimination chain, there is nothing to initialize if stores
3106     to be eliminated only store loop invariant values into memory.  */
3107  if (chain->type == CT_STORE_STORE
3108      && is_inv_store_elimination_chain (loop, chain))
3109    {
3110      chain->inv_store_elimination = true;
3111      return true;
3112    }
3113
3114  chain->inits.create (n);
3115  chain->inits.safe_grow_cleared (n, true);
3116
3117  /* For store eliminatin chain like below:
3118
3119     for (i = 0; i < len; i++)
3120       {
3121	 a[i] = 1;
3122	 // a[i + 1] = ...
3123	 a[i + 2] = 3;
3124       }
3125
3126     store to a[i + 1] is missed in loop body, it acts like bubbles.  The
3127     content of a[i + 1] remain the same if the loop iterates fewer times
3128     than chain->length.  We need to set up root variables for such stores
3129     by loading from memory before loop.  Note we only need to load bubble
3130     elements because loop body is guaranteed to be executed at least once
3131     after loop's preheader edge.  */
3132  auto_vec<bool> bubbles;
3133  bubbles.safe_grow_cleared (n + 1, true);
3134  for (i = 0; i < chain->refs.length (); i++)
3135    bubbles[chain->refs[i]->distance] = true;
3136
3137  struct data_reference *dr = get_chain_root (chain)->ref;
3138  for (i = 0; i < n; i++)
3139    {
3140      if (bubbles[i])
3141	continue;
3142
3143      gimple_seq stmts = NULL;
3144
3145      tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
3146      if (stmts)
3147	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3148
3149      chain->inits[i] = init;
3150    }
3151
3152  return true;
3153}
3154
3155/* Prepare initializers for CHAIN.  Returns false if this is impossible
3156   because one of these initializers may trap, true otherwise.  */
3157
3158bool
3159pcom_worker::prepare_initializers_chain (chain_p chain)
3160{
3161  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3162  struct data_reference *dr = get_chain_root (chain)->ref;
3163  tree init;
3164  dref laref;
3165  edge entry = loop_preheader_edge (m_loop);
3166
3167  if (chain->type == CT_STORE_STORE)
3168    return prepare_initializers_chain_store_elim (m_loop, chain);
3169
3170  /* Find the initializers for the variables, and check that they cannot
3171     trap.  */
3172  chain->inits.create (n);
3173  for (i = 0; i < n; i++)
3174    chain->inits.quick_push (NULL_TREE);
3175
3176  /* If we have replaced some looparound phi nodes, use their initializers
3177     instead of creating our own.  */
3178  FOR_EACH_VEC_ELT (chain->refs, i, laref)
3179    {
3180      if (gimple_code (laref->stmt) != GIMPLE_PHI)
3181	continue;
3182
3183      gcc_assert (laref->distance > 0);
3184      chain->inits[n - laref->distance]
3185	= PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3186    }
3187
3188  for (i = 0; i < n; i++)
3189    {
3190      gimple_seq stmts = NULL;
3191
3192      if (chain->inits[i] != NULL_TREE)
3193	continue;
3194
3195      init = ref_at_iteration (dr, (int) i - n, &stmts);
3196      if (!chain->all_always_accessed && tree_could_trap_p (init))
3197	{
3198	  gimple_seq_discard (stmts);
3199	  return false;
3200	}
3201
3202      if (stmts)
3203	gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3204
3205      chain->inits[i] = init;
3206    }
3207
3208  return true;
3209}
3210
3211/* Prepare initializers for chains, and free chains that cannot
3212   be used because the initializers might trap.  */
3213
3214void
3215pcom_worker::prepare_initializers ()
3216{
3217  chain_p chain;
3218  unsigned i;
3219
3220  for (i = 0; i < m_chains.length (); )
3221    {
3222      chain = m_chains[i];
3223      if (prepare_initializers_chain (chain))
3224	i++;
3225      else
3226	{
3227	  release_chain (chain);
3228	  m_chains.unordered_remove (i);
3229	}
3230    }
3231}
3232
3233/* Generates finalizer memory references for CHAIN.  Returns true
3234   if finalizer code for CHAIN can be generated, otherwise false.  */
3235
3236bool
3237pcom_worker::prepare_finalizers_chain (chain_p chain)
3238{
3239  unsigned i, n = chain->length;
3240  struct data_reference *dr = get_chain_root (chain)->ref;
3241  tree fini, niters = number_of_latch_executions (m_loop);
3242
3243  /* For now we can't eliminate stores if some of them are conditional
3244     executed.  */
3245  if (!chain->all_always_accessed)
3246    return false;
3247
3248  chain->finis.create (n);
3249  for (i = 0; i < n; i++)
3250    chain->finis.quick_push (NULL_TREE);
3251
3252  /* We never use looparound phi node for store elimination chains.  */
3253
3254  /* Find the finalizers for the variables, and check that they cannot
3255     trap.  */
3256  for (i = 0; i < n; i++)
3257    {
3258      gimple_seq stmts = NULL;
3259      gcc_assert (chain->finis[i] == NULL_TREE);
3260
3261      if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3262	{
3263	  niters = unshare_expr (niters);
3264	  niters = force_gimple_operand (niters, &stmts, true, NULL);
3265	  if (stmts)
3266	    {
3267	      gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3268	      stmts = NULL;
3269	    }
3270	}
3271      fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3272      if (stmts)
3273	gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3274
3275      chain->finis[i] = fini;
3276    }
3277
3278  return true;
3279}
3280
3281/* Generates finalizer memory reference for chains.  Returns true if
3282   finalizer code generation for chains breaks loop closed ssa form.  */
3283
3284bool
3285pcom_worker::prepare_finalizers ()
3286{
3287  chain_p chain;
3288  unsigned i;
3289  bool loop_closed_ssa = false;
3290
3291  for (i = 0; i < m_chains.length ();)
3292    {
3293      chain = m_chains[i];
3294
3295      /* Finalizer is only necessary for inter-iteration store elimination
3296	 chains.  */
3297      if (chain->length == 0 || chain->type != CT_STORE_STORE)
3298	{
3299	  i++;
3300	  continue;
3301	}
3302
3303      if (prepare_finalizers_chain (chain))
3304	{
3305	  i++;
3306	  /* Be conservative, assume loop closed ssa form is corrupted
3307	     by store-store chain.  Though it's not always the case if
3308	     eliminated stores only store loop invariant values into
3309	     memory.  */
3310	  loop_closed_ssa = true;
3311	}
3312      else
3313	{
3314	  release_chain (chain);
3315	  m_chains.unordered_remove (i);
3316	}
3317    }
3318  return loop_closed_ssa;
3319}
3320
3321/* Insert all initializing gimple stmts into LOOP's entry edge.  */
3322
3323static void
3324insert_init_seqs (class loop *loop, vec<chain_p> &chains)
3325{
3326  unsigned i;
3327  edge entry = loop_preheader_edge (loop);
3328
3329  for (i = 0; i < chains.length (); ++i)
3330    if (chains[i]->init_seq)
3331      {
3332	gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3333	chains[i]->init_seq = NULL;
3334      }
3335}
3336
3337/* Performs predictive commoning for LOOP.  Sets bit 1<<1 of return value
3338   if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
3339   form was corrupted.  Non-zero return value indicates some changes were
3340   applied to this loop.  */
3341
3342unsigned
3343pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
3344{
3345  struct component *components;
3346  unsigned unroll_factor = 0;
3347  class tree_niter_desc desc;
3348  bool unroll = false, loop_closed_ssa = false;
3349
3350  if (dump_file && (dump_flags & TDF_DETAILS))
3351    fprintf (dump_file, "Processing loop %d\n", m_loop->num);
3352
3353  /* Nothing for predicitive commoning if loop only iterates 1 time.  */
3354  if (get_max_loop_iterations_int (m_loop) == 0)
3355    {
3356      if (dump_file && (dump_flags & TDF_DETAILS))
3357	fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3358
3359      return 0;
3360    }
3361
3362  /* Find the data references and split them into components according to their
3363     dependence relations.  */
3364  auto_vec<loop_p, 3> loop_nest;
3365  if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs,
3366					  &m_dependences))
3367    {
3368      if (dump_file && (dump_flags & TDF_DETAILS))
3369	fprintf (dump_file, "Cannot analyze data dependencies\n");
3370      return 0;
3371    }
3372
3373  if (dump_file && (dump_flags & TDF_DETAILS))
3374    dump_data_dependence_relations (dump_file, m_dependences);
3375
3376  components = split_data_refs_to_components ();
3377
3378  loop_nest.release ();
3379  if (!components)
3380    return 0;
3381
3382  if (dump_file && (dump_flags & TDF_DETAILS))
3383    {
3384      fprintf (dump_file, "Initial state:\n\n");
3385      dump_components (dump_file, components);
3386    }
3387
3388  /* Find the suitable components and split them into chains.  */
3389  components = filter_suitable_components (components);
3390
3391  auto_bitmap tmp_vars;
3392  determine_roots (components);
3393  release_components (components);
3394
3395  if (!m_chains.exists ())
3396    {
3397      if (dump_file && (dump_flags & TDF_DETAILS))
3398	fprintf (dump_file,
3399		 "Predictive commoning failed: no suitable chains\n");
3400      return 0;
3401    }
3402
3403  prepare_initializers ();
3404  loop_closed_ssa = prepare_finalizers ();
3405
3406  /* Try to combine the chains that are always worked with together.  */
3407  try_combine_chains ();
3408
3409  insert_init_seqs (m_loop, m_chains);
3410
3411  if (dump_file && (dump_flags & TDF_DETAILS))
3412    {
3413      fprintf (dump_file, "Before commoning:\n\n");
3414      dump_chains (dump_file, m_chains);
3415    }
3416
3417  if (allow_unroll_p)
3418    /* Determine the unroll factor, and if the loop should be unrolled, ensure
3419       that its number of iterations is divisible by the factor.  */
3420    unroll_factor = determine_unroll_factor (m_chains);
3421
3422  if (unroll_factor > 1)
3423    unroll = can_unroll_loop_p (m_loop, unroll_factor, &desc);
3424
3425  /* Execute the predictive commoning transformations, and possibly unroll the
3426     loop.  */
3427  if (unroll)
3428    {
3429      struct epcc_data dta;
3430
3431      if (dump_file && (dump_flags & TDF_DETAILS))
3432	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3433
3434      dta.tmp_vars = tmp_vars;
3435      dta.chains = m_chains.to_vec_legacy ();
3436      dta.worker = this;
3437
3438      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3439	 execute_pred_commoning_cbck is called may cause phi nodes to be
3440	 reallocated, which is a problem since CHAINS may point to these
3441	 statements.  To fix this, we store the ssa names defined by the
3442	 phi nodes here instead of the phi nodes themselves, and restore
3443	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
3444      replace_phis_by_defined_names (m_chains);
3445
3446      tree_transform_and_unroll_loop (m_loop, unroll_factor, &desc,
3447				      execute_pred_commoning_cbck, &dta);
3448      eliminate_temp_copies (m_loop, tmp_vars);
3449    }
3450  else
3451    {
3452      if (dump_file && (dump_flags & TDF_DETAILS))
3453	fprintf (dump_file,
3454		 "Executing predictive commoning without unrolling.\n");
3455      execute_pred_commoning (tmp_vars);
3456    }
3457
3458  return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
3459}
3460
3461/* Runs predictive commoning.  */
3462
3463unsigned
3464tree_predictive_commoning (bool allow_unroll_p)
3465{
3466  unsigned ret = 0, changed = 0;
3467
3468  initialize_original_copy_tables ();
3469  for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
3470    if (optimize_loop_for_speed_p (loop))
3471      {
3472	pcom_worker w(loop);
3473	changed |= w.tree_predictive_commoning_loop (allow_unroll_p);
3474      }
3475  free_original_copy_tables ();
3476
3477  if (changed > 0)
3478    {
3479      ret = TODO_update_ssa_only_virtuals;
3480
3481      /* Some loop(s) got unrolled.  */
3482      if (changed > 1)
3483	{
3484	  scev_reset ();
3485
3486	  /* Need to fix up loop closed SSA.  */
3487	  if (changed >= 4)
3488	    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3489
3490	  ret |= TODO_cleanup_cfg;
3491	}
3492    }
3493
3494  return ret;
3495}
3496
3497/* Predictive commoning Pass.  */
3498
3499static unsigned
3500run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
3501{
3502  if (number_of_loops (fun) <= 1)
3503    return 0;
3504
3505  return tree_predictive_commoning (allow_unroll_p);
3506}
3507
3508namespace {
3509
3510const pass_data pass_data_predcom =
3511{
3512  GIMPLE_PASS, /* type */
3513  "pcom", /* name */
3514  OPTGROUP_LOOP, /* optinfo_flags */
3515  TV_PREDCOM, /* tv_id */
3516  PROP_cfg, /* properties_required */
3517  0, /* properties_provided */
3518  0, /* properties_destroyed */
3519  0, /* todo_flags_start */
3520  0, /* todo_flags_finish */
3521};
3522
3523class pass_predcom : public gimple_opt_pass
3524{
3525public:
3526  pass_predcom (gcc::context *ctxt)
3527    : gimple_opt_pass (pass_data_predcom, ctxt)
3528  {}
3529
3530  /* opt_pass methods: */
3531  virtual bool
3532  gate (function *)
3533  {
3534    if (flag_predictive_commoning != 0)
3535      return true;
3536    /* Loop vectorization enables predictive commoning implicitly
3537       only if predictive commoning isn't set explicitly, and it
3538       doesn't allow unrolling.  */
3539    if (flag_tree_loop_vectorize
3540	&& !OPTION_SET_P (flag_predictive_commoning))
3541      return true;
3542
3543    return false;
3544  }
3545
3546  virtual unsigned int
3547  execute (function *fun)
3548  {
3549    bool allow_unroll_p = flag_predictive_commoning != 0;
3550    return run_tree_predictive_commoning (fun, allow_unroll_p);
3551  }
3552
3553}; // class pass_predcom
3554
3555} // anon namespace
3556
3557gimple_opt_pass *
3558make_pass_predcom (gcc::context *ctxt)
3559{
3560  return new pass_predcom (ctxt);
3561}
3562