1/* Induction variable optimizations.
2   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22   It optimizes just the basic linear induction variables (although adding
23   support for other types should not be too hard).  It includes the
24   optimizations commonly known as strength reduction, induction variable
25   coalescing and induction variable elimination.  It does it in the
26   following steps:
27
28   1) The interesting uses of induction variables are found.  This includes
29
30      -- uses of induction variables in non-linear expressions
31      -- addresses of arrays
32      -- comparisons of induction variables
33
34   2) Candidates for the induction variables are found.  This includes
35
36      -- old induction variables
37      -- the variables defined by expressions derived from the "interesting
38	 uses" above
39
40   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
41      cost function assigns a cost to sets of induction variables and consists
42      of three parts:
43
44      -- The use costs.  Each of the interesting uses chooses the best induction
45	 variable in the set and adds its cost to the sum.  The cost reflects
46	 the time spent on modifying the induction variables value to be usable
47	 for the given purpose (adding base and offset for arrays, etc.).
48      -- The variable costs.  Each of the variables has a cost assigned that
49	 reflects the costs associated with incrementing the value of the
50	 variable.  The original variables are somewhat preferred.
51      -- The set cost.  Depending on the size of the set, extra cost may be
52	 added to reflect register pressure.
53
54      All the costs are defined in a machine-specific way, using the target
55      hooks and machine descriptions to determine them.
56
57   4) The trees are transformed to use the new variables, the dead code is
58      removed.
59
60   All of this is done loop by loop.  Doing it globally is theoretically
61   possible, it might give a better performance and it might enable us
62   to decide costs more precisely, but getting all the interactions right
63   would be complicated.  */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
86#include "hashtab.h"
87#include "tree-chrec.h"
88#include "tree-scalar-evolution.h"
89#include "cfgloop.h"
90#include "params.h"
91#include "langhooks.h"
92
93/* The infinite cost.  */
94#define INFTY 10000000
95
96/* The expected number of loop iterations.  TODO -- use profiling instead of
97   this.  */
98#define AVG_LOOP_NITER(LOOP) 5
99
100
/* Representation of an induction variable: its initial value and step,
   plus bookkeeping used while recording uses.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points, or NULL.  */
  tree step;		/* Step of the iv (constant only); NULL for an
			   invariant.  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
112
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description, or NULL if the
			   name is not an iv.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant; 0 if none was assigned yet.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};
123
/* Information attached to a loop (stored in loop->aux).  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};
129
/* Types of uses of induction variables (see dump_use for the dump names).  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
138
/* The candidate - cost pair: the cost of expressing one use by one
   candidate.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost of using the candidate for the use.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
150
/* A single interesting use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
169
/* The position within the loop where the iv increment is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};
177
/* The induction variable candidate -- one possible iv that the uses may be
   rewritten to be based on.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
197
/* The data used by the induction variable optimizations.  */

/* Heap-allocated vector types for the lists of uses and candidates.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
207
/* Global state of the pass for one loop.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop; a hash table
     keyed by the exit edge (see nfe_hash / nfe_eq / niter_for_exit).  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};
241
/* An assignment of iv candidates to uses, together with the cached costs of
   that assignment.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};
279
/* Difference of two iv candidate assignments, kept as a linked list of
   per-use changes so that a change can be rolled back.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
296
297/* Bound on number of candidates below that all candidates are considered.  */
298
299#define CONSIDER_ALL_CANDIDATES_BOUND \
300  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301
302/* If there are more iv occurrences, we just give up (it is quite unlikely that
303   optimizing such a loop would help, and it would take ages).  */
304
305#define MAX_CONSIDERED_USES \
306  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307
308/* If there are at most this number of ivs in the set, try removing unnecessary
309   ivs from the set always.  */
310
311#define ALWAYS_PRUNE_CAND_SET_BOUND \
312  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */
316
317static VEC(tree,heap) *decl_rtl_to_reset;
318
319/* Number of uses recorded in DATA.  */
320
321static inline unsigned
322n_iv_uses (struct ivopts_data *data)
323{
324  return VEC_length (iv_use_p, data->iv_uses);
325}
326
327/* Ith use recorded in DATA.  */
328
329static inline struct iv_use *
330iv_use (struct ivopts_data *data, unsigned i)
331{
332  return VEC_index (iv_use_p, data->iv_uses, i);
333}
334
335/* Number of candidates recorded in DATA.  */
336
337static inline unsigned
338n_iv_cands (struct ivopts_data *data)
339{
340  return VEC_length (iv_cand_p, data->iv_candidates);
341}
342
343/* Ith candidate recorded in DATA.  */
344
345static inline struct iv_cand *
346iv_cand (struct ivopts_data *data, unsigned i)
347{
348  return VEC_index (iv_cand_p, data->iv_candidates, i);
349}
350
351/* The data for LOOP.  */
352
353static inline struct loop_data *
354loop_data (struct loop *loop)
355{
356  return loop->aux;
357}
358
359/* The single loop exit if it dominates the latch, NULL otherwise.  */
360
361edge
362single_dom_exit (struct loop *loop)
363{
364  edge exit = loop->single_exit;
365
366  if (!exit)
367    return NULL;
368
369  if (!just_once_each_iteration_p (loop, exit->src))
370    return NULL;
371
372  return exit;
373}
374
375/* Dumps information about the induction variable IV to FILE.  */
376
377extern void dump_iv (FILE *, struct iv *);
378void
379dump_iv (FILE *file, struct iv *iv)
380{
381  if (iv->ssa_name)
382    {
383      fprintf (file, "ssa name ");
384      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385      fprintf (file, "\n");
386    }
387
388  fprintf (file, "  type ");
389  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390  fprintf (file, "\n");
391
392  if (iv->step)
393    {
394      fprintf (file, "  base ");
395      print_generic_expr (file, iv->base, TDF_SLIM);
396      fprintf (file, "\n");
397
398      fprintf (file, "  step ");
399      print_generic_expr (file, iv->step, TDF_SLIM);
400      fprintf (file, "\n");
401    }
402  else
403    {
404      fprintf (file, "  invariant ");
405      print_generic_expr (file, iv->base, TDF_SLIM);
406      fprintf (file, "\n");
407    }
408
409  if (iv->base_object)
410    {
411      fprintf (file, "  base object ");
412      print_generic_expr (file, iv->base_object, TDF_SLIM);
413      fprintf (file, "\n");
414    }
415
416  if (iv->biv_p)
417    fprintf (file, "  is a biv\n");
418}
419
420/* Dumps information about the USE to FILE.  */
421
422extern void dump_use (FILE *, struct iv_use *);
423void
424dump_use (FILE *file, struct iv_use *use)
425{
426  fprintf (file, "use %d\n", use->id);
427
428  switch (use->type)
429    {
430    case USE_NONLINEAR_EXPR:
431      fprintf (file, "  generic\n");
432      break;
433
434    case USE_OUTER:
435      fprintf (file, "  outside\n");
436      break;
437
438    case USE_ADDRESS:
439      fprintf (file, "  address\n");
440      break;
441
442    case USE_COMPARE:
443      fprintf (file, "  compare\n");
444      break;
445
446    default:
447      gcc_unreachable ();
448    }
449
450  fprintf (file, "  in statement ");
451  print_generic_expr (file, use->stmt, TDF_SLIM);
452  fprintf (file, "\n");
453
454  fprintf (file, "  at position ");
455  if (use->op_p)
456    print_generic_expr (file, *use->op_p, TDF_SLIM);
457  fprintf (file, "\n");
458
459  dump_iv (file, use->iv);
460
461  if (use->related_cands)
462    {
463      fprintf (file, "  related candidates ");
464      dump_bitmap (file, use->related_cands);
465    }
466}
467
468/* Dumps information about the uses to FILE.  */
469
470extern void dump_uses (FILE *, struct ivopts_data *);
471void
472dump_uses (FILE *file, struct ivopts_data *data)
473{
474  unsigned i;
475  struct iv_use *use;
476
477  for (i = 0; i < n_iv_uses (data); i++)
478    {
479      use = iv_use (data, i);
480
481      dump_use (file, use);
482      fprintf (file, "\n");
483    }
484}
485
486/* Dumps information about induction variable candidate CAND to FILE.  */
487
488extern void dump_cand (FILE *, struct iv_cand *);
489void
490dump_cand (FILE *file, struct iv_cand *cand)
491{
492  struct iv *iv = cand->iv;
493
494  fprintf (file, "candidate %d%s\n",
495	   cand->id, cand->important ? " (important)" : "");
496
497  if (cand->depends_on)
498    {
499      fprintf (file, "  depends on ");
500      dump_bitmap (file, cand->depends_on);
501    }
502
503  if (!iv)
504    {
505      fprintf (file, "  final value replacement\n");
506      return;
507    }
508
509  switch (cand->pos)
510    {
511    case IP_NORMAL:
512      fprintf (file, "  incremented before exit test\n");
513      break;
514
515    case IP_END:
516      fprintf (file, "  incremented at end\n");
517      break;
518
519    case IP_ORIGINAL:
520      fprintf (file, "  original biv\n");
521      break;
522    }
523
524  dump_iv (file, iv);
525}
526
527/* Returns the info for ssa version VER.  */
528
529static inline struct version_info *
530ver_info (struct ivopts_data *data, unsigned ver)
531{
532  return data->version_info + ver;
533}
534
535/* Returns the info for ssa name NAME.  */
536
537static inline struct version_info *
538name_info (struct ivopts_data *data, tree name)
539{
540  return ver_info (data, SSA_NAME_VERSION (name));
541}
542
/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  If so, stores X (sign-extended from BITS bits) to *X and returns
   true; otherwise returns false and leaves *X untouched.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits.  Computed with two shifts so that BITS equal
     to the width of HOST_WIDE_INT does not shift by the full type width
     (which would be undefined behavior).  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  /* The solution is a * b^-1 (mod 2^bits).  */
  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend VAL from BITS bits before storing the signed result.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
593
594/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595   emitted in LOOP.  */
596
597static bool
598stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
599{
600  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
601
602  gcc_assert (bb);
603
604  if (sbb == loop->latch)
605    return true;
606
607  if (sbb != bb)
608    return false;
609
610  return stmt == last_stmt (bb);
611}
612
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  /* If the increment block does not dominate STMT's block, STMT cannot be
     after the increment.  */
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  The loop needs no explicit
     end condition: both STMT and CAND->incremented_at belong to STMT_BB,
     so one of them is always reached.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}
639
640/* Returns true if STMT if after the place where the induction variable
641   CAND is incremented in LOOP.  */
642
643static bool
644stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
645{
646  switch (cand->pos)
647    {
648    case IP_END:
649      return false;
650
651    case IP_NORMAL:
652      return stmt_after_ip_normal_pos (loop, stmt);
653
654    case IP_ORIGINAL:
655      return stmt_after_ip_original_pos (cand, stmt);
656
657    default:
658      gcc_unreachable ();
659    }
660}
661
662/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
663
664static bool
665abnormal_ssa_name_p (tree exp)
666{
667  if (!exp)
668    return false;
669
670  if (TREE_CODE (exp) != SSA_NAME)
671    return false;
672
673  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
674}
675
676/* Returns false if BASE or INDEX contains a ssa name that occurs in an
677   abnormal phi node.  Callback for for_each_index.  */
678
679static bool
680idx_contains_abnormal_ssa_name_p (tree base, tree *index,
681				  void *data ATTRIBUTE_UNUSED)
682{
683  if (TREE_CODE (base) == ARRAY_REF)
684    {
685      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
686	return false;
687      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
688	return false;
689    }
690
691  return !abnormal_ssa_name_p (*index);
692}
693
694/* Returns true if EXPR contains a ssa name that occurs in an
695   abnormal phi node.  */
696
697bool
698contains_abnormal_ssa_name_p (tree expr)
699{
700  enum tree_code code;
701  enum tree_code_class class;
702
703  if (!expr)
704    return false;
705
706  code = TREE_CODE (expr);
707  class = TREE_CODE_CLASS (code);
708
709  if (code == SSA_NAME)
710    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
711
712  if (code == INTEGER_CST
713      || is_gimple_min_invariant (expr))
714    return false;
715
716  if (code == ADDR_EXPR)
717    return !for_each_index (&TREE_OPERAND (expr, 0),
718			    idx_contains_abnormal_ssa_name_p,
719			    NULL);
720
721  switch (class)
722    {
723    case tcc_binary:
724    case tcc_comparison:
725      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
726	return true;
727
728      /* Fallthru.  */
729    case tcc_unary:
730      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
731	return true;
732
733      break;
734
735    default:
736      gcc_unreachable ();
737    }
738
739  return false;
740}
741
/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};
754
755/* Hash function for nfe_cache_elt E.  */
756
757static hashval_t
758nfe_hash (const void *e)
759{
760  const struct nfe_cache_elt *elt = e;
761
762  return htab_hash_pointer (elt->exit);
763}
764
765/* Equality function for nfe_cache_elt E1 and edge E2.  */
766
767static int
768nfe_eq (const void *e1, const void *e2)
769{
770  const struct nfe_cache_elt *elt1 = e1;
771
772  return elt1->exit == e2;
773}
774
775/*  Returns tree describing number of iterations determined from
776    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
777
778static tree
779niter_for_exit (struct ivopts_data *data, edge exit)
780{
781  struct nfe_cache_elt *nfe_desc;
782  struct tree_niter_desc desc;
783  PTR *slot;
784
785  slot = htab_find_slot_with_hash (data->niters, exit,
786				   htab_hash_pointer (exit),
787				   INSERT);
788
789  if (!*slot)
790    {
791      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
792      nfe_desc->exit = exit;
793
794      /* Try to determine number of iterations.  We must know it
795	 unconditionally (i.e., without possibility of # of iterations
796	 being zero).  Also, we cannot safely work with ssa names that
797	 appear in phi nodes on abnormal edges, so that we do not create
798	 overlapping life ranges for them (PR 27283).  */
799      if (number_of_iterations_exit (data->current_loop,
800				     exit, &desc, true)
801	  && zero_p (desc.may_be_zero)
802     	  && !contains_abnormal_ssa_name_p (desc.niter))
803	nfe_desc->niter = desc.niter;
804      else
805	nfe_desc->niter = NULL_TREE;
806    }
807  else
808    nfe_desc = *slot;
809
810  return nfe_desc->niter;
811}
812
813/* Returns tree describing number of iterations determined from
814   single dominating exit of DATA->current_loop, or NULL if something
815   goes wrong.  */
816
817static tree
818niter_for_single_dom_exit (struct ivopts_data *data)
819{
820  edge exit = single_dom_exit (data->current_loop);
821
822  if (!exit)
823    return NULL;
824
825  return niter_for_exit (data, exit);
826}
827
828/* Initializes data structures used by the iv optimization pass, stored
829   in DATA.  LOOPS is the loop tree.  */
830
831static void
832tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
833{
834  unsigned i;
835
836  data->version_info_size = 2 * num_ssa_names;
837  data->version_info = xcalloc (data->version_info_size,
838				sizeof (struct version_info));
839  data->relevant = BITMAP_ALLOC (NULL);
840  data->important_candidates = BITMAP_ALLOC (NULL);
841  data->max_inv_id = 0;
842  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
843
844  for (i = 1; i < loops->num; i++)
845    if (loops->parray[i])
846      loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
847
848  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
849  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
850  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
851}
852
853/* Returns a memory object to that EXPR points.  In case we are able to
854   determine that it does not point to any such object, NULL is returned.  */
855
856static tree
857determine_base_object (tree expr)
858{
859  enum tree_code code = TREE_CODE (expr);
860  tree base, obj, op0, op1;
861
862  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
863    return NULL_TREE;
864
865  switch (code)
866    {
867    case INTEGER_CST:
868      return NULL_TREE;
869
870    case ADDR_EXPR:
871      obj = TREE_OPERAND (expr, 0);
872      base = get_base_address (obj);
873
874      if (!base)
875	return expr;
876
877      if (TREE_CODE (base) == INDIRECT_REF)
878	return determine_base_object (TREE_OPERAND (base, 0));
879
880      return fold_convert (ptr_type_node,
881		           build_fold_addr_expr (base));
882
883    case PLUS_EXPR:
884    case MINUS_EXPR:
885      op0 = determine_base_object (TREE_OPERAND (expr, 0));
886      op1 = determine_base_object (TREE_OPERAND (expr, 1));
887
888      if (!op1)
889	return op0;
890
891      if (!op0)
892	return (code == PLUS_EXPR
893		? op1
894		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
895
896      return fold_build2 (code, ptr_type_node, op0, op1);
897
898    case NOP_EXPR:
899    case CONVERT_EXPR:
900      return determine_base_object (TREE_OPERAND (expr, 0));
901
902    default:
903      return fold_convert (ptr_type_node, expr);
904    }
905}
906
907/* Allocates an induction variable with given initial value BASE and step STEP
908   for loop LOOP.  */
909
910static struct iv *
911alloc_iv (tree base, tree step)
912{
913  struct iv *iv = xcalloc (1, sizeof (struct iv));
914
915  if (step && integer_zerop (step))
916    step = NULL_TREE;
917
918  iv->base = base;
919  iv->base_object = determine_base_object (base);
920  iv->step = step;
921  iv->biv_p = false;
922  iv->have_use_for = false;
923  iv->use_id = 0;
924  iv->ssa_name = NULL_TREE;
925
926  return iv;
927}
928
929/* Sets STEP and BASE for induction variable IV.  */
930
931static void
932set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
933{
934  struct version_info *info = name_info (data, iv);
935
936  gcc_assert (!info->iv);
937
938  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
939  info->iv = alloc_iv (base, step);
940  info->iv->ssa_name = iv;
941}
942
943/* Finds induction variable declaration for VAR.  */
944
945static struct iv *
946get_iv (struct ivopts_data *data, tree var)
947{
948  basic_block bb;
949
950  if (!name_info (data, var)->iv)
951    {
952      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
953
954      if (!bb
955	  || !flow_bb_inside_loop_p (data->current_loop, bb))
956	set_iv (data, var, var, NULL_TREE);
957    }
958
959  return name_info (data, var)->iv;
960}
961
962/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
963   not define a simple affine biv with nonzero step.  */
964
965static tree
966determine_biv_step (tree phi)
967{
968  struct loop *loop = bb_for_stmt (phi)->loop_father;
969  tree name = PHI_RESULT (phi);
970  affine_iv iv;
971
972  if (!is_gimple_reg (name))
973    return NULL_TREE;
974
975  if (!simple_iv (loop, phi, name, &iv, true))
976    return NULL_TREE;
977
978  return (zero_p (iv.step) ? NULL_TREE : iv.step);
979}
980
981/* Finds basic ivs.  */
982
983static bool
984find_bivs (struct ivopts_data *data)
985{
986  tree phi, step, type, base;
987  bool found = false;
988  struct loop *loop = data->current_loop;
989
990  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
991    {
992      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
993	continue;
994
995      step = determine_biv_step (phi);
996      if (!step)
997	continue;
998
999      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1000      base = expand_simple_operations (base);
1001      if (contains_abnormal_ssa_name_p (base)
1002	  || contains_abnormal_ssa_name_p (step))
1003	continue;
1004
1005      type = TREE_TYPE (PHI_RESULT (phi));
1006      base = fold_convert (type, base);
1007      if (step)
1008	step = fold_convert (type, step);
1009
1010      set_iv (data, PHI_RESULT (phi), base, step);
1011      found = true;
1012    }
1013
1014  return found;
1015}
1016
/* Marks basic induction variables: for each header phi that is an iv whose
   latch argument is also an iv, flag both descriptions as bivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The value flowing in over the latch edge is the incremented iv.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1048
1049/* Checks whether STMT defines a linear induction variable and stores its
1050   parameters to IV.  */
1051
1052static bool
1053find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1054{
1055  tree lhs;
1056  struct loop *loop = data->current_loop;
1057
1058  iv->base = NULL_TREE;
1059  iv->step = NULL_TREE;
1060
1061  if (TREE_CODE (stmt) != MODIFY_EXPR)
1062    return false;
1063
1064  lhs = TREE_OPERAND (stmt, 0);
1065  if (TREE_CODE (lhs) != SSA_NAME)
1066    return false;
1067
1068  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1069    return false;
1070  iv->base = expand_simple_operations (iv->base);
1071
1072  if (contains_abnormal_ssa_name_p (iv->base)
1073      || contains_abnormal_ssa_name_p (iv->step))
1074    return false;
1075
1076  return true;
1077}
1078
1079/* Finds general ivs in statement STMT.  */
1080
1081static void
1082find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1083{
1084  affine_iv iv;
1085
1086  if (!find_givs_in_stmt_scev (data, stmt, &iv))
1087    return;
1088
1089  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1090}
1091
1092/* Finds general ivs in basic block BB.  */
1093
1094static void
1095find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1096{
1097  block_stmt_iterator bsi;
1098
1099  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1100    find_givs_in_stmt (data, bsi_stmt (bsi));
1101}
1102
1103/* Finds general ivs.  */
1104
1105static void
1106find_givs (struct ivopts_data *data)
1107{
1108  struct loop *loop = data->current_loop;
1109  basic_block *body = get_loop_body_in_dom_order (loop);
1110  unsigned i;
1111
1112  for (i = 0; i < loop->num_nodes; i++)
1113    find_givs_in_bb (data, body[i]);
1114  free (body);
1115}
1116
1117/* For each ssa name defined in LOOP determines whether it is an induction
1118   variable and if so, its initial value and step.  */
1119
1120static bool
1121find_induction_variables (struct ivopts_data *data)
1122{
1123  unsigned i;
1124  bitmap_iterator bi;
1125
1126  if (!find_bivs (data))
1127    return false;
1128
1129  find_givs (data);
1130  mark_bivs (data);
1131
1132  if (dump_file && (dump_flags & TDF_DETAILS))
1133    {
1134      tree niter = niter_for_single_dom_exit (data);
1135
1136      if (niter)
1137	{
1138	  fprintf (dump_file, "  number of iterations ");
1139	  print_generic_expr (dump_file, niter, TDF_SLIM);
1140	  fprintf (dump_file, "\n\n");
1141    	};
1142
1143      fprintf (dump_file, "Induction variables:\n\n");
1144
1145      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1146	{
1147	  if (ver_info (data, i)->iv)
1148	    dump_iv (dump_file, ver_info (data, i)->iv);
1149	}
1150    }
1151
1152  return true;
1153}
1154
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   Returns the newly recorded use.  Ownership of IV is taken over by the
   use.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = xcalloc (1, sizeof (struct iv_use));

  /* Uses are numbered in order of creation.  */
  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
1181
1182/* Checks whether OP is a loop-level invariant and if so, records it.
1183   NONLINEAR_USE is true if the invariant is used in a way we do not
1184   handle specially.  */
1185
1186static void
1187record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1188{
1189  basic_block bb;
1190  struct version_info *info;
1191
1192  if (TREE_CODE (op) != SSA_NAME
1193      || !is_gimple_reg (op))
1194    return;
1195
1196  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1197  if (bb
1198      && flow_bb_inside_loop_p (data->current_loop, bb))
1199    return;
1200
1201  info = name_info (data, op);
1202  info->name = op;
1203  info->has_nonlin_use |= nonlinear_use;
1204  if (!info->inv_id)
1205    info->inv_id = ++data->max_inv_id;
1206  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1207}
1208
/* Checks whether the use OP is interesting and if so, records it
   as TYPE (USE_NONLINEAR_EXPR or USE_OUTER).  Returns the recorded use,
   or NULL if OP is not an interesting induction variable use.  */

static struct iv_use *
find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
				       enum use_type type)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  /* Each iv is recorded as a use at most once; if it was seen before,
     reuse that use, possibly strengthening its type.  */
  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR
		  || use->type == USE_OUTER);

      /* USE_NONLINEAR_EXPR subsumes USE_OUTER, so upgrade the use when
	 the iv now also appears in a nonlinear expression.  */
      if (type == USE_NONLINEAR_EXPR)
	use->type = USE_NONLINEAR_EXPR;
      return use;
    }

  /* An iv with zero step is in fact a loop invariant.  */
  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  /* Record the use with a private copy of the iv, so that later changes
     to the copy do not affect the iv itself.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, type);
  iv->use_id = use->id;

  return use;
}
1259
/* Checks whether the use OP is interesting and if so, records it as a
   nonlinear-expression use.  Returns the recorded use, or NULL.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}
1267
/* Records a definition of induction variable OP that is used outside of the
   loop.  Returns the recorded use, or NULL.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}
1276
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  /* CONST_IV stands in for an operand that is not an ssa name; its zero
     step marks it as invariant.  */
  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  /* A bare ssa name condition is handled as "name (compared with) 0",
     using the address of the local ZERO as the second operand.  */
  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      /* Fall back to recording the operands as ordinary uses.  */
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Exactly one operand is a genuine iv here; record the condition as a
     compare use based on a copy of that iv.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
1338
1339/* Returns true if expression EXPR is obviously invariant in LOOP,
1340   i.e. if all its operands are defined outside of the LOOP.  */
1341
1342bool
1343expr_invariant_in_loop_p (struct loop *loop, tree expr)
1344{
1345  basic_block def_bb;
1346  unsigned i, len;
1347
1348  if (is_gimple_min_invariant (expr))
1349    return true;
1350
1351  if (TREE_CODE (expr) == SSA_NAME)
1352    {
1353      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1354      if (def_bb
1355	  && flow_bb_inside_loop_p (loop, def_bb))
1356	return false;
1357
1358      return true;
1359    }
1360
1361  if (!EXPR_P (expr))
1362    return false;
1363
1364  len = TREE_CODE_LENGTH (TREE_CODE (expr));
1365  for (i = 0; i < len; i++)
1366    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1367      return false;
1368
1369  return true;
1370}
1371
/* Auxiliary data passed through for_each_index to idx_find_step, which
   cumulates the steps of indices into *STEP_P and replaces their values
   with the initial ones.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* The global ivopts data.  */
  tree stmt;				/* Statement the reference is in.  */
  tree *step_p;				/* Accumulated step of the address;
					   NULL until the first index with a
					   nonzero step is processed.  */
};
1382
/* Callback for for_each_index: cumulates the step contributed by index
   *IDX of reference BASE into DATA->step_p and replaces *IDX by its
   initial value.  Returns false when the value of the index cannot be
   determined.  */

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-ssa index contributes nothing to the step.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index by its initial value.  */
  *idx = iv->base;

  /* An index with zero step contributes nothing further.  */
  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* Cast the iv to sizetype, verifying that this does not change its
     behavior.  */
  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  /* The contribution of this index is its step scaled by the element
     size; accumulate it into the overall step of the address.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
1460
1461/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1462   object is passed to it in DATA.  */
1463
1464static bool
1465idx_record_use (tree base, tree *idx,
1466		void *data)
1467{
1468  find_interesting_uses_op (data, *idx);
1469  if (TREE_CODE (base) == ARRAY_REF)
1470    {
1471      find_interesting_uses_op (data, array_ref_element_size (base));
1472      find_interesting_uses_op (data, array_ref_low_bound (base));
1473    }
1474  return true;
1475}
1476
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  /* Misaligned if the base is less aligned than the mode requires, or if
     the bit position is not aligned to the mode or even to a byte.  */
  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
1512
/* Finds addresses in *OP_P inside STMT.  If the address evolves as an
   affine iv, records it as a USE_ADDRESS use; otherwise just records the
   uses of the individual indices of the reference.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF
      || (TREE_CODE (base) == COMPONENT_REF
	  && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
    goto fail;

  /* On strict alignment platforms, avoid creating accesses that may be
     misaligned.  */
  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace the TMR base by its initial value and start accumulating
	 the step.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      /* Likewise for the index; its step is scaled by TMR_STEP.  */
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      /* An address with zero step is invariant; nothing to optimize.  */
      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Sum the steps contributed by all the indices of the reference
	 and replace each index by its initial value.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* The address is not an affine iv use; still record the uses of its
     indices.  */
  for_each_index (op_p, idx_record_use, data);
}
1603
1604/* Finds and records invariants used in STMT.  */
1605
1606static void
1607find_invariants_stmt (struct ivopts_data *data, tree stmt)
1608{
1609  ssa_op_iter iter;
1610  use_operand_p use_p;
1611  tree op;
1612
1613  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1614    {
1615      op = USE_FROM_PTR (use_p);
1616      record_invariant (data, op, false);
1617    }
1618}
1619
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  /* A COND_EXPR condition may allow iv elimination.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the lhs may be a store to memory as well.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      /* A store of a gimple value to memory.  */
      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  /* A phi node in the loop header that defines an iv is not an
     interesting use.  */
  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  /* Record the remaining iv operands as generic uses.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1710
1711/* Finds interesting uses of induction variables outside of loops
1712   on loop exit edge EXIT.  */
1713
1714static void
1715find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1716{
1717  tree phi, def;
1718
1719  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1720    {
1721      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1722      find_interesting_uses_outer (data, def);
1723    }
1724}
1725
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      /* Record uses of ivs live on the edges leaving the loop.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      /* And uses inside the phi nodes and statements of the block.  */
      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      /* Dump the recorded invariants.  */
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
1781
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns EXPR with the
   stripped offsets removed (or ORIG_EXPR unchanged when nothing could be
   stripped).  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* The whole constant moves to *OFFSET (if it fits in a HWI); what
	 remains of the expression is zero.  */
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      /* Simplify away operands that became zero.  */
      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The index contributes its stripped offset scaled by the element
	 size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Entering an address; the operand is the top-level reference.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      /* Leaving the address part of the expression.  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
1920
/* Strips constant offsets from EXPR and stores them to OFFSET.  Returns
   the remaining expression.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
1928
1929/* Returns variant of TYPE that can be used as base for different uses.
1930   For integer types, we return unsigned variant of the type, which
1931   avoids problems with overflows.  For pointer types, we return void *.  */
1932
1933static tree
1934generic_type_for (tree type)
1935{
1936  if (POINTER_TYPE_P (type))
1937    return ptr_type_node;
1938
1939  if (TYPE_UNSIGNED (type))
1940    return type;
1941
1942  return unsigned_type_for (type);
1943}
1944
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  The ivopts data is passed
   through the file-scope FD_IVOPTS_DATA, since walk_tree allows only one
   extra argument.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  /* Skip names that are not recorded invariants, and invariants that
     have nonlinear uses.  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* The dependency bitmap is allocated lazily.  */
  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
1968
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   An existing equivalent candidate is reused instead of creating a
   duplicate; the (possibly reused) candidate is returned.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* Normalize BASE and STEP to the generic (unsigned or void *) type, so
     that equivalent candidates compare equal below.  */
  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Look for an existing candidate with the same position, increment
     statement, base and step.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      /* A candidate without an iv is the pseudocandidate; it matches only
	 a request for another pseudocandidate.  */
      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* No match found; create a new candidate.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  /* For IP_ORIGINAL the caller fills in var_before/var_after.  */
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may depend on loop invariants; record them.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* A reused candidate may be promoted to important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2079
2080/* Returns true if incrementing the induction variable at the end of the LOOP
2081   is allowed.
2082
2083   The purpose is to avoid splitting latch edge with a biv increment, thus
2084   creating a jump, possibly confusing other optimization passes and leaving
2085   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2086   is not available (so we do not have a better alternative), or if the latch
2087   edge is already nonempty.  */
2088
2089static bool
2090allow_ip_end_pos_p (struct loop *loop)
2091{
2092  if (!ip_normal_pos (loop))
2093    return true;
2094
2095  if (!empty_block_p (ip_end_pos (loop)))
2096    return true;
2097
2098  return false;
2099}
2100
2101/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2102   position to POS.  If USE is not NULL, the candidate is set as related to
2103   it.  The candidate computation is scheduled on all available positions.  */
2104
2105static void
2106add_candidate (struct ivopts_data *data,
2107	       tree base, tree step, bool important, struct iv_use *use)
2108{
2109  if (ip_normal_pos (data->current_loop))
2110    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2111  if (ip_end_pos (data->current_loop)
2112      && allow_ip_end_pos_p (data->current_loop))
2113    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2114}
2115
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  The candidate is marked important.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  /* Ask the frontend for an unsigned integer type of the given width.  */
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}
2127
/* Adds standard iv candidates (of the width of int, and possibly of
   twice that width).  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2139
2140
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  The candidate is incremented at the statement that
	 defines the value of the iv on the latch edge.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2169
2170/* Adds candidates based on the old induction variables.  */
2171
2172static void
2173add_old_ivs_candidates (struct ivopts_data *data)
2174{
2175  unsigned i;
2176  struct iv *iv;
2177  bitmap_iterator bi;
2178
2179  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2180    {
2181      iv = ver_info (data, i)->iv;
2182      if (iv && iv->biv_p && !zero_p (iv->step))
2183	add_old_iv_candidates (data, iv);
2184    }
2185}
2186
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  /* The candidate with the same base and step as the iv.  */
  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
2209
2210/* Possibly adds pseudocandidate for replacing the final value of USE by
2211   a direct computation.  */
2212
2213static void
2214add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2215{
2216  /* We must know where we exit the loop and how many times does it roll.  */
2217  if (! niter_for_single_dom_exit (data))
2218    return;
2219
2220  add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2221}
2222
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	case USE_OUTER:
	  add_iv_value_candidates (data, use->iv, use);

	  /* Additionally, add the pseudocandidate for the possibility to
	     replace the final value by a direct computation.  */
	  add_iv_outer_candidates (data, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
2259
2260/* Record important candidates and add them to related_cands bitmaps
2261   if needed.  */
2262
2263static void
2264record_important_candidates (struct ivopts_data *data)
2265{
2266  unsigned i;
2267  struct iv_use *use;
2268
2269  for (i = 0; i < n_iv_cands (data); i++)
2270    {
2271      struct iv_cand *cand = iv_cand (data, i);
2272
2273      if (cand->important)
2274	bitmap_set_bit (data->important_candidates, i);
2275    }
2276
2277  data->consider_all_candidates = (n_iv_cands (data)
2278				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2279
2280  if (data->consider_all_candidates)
2281    {
2282      /* We will not need "related_cands" bitmaps in this case,
2283	 so release them to decrease peak memory consumption.  */
2284      for (i = 0; i < n_iv_uses (data); i++)
2285	{
2286	  use = iv_use (data, i);
2287	  BITMAP_FREE (use->related_cands);
2288	}
2289    }
2290  else
2291    {
2292      /* Add important candidates to the related_cands bitmaps.  */
2293      for (i = 0; i < n_iv_uses (data); i++)
2294	bitmap_ior_into (iv_use (data, i)->related_cands,
2295			 data->important_candidates);
2296    }
2297}
2298
/* Finds the candidates for the induction variables.  Builds the candidate
   set in three steps, then records which candidates are important.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  This must run after all candidates
     have been added, since it iterates over the whole candidate set.  */
  record_important_candidates (data);
}
2316
2317/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2318   If consider_all_candidates is true, we use a two-dimensional array, otherwise
2319   we allocate a simple list to every use.  */
2320
2321static void
2322alloc_use_cost_map (struct ivopts_data *data)
2323{
2324  unsigned i, size, s, j;
2325
2326  for (i = 0; i < n_iv_uses (data); i++)
2327    {
2328      struct iv_use *use = iv_use (data, i);
2329      bitmap_iterator bi;
2330
2331      if (data->consider_all_candidates)
2332	size = n_iv_cands (data);
2333      else
2334	{
2335	  s = 0;
2336	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2337	    {
2338	      s++;
2339	    }
2340
2341	  /* Round up to the power of two, so that moduling by it is fast.  */
2342	  for (size = 1; size < s; size <<= 1)
2343	    continue;
2344	}
2345
2346      use->n_map_members = size;
2347      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2348    }
2349}
2350
2351/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2352   on invariants DEPENDS_ON and that the value used in expressing it
2353   is VALUE.*/
2354
2355static void
2356set_use_iv_cost (struct ivopts_data *data,
2357		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2358		 bitmap depends_on, tree value)
2359{
2360  unsigned i, s;
2361
2362  if (cost == INFTY)
2363    {
2364      BITMAP_FREE (depends_on);
2365      return;
2366    }
2367
2368  if (data->consider_all_candidates)
2369    {
2370      use->cost_map[cand->id].cand = cand;
2371      use->cost_map[cand->id].cost = cost;
2372      use->cost_map[cand->id].depends_on = depends_on;
2373      use->cost_map[cand->id].value = value;
2374      return;
2375    }
2376
2377  /* n_map_members is a power of two, so this computes modulo.  */
2378  s = cand->id & (use->n_map_members - 1);
2379  for (i = s; i < use->n_map_members; i++)
2380    if (!use->cost_map[i].cand)
2381      goto found;
2382  for (i = 0; i < s; i++)
2383    if (!use->cost_map[i].cand)
2384      goto found;
2385
2386  gcc_unreachable ();
2387
2388found:
2389  use->cost_map[i].cand = cand;
2390  use->cost_map[i].cost = cost;
2391  use->cost_map[i].depends_on = depends_on;
2392  use->cost_map[i].value = value;
2393}
2394
2395/* Gets cost of (USE, CANDIDATE) pair.  */
2396
2397static struct cost_pair *
2398get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2399		 struct iv_cand *cand)
2400{
2401  unsigned i, s;
2402  struct cost_pair *ret;
2403
2404  if (!cand)
2405    return NULL;
2406
2407  if (data->consider_all_candidates)
2408    {
2409      ret = use->cost_map + cand->id;
2410      if (!ret->cand)
2411	return NULL;
2412
2413      return ret;
2414    }
2415
2416  /* n_map_members is a power of two, so this computes modulo.  */
2417  s = cand->id & (use->n_map_members - 1);
2418  for (i = s; i < use->n_map_members; i++)
2419    if (use->cost_map[i].cand == cand)
2420      return use->cost_map + i;
2421
2422  for (i = 0; i < s; i++)
2423    if (use->cost_map[i].cand == cand)
2424      return use->cost_map + i;
2425
2426  return NULL;
2427}
2428
2429/* Returns estimate on cost of computing SEQ.  */
2430
2431static unsigned
2432seq_cost (rtx seq)
2433{
2434  unsigned cost = 0;
2435  rtx set;
2436
2437  for (; seq; seq = NEXT_INSN (seq))
2438    {
2439      set = single_set (seq);
2440      if (set)
2441	cost += rtx_cost (set, SET);
2442      else
2443	cost++;
2444    }
2445
2446  return cost;
2447}
2448
2449/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2450static rtx
2451produce_memory_decl_rtl (tree obj, int *regno)
2452{
2453  rtx x;
2454
2455  gcc_assert (obj);
2456  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2457    {
2458      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2459      x = gen_rtx_SYMBOL_REF (Pmode, name);
2460    }
2461  else
2462    x = gen_raw_REG (Pmode, (*regno)++);
2463
2464  return gen_rtx_MEM (DECL_MODE (obj), x);
2465}
2466
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number (int *).
   Every decl given a fake DECL_RTL is pushed onto decl_rtl_to_reset so
   that the fake rtl can be removed later.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip component/array references to get at the base object whose
	 address is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Setting *ws to 0 stops walk_tree from descending further.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      /* Keep an already assigned DECL_RTL.  */
      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register; fake a memory location
	 for them instead.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so that the fake DECL_RTL can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2524
/* Determines cost of the computation of EXPR.  The expression is expanded
   to RTL in a fresh insn sequence and the costs of the generated insns are
   summed with seq_cost.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  /* Give the referenced variables fake DECL_RTLs so that the expression
     can be expanded.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* If the result ended up in memory, account for the cost of computing
     its address as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}
2548
2549/* Returns variable containing the value of candidate CAND at statement AT.  */
2550
2551static tree
2552var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2553{
2554  if (stmt_after_increment (loop, cand, stmt))
2555    return cand->var_after;
2556  else
2557    return cand->var_before;
2558}
2559
2560/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2561   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2562
2563int
2564tree_int_cst_sign_bit (tree t)
2565{
2566  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2567  unsigned HOST_WIDE_INT w;
2568
2569  if (bitno < HOST_BITS_PER_WIDE_INT)
2570    w = TREE_INT_CST_LOW (t);
2571  else
2572    {
2573      w = TREE_INT_CST_HIGH (t);
2574      bitno -= HOST_BITS_PER_WIDE_INT;
2575    }
2576
2577  return (w >> bitno) & 1;
2578}
2579
/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  Works by structural recursion
   on TOP; all arithmetic on the constants is done in TYPE.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* TOP == BOT gives the multiple 1 immediately.  */
  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * c) = (cst * c) * BOT if X = cst * BOT.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
	return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
				      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both operands must be multiples of BOT; combine the factors.  */
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
	return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
	return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
	 the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
	{
	  negate = true;
	  bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
	}
      else
	negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
	{
	  negate = !negate;
	  top = fold_unary_to_constant (NEGATE_EXPR, type, top);
	}

      /* TOP must be divisible by BOT exactly.  */
      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
	return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
	res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}
2658
2659/* Sets COMB to CST.  */
2660
2661static void
2662aff_combination_const (struct affine_tree_combination *comb, tree type,
2663		       unsigned HOST_WIDE_INT cst)
2664{
2665  unsigned prec = TYPE_PRECISION (type);
2666
2667  comb->type = type;
2668  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2669
2670  comb->n = 0;
2671  comb->rest = NULL_TREE;
2672  comb->offset = cst & comb->mask;
2673}
2674
2675/* Sets COMB to single element ELT.  */
2676
2677static void
2678aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2679{
2680  unsigned prec = TYPE_PRECISION (type);
2681
2682  comb->type = type;
2683  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2684
2685  comb->n = 1;
2686  comb->elts[0] = elt;
2687  comb->coefs[0] = 1;
2688  comb->rest = NULL_TREE;
2689  comb->offset = 0;
2690}
2691
/* Scales COMB by SCALE.  The arithmetic is performed modulo the precision
   of comb->type (comb->mask + 1), so a SCALE of all-ones means
   multiplication by -1.  */

static void
aff_combination_scale (struct affine_tree_combination *comb,
		       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  /* Scale each coefficient, compacting away elements whose coefficient
     becomes zero modulo the mask.  */
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
	j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      /* Move the scaled remainder into the element array if a slot is
	 free; otherwise fold the multiplication into the tree itself.  */
      if (comb->n < MAX_AFF_ELTS)
	{
	  comb->coefs[comb->n] = scale;
	  comb->elts[comb->n] = comb->rest;
	  comb->rest = NULL_TREE;
	  comb->n++;
	}
      else
	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
				  build_int_cst_type (comb->type, scale));
    }
}
2733
/* Adds ELT * SCALE to COMB.  If ELT already occurs in COMB, only its
   coefficient is updated; if the combination is full, the term is folded
   into comb->rest as a tree.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
			 unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
	if (comb->coefs[i])
	  return;

	/* The coefficient cancelled to zero; remove the element by moving
	   the last one into its slot.  */
	comb->n--;
	comb->coefs[i] = comb->coefs[comb->n];
	comb->elts[i] = comb->elts[comb->n];

	/* A slot just became free, so the remainder tree (which is only
	   ever set once the array is full) can be promoted back into it.  */
	if (comb->rest)
	  {
	    gcc_assert (comb->n == MAX_AFF_ELTS - 1);
	    comb->coefs[comb->n] = 1;
	    comb->elts[comb->n] = comb->rest;
	    comb->rest = NULL_TREE;
	    comb->n++;
	  }
	return;
      }
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  /* No slot free: fold ELT * SCALE into the remainder tree.  */
  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
		       fold_convert (comb->type, elt),
		       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}
2786
2787/* Adds COMB2 to COMB1.  */
2788
2789static void
2790aff_combination_add (struct affine_tree_combination *comb1,
2791		     struct affine_tree_combination *comb2)
2792{
2793  unsigned i;
2794
2795  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2796  for (i = 0; i < comb2->n; i++)
2797    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2798  if (comb2->rest)
2799    aff_combination_add_elt (comb1, comb2->rest, 1);
2800}
2801
/* Splits EXPR into an affine combination of parts, stored in COMB.  The
   combination is in TYPE.  Subexpressions that are not affine end up as
   opaque elements of the combination.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Decompose both operands; subtraction becomes addition of the
	 operand scaled by -1.  */
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      /* Only multiplication by a constant keeps the expression affine.  */
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      /* &base.field -> &base + byte offset, provided the bit position is
	 a multiple of BITS_PER_UNIT.  */
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  /* Variable part of the offset computed by get_inner_reference.  */
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  /* Fallback: treat EXPR as a single opaque element.  */
  aff_combination_elt (comb, type, expr);
}
2874
/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE;
   SCALE is interpreted modulo MASK + 1, so a SCALE with its top bit set is
   treated as negative.  EXPR may be NULL, in which case just ELT * SCALE
   is built.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  /* SCALE == MASK represents -1 modulo the precision.  */
  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  /* The test checks whether the sign bit of SCALE (within the precision
     given by MASK) is set.  */
  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}
2919
2920/* Copies the tree elements of COMB to ensure that they are not shared.  */
2921
2922static void
2923unshare_aff_combination (struct affine_tree_combination *comb)
2924{
2925  unsigned i;
2926
2927  for (i = 0; i < comb->n; i++)
2928    comb->elts[i] = unshare_expr (comb->elts[i]);
2929  if (comb->rest)
2930    comb->rest = unshare_expr (comb->rest);
2931}
2932
/* Makes tree from the affine combination COMB.  The terms are added one by
   one via add_elt_to_tree; a negative offset is emitted as a subtraction so
   that the resulting tree is more natural.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  /* comb->rest may only be nonempty once the element array is full.  */
  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  /* The test checks whether the sign bit of the offset (within the
     precision given by comb->mask) is set.  */
  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}
2968
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.
   The computed expression is

     use = ubase - ratio * cbase + ratio * var

   where RATIO is the constant factor between the steps of USE and CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, tree at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Compute in the unsigned variant of the use type, so that overflow
     wraps rather than being undefined.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
	 considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
	cstep_orig = cstep;
    }

  /* Determine RATIO = ustep / cstep, preferring the cheap host-integer
     division when both steps fit in a HOST_WIDE_INT.  */
  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider case when ustep divides cstep and the ratio is
	     a power of 2 (so that the division is fast to execute)?  We would
	     need to be much more careful with overflows etc. then.  */
	  return false;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
	return false;

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
	 to integer_onep/integer_all_onesp, since the former ignores
	 TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
	ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
	ratioi = 1;
      else if (integer_all_onesp (ratio))
	ratioi = -1;
      else
	ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
	 should not be very common, and fold itself is not that bad either,
	 so making the aff. functions more complicated to handle this case
	 is not that urgent.  */
      if (ratioi == 1)
	{
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
	}
      else if (ratioi == -1)
	{
	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
	}
      else
	{
	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
	}

      /* The whole expression is kept in AFF->rest; aff_combination_to_tree
	 recognizes this special shape (n == 0 && offset == 0).  */
      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}
3121
3122/* Determines the expression by that USE is expressed from induction variable
3123   CAND at statement AT in LOOP.  The computation is unshared.  */
3124
3125static tree
3126get_computation_at (struct loop *loop,
3127		    struct iv_use *use, struct iv_cand *cand, tree at)
3128{
3129  struct affine_tree_combination aff;
3130  tree type = TREE_TYPE (use->iv->base);
3131
3132  if (!get_computation_aff (loop, use, cand, at, &aff))
3133    return NULL_TREE;
3134  unshare_aff_combination (&aff);
3135  return fold_convert (type, aff_combination_to_tree (&aff));
3136}
3137
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper around
   get_computation_at using the statement of USE.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3146
/* Returns cost of addition in MODE.  The cost is measured by expanding a
   register-register addition to RTL once and summing the insn costs; the
   result is cached per mode in a static table.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  /* Return the cached value if we computed it before.  */
  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* Never return zero, so that the cached value is distinguishable from
     "not yet computed".  */
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
3178
/* Entry in a hashtable of already known costs for multiplication by a
   constant; keyed on (cst, mode), see mbc_entry_hash and mbc_entry_eq.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
3186
3187/* Counts hash value for the ENTRY.  */
3188
3189static hashval_t
3190mbc_entry_hash (const void *entry)
3191{
3192  const struct mbc_entry *e = entry;
3193
3194  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3195}
3196
3197/* Compares the hash table entries ENTRY1 and ENTRY2.  */
3198
3199static int
3200mbc_entry_eq (const void *entry1, const void *entry2)
3201{
3202  const struct mbc_entry *e1 = entry1;
3203  const struct mbc_entry *e2 = entry2;
3204
3205  return (e1->mode == e2->mode
3206	  && e1->cst == e2->cst);
3207}
3208
/* Returns cost of multiplication by constant CST in MODE.  The cost is
   measured by expanding the multiplication to RTL once and summing the
   insn costs; results are cached in a lazily created hash table keyed on
   (CST, MODE).  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  /* Create the cache lazily on first use.  */
  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Insert the new entry before computing the cost; its cost field is
     filled in below.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  /* NOTE(review): the dump truncates CST to int, so very large constants
     may be displayed incorrectly (the computed cost is unaffected).  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
3248
/* Returns true if multiplying an index by RATIO is allowed in an address.
   The set of valid multipliers in [-MAX_RATIO, MAX_RATIO] is probed once
   with memory_address_p and cached in a static bitmap.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  /* Build the cache lazily on the first query.  */
  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      /* Try each candidate multiplier; bit i + MAX_RATIO represents
	 multiplier i.  */
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (Pmode, addr))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Multipliers outside the probed range are conservatively rejected.  */
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
3289
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   The valid offset range and the smallest valid multiplier are probed once
   and cached in static variables; the per-shape address cost is cached in
   a static four-dimensional table.

   TODO -- there must be some better way.  This all is quite crude.  */

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      /* Find the largest power-of-two offset (up to 1 << 20) that is still
	 a valid address offset.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* Likewise for negative offsets.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      /* Pick a representative valid multiplier for the cost probes.  */
      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i))
	  {
	    rat = i;
	    break;
	  }
    }

  /* Sign-extend OFFSET from the width of Pmode into s_offset.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio));

  /* A disallowed multiplier has to be computed separately.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An out-of-range offset has to be added separately, making the address
     contain a variable part.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      /* Build a representative address of the requested shape...  */
      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  gen_int_mode (off, Pmode)));
	}
      else if (offset_p)
	base = gen_int_mode (off, Pmode);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      /* ... and measure the cost of legitimizing and using it.  */
      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
	 follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      /* Never return zero, so that the cached value is distinguishable
	 from "not yet computed".  */
      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}
3426
/* Estimates cost of forcing expression EXPR into a variable.  */

unsigned
force_expr_to_var_cost (tree expr)
{
  /* Costs of the basic invariant forms, measured once per compilation by
     expanding sample expressions to RTL (see below).  */
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      /* Build a dummy static variable and measure the cost of computing an
	 integer constant, the address of a symbol, and a symbol plus a
	 constant offset.  */
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      /* The + 1 biases symbols to be slightly more expensive than plain
	 integer constants, and symbol + offset more expensive still.  */
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_expr_to_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Variables and SSA names are already available in registers.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  /* Address of a plain declaration -- the "symbol" case measured
	     during initialization above.  */
	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      /* Any other invariant -- costed as symbol + offset.  */
      return address_cost;
    }

  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      /* Recursively cost the operands; operands that already are gimple
	 values need no extra computation.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant uses the target's multiply-by-constant
	 cost; a fully variable multiplication is treated like a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}
3553
3554/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3555   invariants the computation depends on.  */
3556
3557static unsigned
3558force_var_cost (struct ivopts_data *data,
3559		tree expr, bitmap *depends_on)
3560{
3561  if (depends_on)
3562    {
3563      fd_ivopts_data = data;
3564      walk_tree (&expr, find_depends, depends_on, NULL);
3565    }
3566
3567  return force_expr_to_var_cost (expr);
3568}
3569
3570/* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3571   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3572   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3573   invariants the computation depends on.  */
3574
3575static unsigned
3576split_address_cost (struct ivopts_data *data,
3577		    tree addr, bool *symbol_present, bool *var_present,
3578		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3579{
3580  tree core;
3581  HOST_WIDE_INT bitsize;
3582  HOST_WIDE_INT bitpos;
3583  tree toffset;
3584  enum machine_mode mode;
3585  int unsignedp, volatilep;
3586
3587  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3588			      &unsignedp, &volatilep, false);
3589
3590  if (toffset != 0
3591      || bitpos % BITS_PER_UNIT != 0
3592      || TREE_CODE (core) != VAR_DECL)
3593    {
3594      *symbol_present = false;
3595      *var_present = true;
3596      fd_ivopts_data = data;
3597      walk_tree (&addr, find_depends, depends_on, NULL);
3598      return target_spill_cost;
3599    }
3600
3601  *offset += bitpos / BITS_PER_UNIT;
3602  if (TREE_STATIC (core)
3603      || DECL_EXTERNAL (core))
3604    {
3605      *symbol_present = true;
3606      *var_present = false;
3607      return 0;
3608    }
3609
3610  *symbol_present = false;
3611  *var_present = true;
3612  return 0;
3613}
3614
3615/* Estimates cost of expressing difference of addresses E1 - E2 as
3616   var + symbol + offset.  The value of offset is added to OFFSET,
3617   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3618   part is missing.  DEPENDS_ON is a set of the invariants the computation
3619   depends on.  */
3620
3621static unsigned
3622ptr_difference_cost (struct ivopts_data *data,
3623		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3624		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3625{
3626  HOST_WIDE_INT diff = 0;
3627  unsigned cost;
3628
3629  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3630
3631  if (ptr_difference_const (e1, e2, &diff))
3632    {
3633      *offset += diff;
3634      *symbol_present = false;
3635      *var_present = false;
3636      return 0;
3637    }
3638
3639  if (e2 == integer_zero_node)
3640    return split_address_cost (data, TREE_OPERAND (e1, 0),
3641			       symbol_present, var_present, offset, depends_on);
3642
3643  *symbol_present = false;
3644  *var_present = true;
3645
3646  cost = force_var_cost (data, e1, depends_on);
3647  cost += force_var_cost (data, e2, depends_on);
3648  cost += add_cost (Pmode);
3649
3650  return cost;
3651}
3652
3653/* Estimates cost of expressing difference E1 - E2 as
3654   var + symbol + offset.  The value of offset is added to OFFSET,
3655   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3656   part is missing.  DEPENDS_ON is a set of the invariants the computation
3657   depends on.  */
3658
3659static unsigned
3660difference_cost (struct ivopts_data *data,
3661		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3662		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3663{
3664  unsigned cost;
3665  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3666  unsigned HOST_WIDE_INT off1, off2;
3667
3668  e1 = strip_offset (e1, &off1);
3669  e2 = strip_offset (e2, &off2);
3670  *offset += off1 - off2;
3671
3672  STRIP_NOPS (e1);
3673  STRIP_NOPS (e2);
3674
3675  if (TREE_CODE (e1) == ADDR_EXPR)
3676    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3677				depends_on);
3678  *symbol_present = false;
3679
3680  if (operand_equal_p (e1, e2, 0))
3681    {
3682      *var_present = false;
3683      return 0;
3684    }
3685  *var_present = true;
3686  if (zero_p (e2))
3687    return force_var_cost (data, e1, depends_on);
3688
3689  if (zero_p (e1))
3690    {
3691      cost = force_var_cost (data, e2, depends_on);
3692      cost += multiply_by_cost (-1, mode);
3693
3694      return cost;
3695    }
3696
3697  cost = force_var_cost (data, e1, depends_on);
3698  cost += force_var_cost (data, e2, depends_on);
3699  cost += add_cost (mode);
3700
3701  return cost;
3702}
3703
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.  */

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* Compute RATIO = ustep / cstep, exactly for constant steps, otherwise
     symbolically; give up if it does not fit a HOST_WIDE_INT.  */
  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
	return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);

      if (!rat)
	return INFTY;

      if (cst_and_fits_in_hwi (rat))
	ratio = int_cst_value (rat);
      else if (integer_onep (rat))
	ratio = 1;
      else if (integer_all_onesp (rat))
	ratio = -1;
      else
	return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      /* Constant cbase folds into the offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      /* With ratio 1 the bases can be subtracted directly.  */
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      /* General case: force cbase into a register, pay for the addition,
	 and analyze ubase on its own.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an address,
     find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      /* Only the candidate itself (possibly scaled) is needed.  */
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}
3874
3875/* Determines the cost of the computation by that USE is expressed
3876   from induction variable CAND.  If ADDRESS_P is true, we just need
3877   to create an address from it, otherwise we want to get it into
3878   register.  A set of invariants we depend on is stored in
3879   DEPENDS_ON.  */
3880
3881static unsigned
3882get_computation_cost (struct ivopts_data *data,
3883		      struct iv_use *use, struct iv_cand *cand,
3884		      bool address_p, bitmap *depends_on)
3885{
3886  return get_computation_cost_at (data,
3887				  use, cand, address_p, depends_on, use->stmt);
3888}
3889
3890/* Determines cost of basing replacement of USE on CAND in a generic
3891   expression.  */
3892
3893static bool
3894determine_use_iv_cost_generic (struct ivopts_data *data,
3895			       struct iv_use *use, struct iv_cand *cand)
3896{
3897  bitmap depends_on;
3898  unsigned cost;
3899
3900  /* The simple case first -- if we need to express value of the preserved
3901     original biv, the cost is 0.  This also prevents us from counting the
3902     cost of increment twice -- once at this use and once in the cost of
3903     the candidate.  */
3904  if (cand->pos == IP_ORIGINAL
3905      && cand->incremented_at == use->stmt)
3906    {
3907      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3908      return true;
3909    }
3910
3911  cost = get_computation_cost (data, use, cand, false, &depends_on);
3912  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3913
3914  return cost != INFTY;
3915}
3916
3917/* Determines cost of basing replacement of USE on CAND in an address.  */
3918
3919static bool
3920determine_use_iv_cost_address (struct ivopts_data *data,
3921			       struct iv_use *use, struct iv_cand *cand)
3922{
3923  bitmap depends_on;
3924  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3925
3926  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3927
3928  return cost != INFTY;
3929}
3930
3931/* Computes value of induction variable IV in iteration NITER.  */
3932
3933static tree
3934iv_value (struct iv *iv, tree niter)
3935{
3936  tree val;
3937  tree type = TREE_TYPE (iv->base);
3938
3939  niter = fold_convert (type, niter);
3940  val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3941
3942  return fold_build2 (PLUS_EXPR, type, iv->base, val);
3943}
3944
3945/* Computes value of candidate CAND at position AT in iteration NITER.  */
3946
3947static tree
3948cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3949{
3950  tree val = iv_value (cand->iv, niter);
3951  tree type = TREE_TYPE (cand->iv->base);
3952
3953  if (stmt_after_increment (loop, cand, at))
3954    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3955
3956  return val;
3957}
3958
3959/* Returns period of induction variable iv.  */
3960
3961static tree
3962iv_period (struct iv *iv)
3963{
3964  tree step = iv->step, period, type;
3965  tree pow2div;
3966
3967  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3968
3969  /* Period of the iv is gcd (step, type range).  Since type range is power
3970     of two, it suffices to determine the maximum power of two that divides
3971     step.  */
3972  pow2div = num_ending_zeros (step);
3973  type = unsigned_type_for (TREE_TYPE (step));
3974
3975  period = build_low_bits_mask (type,
3976				(TYPE_PRECISION (type)
3977				 - tree_low_cst (pow2div, 1)));
3978
3979  return period;
3980}
3981
3982/* Returns the comparison operator used when eliminating the iv USE.  */
3983
3984static enum tree_code
3985iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3986{
3987  struct loop *loop = data->current_loop;
3988  basic_block ex_bb;
3989  edge exit;
3990
3991  ex_bb = bb_for_stmt (use->stmt);
3992  exit = EDGE_SUCC (ex_bb, 0);
3993  if (flow_bb_inside_loop_p (loop, exit->dest))
3994    exit = EDGE_SUCC (ex_bb, 1);
3995
3996  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3997}
3998
3999/* Check whether it is possible to express the condition in USE by comparison
4000   of candidate CAND.  If so, store the value compared with to BOUND.  */
4001
4002static bool
4003may_eliminate_iv (struct ivopts_data *data,
4004		  struct iv_use *use, struct iv_cand *cand, tree *bound)
4005{
4006  basic_block ex_bb;
4007  edge exit;
4008  tree nit, nit_type;
4009  tree wider_type, period, per_type;
4010  struct loop *loop = data->current_loop;
4011
4012  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4013    return false;
4014
4015  /* For now works only for exits that dominate the loop latch.  TODO -- extend
4016     for other conditions inside loop body.  */
4017  ex_bb = bb_for_stmt (use->stmt);
4018  if (use->stmt != last_stmt (ex_bb)
4019      || TREE_CODE (use->stmt) != COND_EXPR)
4020    return false;
4021  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4022    return false;
4023
4024  exit = EDGE_SUCC (ex_bb, 0);
4025  if (flow_bb_inside_loop_p (loop, exit->dest))
4026    exit = EDGE_SUCC (ex_bb, 1);
4027  if (flow_bb_inside_loop_p (loop, exit->dest))
4028    return false;
4029
4030  nit = niter_for_exit (data, exit);
4031  if (!nit)
4032    return false;
4033
4034  nit_type = TREE_TYPE (nit);
4035
4036  /* Determine whether we may use the variable to test whether niter iterations
4037     elapsed.  This is the case iff the period of the induction variable is
4038     greater than the number of iterations.  */
4039  period = iv_period (cand->iv);
4040  if (!period)
4041    return false;
4042  per_type = TREE_TYPE (period);
4043
4044  wider_type = TREE_TYPE (period);
4045  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4046    wider_type = per_type;
4047  else
4048    wider_type = nit_type;
4049
4050  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4051				      fold_convert (wider_type, period),
4052				      fold_convert (wider_type, nit))))
4053    return false;
4054
4055  *bound = cand_value_at (loop, cand, use->stmt, nit);
4056  return true;
4057}
4058
4059/* Determines cost of basing replacement of USE on CAND in a condition.  */
4060
4061static bool
4062determine_use_iv_cost_condition (struct ivopts_data *data,
4063				 struct iv_use *use, struct iv_cand *cand)
4064{
4065  tree bound = NULL_TREE, op, cond;
4066  bitmap depends_on = NULL;
4067  unsigned cost;
4068
4069  /* Only consider real candidates.  */
4070  if (!cand->iv)
4071    {
4072      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4073      return false;
4074    }
4075
4076  if (may_eliminate_iv (data, use, cand, &bound))
4077    {
4078      cost = force_var_cost (data, bound, &depends_on);
4079
4080      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4081      return cost != INFTY;
4082    }
4083
4084  /* The induction variable elimination failed; just express the original
4085     giv.  If it is compared with an invariant, note that we cannot get
4086     rid of it.  */
4087  cost = get_computation_cost (data, use, cand, false, &depends_on);
4088
4089  cond = *use->op_p;
4090  if (TREE_CODE (cond) != SSA_NAME)
4091    {
4092      op = TREE_OPERAND (cond, 0);
4093      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4094	op = TREE_OPERAND (cond, 1);
4095      if (TREE_CODE (op) == SSA_NAME)
4096	{
4097	  op = get_iv (data, op)->base;
4098	  fd_ivopts_data = data;
4099	  walk_tree (&op, find_depends, &depends_on, NULL);
4100	}
4101    }
4102
4103  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4104  return cost != INFTY;
4105}
4106
4107/* Checks whether it is possible to replace the final value of USE by
4108   a direct computation.  If so, the formula is stored to *VALUE.  */
4109
4110static bool
4111may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4112			 tree *value)
4113{
4114  struct loop *loop = data->current_loop;
4115  edge exit;
4116  tree nit;
4117
4118  exit = single_dom_exit (loop);
4119  if (!exit)
4120    return false;
4121
4122  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4123			      bb_for_stmt (use->stmt)));
4124
4125  nit = niter_for_single_dom_exit (data);
4126  if (!nit)
4127    return false;
4128
4129  *value = iv_value (use->iv, nit);
4130
4131  return true;
4132}
4133
/* Determines cost of replacing final value of USE using CAND.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  if (!cand->iv)
    {
      /* A pseudocandidate -- the use must be replaced by its final value
	 computed directly after the loop.  */
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The value is computed only once, so spread its cost over the
	 expected number of iterations.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}
4194
4195/* Determines cost of basing replacement of USE on CAND.  Returns false
4196   if USE cannot be based on CAND.  */
4197
4198static bool
4199determine_use_iv_cost (struct ivopts_data *data,
4200		       struct iv_use *use, struct iv_cand *cand)
4201{
4202  switch (use->type)
4203    {
4204    case USE_NONLINEAR_EXPR:
4205      return determine_use_iv_cost_generic (data, use, cand);
4206
4207    case USE_OUTER:
4208      return determine_use_iv_cost_outer (data, use, cand);
4209
4210    case USE_ADDRESS:
4211      return determine_use_iv_cost_address (data, use, cand);
4212
4213    case USE_COMPARE:
4214      return determine_use_iv_cost_condition (data, use, cand);
4215
4216    default:
4217      gcc_unreachable ();
4218    }
4219}
4220
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  /* Scratch bitmap collecting candidates whose cost turned out infinite.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Cost every candidate for this use.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Only cost the candidates recorded as related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      /* Dump the use/candidate cost table, skipping infinite entries.  */
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
4295
4296/* Determines cost of the candidate CAND.  */
4297
4298static void
4299determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4300{
4301  unsigned cost_base, cost_step;
4302  tree base;
4303
4304  if (!cand->iv)
4305    {
4306      cand->cost = 0;
4307      return;
4308    }
4309
4310  /* There are two costs associated with the candidate -- its increment
4311     and its initialization.  The second is almost negligible for any loop
4312     that rolls enough, so we take it just very little into account.  */
4313
4314  base = cand->iv->base;
4315  cost_base = force_var_cost (data, base, NULL);
4316  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4317
4318  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4319
4320  /* Prefer the original iv unless we may gain something by replacing it;
4321     this is not really relevant for artificial ivs created by other
4322     passes.  */
4323  if (cand->pos == IP_ORIGINAL
4324      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4325    cand->cost--;
4326
4327  /* Prefer not to insert statements into latch unless there are some
4328     already (so that we do not create unnecessary jumps).  */
4329  if (cand->pos == IP_END
4330      && empty_block_p (ip_end_pos (data->current_loop)))
4331    cand->cost++;
4332}
4333
4334/* Determines costs of computation of the candidates.  */
4335
4336static void
4337determine_iv_costs (struct ivopts_data *data)
4338{
4339  unsigned i;
4340
4341  if (dump_file && (dump_flags & TDF_DETAILS))
4342    {
4343      fprintf (dump_file, "Candidate costs:\n");
4344      fprintf (dump_file, "  cand\tcost\n");
4345    }
4346
4347  for (i = 0; i < n_iv_cands (data); i++)
4348    {
4349      struct iv_cand *cand = iv_cand (data, i);
4350
4351      determine_iv_cost (data, cand);
4352
4353      if (dump_file && (dump_flags & TDF_DETAILS))
4354	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4355    }
4356
4357if (dump_file && (dump_flags & TDF_DETAILS))
4358      fprintf (dump_file, "\n");
4359}
4360
4361/* Calculates cost for having SIZE induction variables.  */
4362
4363static unsigned
4364ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4365{
4366  return global_cost_for_size (size,
4367			       loop_data (data->current_loop)->regs_used,
4368			       n_iv_uses (data));
4369}
4370
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count loop header phi results that are real (non-virtual) registers
     and are not induction variables.  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Add the invariants from outside the loop that have non-linear uses.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      /* Dump the resulting penalty for each possible set size.  */
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
4446
4447/* Returns true if A is a cheaper cost pair than B.  */
4448
4449static bool
4450cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4451{
4452  if (!a)
4453    return false;
4454
4455  if (!b)
4456    return true;
4457
4458  if (a->cost < b->cost)
4459    return true;
4460
4461  if (a->cost > b->cost)
4462    return false;
4463
4464  /* In case the costs are the same, prefer the cheaper candidate.  */
4465  if (a->cand->cost < b->cand->cost)
4466    return true;
4467
4468  return false;
4469}
4470
4471/* Computes the cost field of IVS structure.  */
4472
4473static void
4474iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4475{
4476  unsigned cost = 0;
4477
4478  cost += ivs->cand_use_cost;
4479  cost += ivs->cand_cost;
4480  cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4481
4482  ivs->cost = cost;
4483}
4484
/* Remove invariants in set INVS from set IVS.  */
4486
4487static void
4488iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4489{
4490  bitmap_iterator bi;
4491  unsigned iid;
4492
4493  if (!invs)
4494    return;
4495
4496  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4497    {
4498      ivs->n_invariant_uses[iid]--;
4499      if (ivs->n_invariant_uses[iid] == 0)
4500	ivs->n_regs--;
4501    }
4502}
4503
4504/* Set USE not to be expressed by any candidate in IVS.  */
4505
4506static void
4507iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4508		 struct iv_use *use)
4509{
4510  unsigned uid = use->id, cid;
4511  struct cost_pair *cp;
4512
4513  cp = ivs->cand_for_use[uid];
4514  if (!cp)
4515    return;
4516  cid = cp->cand->id;
4517
4518  ivs->bad_uses++;
4519  ivs->cand_for_use[uid] = NULL;
4520  ivs->n_cand_uses[cid]--;
4521
4522  if (ivs->n_cand_uses[cid] == 0)
4523    {
4524      bitmap_clear_bit (ivs->cands, cid);
4525      /* Do not count the pseudocandidates.  */
4526      if (cp->cand->iv)
4527	ivs->n_regs--;
4528      ivs->n_cands--;
4529      ivs->cand_cost -= cp->cand->cost;
4530
4531      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4532    }
4533
4534  ivs->cand_use_cost -= cp->cost;
4535
4536  iv_ca_set_remove_invariants (ivs, cp->depends_on);
4537  iv_ca_recount_cost (data, ivs);
4538}
4539
4540/* Add invariants in set INVS to set IVS.  */
4541
4542static void
4543iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4544{
4545  bitmap_iterator bi;
4546  unsigned iid;
4547
4548  if (!invs)
4549    return;
4550
4551  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4552    {
4553      ivs->n_invariant_uses[iid]++;
4554      if (ivs->n_invariant_uses[iid] == 1)
4555	ivs->n_regs++;
4556    }
4557}
4558
4559/* Set cost pair for USE in set IVS to CP.  */
4560
4561static void
4562iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4563	      struct iv_use *use, struct cost_pair *cp)
4564{
4565  unsigned uid = use->id, cid;
4566
4567  if (ivs->cand_for_use[uid] == cp)
4568    return;
4569
4570  if (ivs->cand_for_use[uid])
4571    iv_ca_set_no_cp (data, ivs, use);
4572
4573  if (cp)
4574    {
4575      cid = cp->cand->id;
4576
4577      ivs->bad_uses--;
4578      ivs->cand_for_use[uid] = cp;
4579      ivs->n_cand_uses[cid]++;
4580      if (ivs->n_cand_uses[cid] == 1)
4581	{
4582	  bitmap_set_bit (ivs->cands, cid);
4583	  /* Do not count the pseudocandidates.  */
4584	  if (cp->cand->iv)
4585	    ivs->n_regs++;
4586	  ivs->n_cands++;
4587	  ivs->cand_cost += cp->cand->cost;
4588
4589	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4590	}
4591
4592      ivs->cand_use_cost += cp->cost;
4593      iv_ca_set_add_invariants (ivs, cp->depends_on);
4594      iv_ca_recount_cost (data, ivs);
4595    }
4596}
4597
4598/* Extend set IVS by expressing USE by some of the candidates in it
4599   if possible.  */
4600
4601static void
4602iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4603	       struct iv_use *use)
4604{
4605  struct cost_pair *best_cp = NULL, *cp;
4606  bitmap_iterator bi;
4607  unsigned i;
4608
4609  gcc_assert (ivs->upto >= use->id);
4610
4611  if (ivs->upto == use->id)
4612    {
4613      ivs->upto++;
4614      ivs->bad_uses++;
4615    }
4616
4617  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4618    {
4619      cp = get_use_iv_cost (data, use, iv_cand (data, i));
4620
4621      if (cheaper_cost_pair (cp, best_cp))
4622	best_cp = cp;
4623    }
4624
4625  iv_ca_set_cp (data, ivs, use, best_cp);
4626}
4627
4628/* Get cost for assignment IVS.  */
4629
4630static unsigned
4631iv_ca_cost (struct iv_ca *ivs)
4632{
4633  return (ivs->bad_uses ? INFTY : ivs->cost);
4634}
4635
4636/* Returns true if all dependences of CP are among invariants in IVS.  */
4637
4638static bool
4639iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4640{
4641  unsigned i;
4642  bitmap_iterator bi;
4643
4644  if (!cp->depends_on)
4645    return true;
4646
4647  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4648    {
4649      if (ivs->n_invariant_uses[i] == 0)
4650	return false;
4651    }
4652
4653  return true;
4654}
4655
4656/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4657   it before NEXT_CHANGE.  */
4658
4659static struct iv_ca_delta *
4660iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4661		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4662{
4663  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4664
4665  change->use = use;
4666  change->old_cp = old_cp;
4667  change->new_cp = new_cp;
4668  change->next_change = next_change;
4669
4670  return change;
4671}
4672
4673/* Joins two lists of changes L1 and L2.  Destructive -- old lists
4674   are rewritten.  */
4675
4676static struct iv_ca_delta *
4677iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4678{
4679  struct iv_ca_delta *last;
4680
4681  if (!l2)
4682    return l1;
4683
4684  if (!l1)
4685    return l2;
4686
4687  for (last = l1; last->next_change; last = last->next_change)
4688    continue;
4689  last->next_change = l2;
4690
4691  return l1;
4692}
4693
/* Returns the candidate by which USE is expressed in IVS.  */
4695
static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  /* NULL when USE is not expressed by any candidate in IVS.  */
  return ivs->cand_for_use[use->id];
}
4701
4702/* Reverse the list of changes DELTA, forming the inverse to it.  */
4703
4704static struct iv_ca_delta *
4705iv_ca_delta_reverse (struct iv_ca_delta *delta)
4706{
4707  struct iv_ca_delta *act, *next, *prev = NULL;
4708  struct cost_pair *tmp;
4709
4710  for (act = delta; act; act = next)
4711    {
4712      next = act->next_change;
4713      act->next_change = prev;
4714      prev = act;
4715
4716      tmp = act->old_cp;
4717      act->old_cp = act->new_cp;
4718      act->new_cp = tmp;
4719    }
4720
4721  return prev;
4722}
4723
4724/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4725   reverted instead.  */
4726
4727static void
4728iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4729		    struct iv_ca_delta *delta, bool forward)
4730{
4731  struct cost_pair *from, *to;
4732  struct iv_ca_delta *act;
4733
4734  if (!forward)
4735    delta = iv_ca_delta_reverse (delta);
4736
4737  for (act = delta; act; act = act->next_change)
4738    {
4739      from = act->old_cp;
4740      to = act->new_cp;
4741      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4742      iv_ca_set_cp (data, ivs, act->use, to);
4743    }
4744
4745  if (!forward)
4746    iv_ca_delta_reverse (delta);
4747}
4748
4749/* Returns true if CAND is used in IVS.  */
4750
static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  /* A candidate belongs to the set iff it expresses at least one use.  */
  return ivs->n_cand_uses[cand->id] > 0;
}
4756
4757/* Returns number of induction variable candidates in the set IVS.  */
4758
static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  /* Maintained incrementally by iv_ca_set_cp / iv_ca_set_no_cp.  */
  return ivs->n_cands;
}
4764
4765/* Free the list of changes DELTA.  */
4766
4767static void
4768iv_ca_delta_free (struct iv_ca_delta **delta)
4769{
4770  struct iv_ca_delta *act, *next;
4771
4772  for (act = *delta; act; act = next)
4773    {
4774      next = act->next_change;
4775      free (act);
4776    }
4777
4778  *delta = NULL;
4779}
4780
4781/* Allocates new iv candidates assignment.  */
4782
4783static struct iv_ca *
4784iv_ca_new (struct ivopts_data *data)
4785{
4786  struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4787
4788  nw->upto = 0;
4789  nw->bad_uses = 0;
4790  nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4791  nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4792  nw->cands = BITMAP_ALLOC (NULL);
4793  nw->n_cands = 0;
4794  nw->n_regs = 0;
4795  nw->cand_use_cost = 0;
4796  nw->cand_cost = 0;
4797  nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4798  nw->cost = 0;
4799
4800  return nw;
4801}
4802
4803/* Free memory occupied by the set IVS.  */
4804
4805static void
4806iv_ca_free (struct iv_ca **ivs)
4807{
4808  free ((*ivs)->cand_for_use);
4809  free ((*ivs)->n_cand_uses);
4810  BITMAP_FREE ((*ivs)->cands);
4811  free ((*ivs)->n_invariant_uses);
4812  free (*ivs);
4813  *ivs = NULL;
4814}
4815
4816/* Dumps IVS to FILE.  */
4817
4818static void
4819iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4820{
4821  const char *pref = "  invariants ";
4822  unsigned i;
4823
4824  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4825  bitmap_print (file, ivs->cands, "  candidates ","\n");
4826
4827  for (i = 1; i <= data->max_inv_id; i++)
4828    if (ivs->n_invariant_uses[i])
4829      {
4830	fprintf (file, "%s%d", pref, i);
4831	pref = ", ";
4832      }
4833  fprintf (file, "\n");
4834}
4835
4836/* Try changing candidate in IVS to CAND for each use.  Return cost of the
4837   new set, and store differences in DELTA.  Number of induction variables
4838   in the new set is stored to N_IVS.  */
4839
4840static unsigned
4841iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4842	      struct iv_cand *cand, struct iv_ca_delta **delta,
4843	      unsigned *n_ivs)
4844{
4845  unsigned i, cost;
4846  struct iv_use *use;
4847  struct cost_pair *old_cp, *new_cp;
4848
4849  *delta = NULL;
4850  for (i = 0; i < ivs->upto; i++)
4851    {
4852      use = iv_use (data, i);
4853      old_cp = iv_ca_cand_for_use (ivs, use);
4854
4855      if (old_cp
4856	  && old_cp->cand == cand)
4857	continue;
4858
4859      new_cp = get_use_iv_cost (data, use, cand);
4860      if (!new_cp)
4861	continue;
4862
4863      if (!iv_ca_has_deps (ivs, new_cp))
4864	continue;
4865
4866      if (!cheaper_cost_pair (new_cp, old_cp))
4867	continue;
4868
4869      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4870    }
4871
4872  iv_ca_delta_commit (data, ivs, *delta, true);
4873  cost = iv_ca_cost (ivs);
4874  if (n_ivs)
4875    *n_ivs = iv_ca_n_cands (ivs);
4876  iv_ca_delta_commit (data, ivs, *delta, false);
4877
4878  return cost;
4879}
4880
4881/* Try narrowing set IVS by removing CAND.  Return the cost of
4882   the new set and store the differences in DELTA.  */
4883
static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  /* For every use currently expressed by CAND, find the cheapest
     replacement among the remaining candidates of the set.  */
  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Depending on the mode, either all candidates in the set or only
	 those recorded as related to USE may express it.  The two loop
	 bodies below are identical; only the iterated bitmap differs.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      /* The replacement must be able to express USE, have its
		 dependences satisfied, and be the cheapest found so far.  */
	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* Some use cannot be expressed without CAND; removing CAND is
	 impossible.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily to measure the cost, then revert.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4964
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */
4968
static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single candidate whose removal decreases the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal improves the set -- the empty delta is the answer.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  /* The final delta is the best removal followed by whatever the
     recursion removed on top of it.  */
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
5013
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  */
5016
5017static bool
5018try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5019		  struct iv_use *use)
5020{
5021  unsigned best_cost, act_cost;
5022  unsigned i;
5023  bitmap_iterator bi;
5024  struct iv_cand *cand;
5025  struct iv_ca_delta *best_delta = NULL, *act_delta;
5026  struct cost_pair *cp;
5027
5028  iv_ca_add_use (data, ivs, use);
5029  best_cost = iv_ca_cost (ivs);
5030
5031  cp = iv_ca_cand_for_use (ivs, use);
5032  if (cp)
5033    {
5034      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5035      iv_ca_set_no_cp (data, ivs, use);
5036    }
5037
5038  /* First try important candidates.  Only if it fails, try the specific ones.
5039     Rationale -- in loops with many variables the best choice often is to use
5040     just one generic biv.  If we added here many ivs specific to the uses,
5041     the optimization algorithm later would be likely to get stuck in a local
5042     minimum, thus causing us to create too many ivs.  The approach from
5043     few ivs to more seems more likely to be successful -- starting from few
5044     ivs, replacing an expensive use by a specific iv should always be a
5045     win.  */
5046  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5047    {
5048      cand = iv_cand (data, i);
5049
5050      if (iv_ca_cand_used_p (ivs, cand))
5051	continue;
5052
5053      cp = get_use_iv_cost (data, use, cand);
5054      if (!cp)
5055	continue;
5056
5057      iv_ca_set_cp (data, ivs, use, cp);
5058      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5059      iv_ca_set_no_cp (data, ivs, use);
5060      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5061
5062      if (act_cost < best_cost)
5063	{
5064	  best_cost = act_cost;
5065
5066	  iv_ca_delta_free (&best_delta);
5067	  best_delta = act_delta;
5068	}
5069      else
5070	iv_ca_delta_free (&act_delta);
5071    }
5072
5073  if (best_cost == INFTY)
5074    {
5075      for (i = 0; i < use->n_map_members; i++)
5076	{
5077	  cp = use->cost_map + i;
5078	  cand = cp->cand;
5079	  if (!cand)
5080	    continue;
5081
5082	  /* Already tried this.  */
5083	  if (cand->important)
5084	    continue;
5085
5086	  if (iv_ca_cand_used_p (ivs, cand))
5087	    continue;
5088
5089	  act_delta = NULL;
5090	  iv_ca_set_cp (data, ivs, use, cp);
5091	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5092	  iv_ca_set_no_cp (data, ivs, use);
5093	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5094				       cp, act_delta);
5095
5096	  if (act_cost < best_cost)
5097	    {
5098	      best_cost = act_cost;
5099
5100	      if (best_delta)
5101		iv_ca_delta_free (&best_delta);
5102	      best_delta = act_delta;
5103	    }
5104	  else
5105	    iv_ca_delta_free (&act_delta);
5106	}
5107    }
5108
5109  iv_ca_delta_commit (data, ivs, best_delta, true);
5110  iv_ca_delta_free (&best_delta);
5111
5112  return (best_cost != INFTY);
5113}
5114
5115/* Finds an initial assignment of candidates to uses.  */
5116
5117static struct iv_ca *
5118get_initial_solution (struct ivopts_data *data)
5119{
5120  struct iv_ca *ivs = iv_ca_new (data);
5121  unsigned i;
5122
5123  for (i = 0; i < n_iv_uses (data); i++)
5124    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5125      {
5126	iv_ca_free (&ivs);
5127	return NULL;
5128      }
5129
5130  return ivs;
5131}
5132
5133/* Tries to improve set of induction variables IVS.  */
5134
static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
      	{
	  /* Apply the extension, search for removals on top of it, then
	     revert; the combined change is the extension plus the prune.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the winning change; its measured cost must match the actual
     cost of the modified set.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
5189
5190/* Attempts to find the optimal set of induction variables.  We do simple
5191   greedy heuristic -- we try to replace at most one candidate in the selected
5192   solution and remove the unused ivs while this improves the cost.  */
5193
5194static struct iv_ca *
5195find_optimal_iv_set (struct ivopts_data *data)
5196{
5197  unsigned i;
5198  struct iv_ca *set;
5199  struct iv_use *use;
5200
5201  /* Get the initial solution.  */
5202  set = get_initial_solution (data);
5203  if (!set)
5204    {
5205      if (dump_file && (dump_flags & TDF_DETAILS))
5206	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5207      return NULL;
5208    }
5209
5210  if (dump_file && (dump_flags & TDF_DETAILS))
5211    {
5212      fprintf (dump_file, "Initial set of candidates:\n");
5213      iv_ca_dump (data, dump_file, set);
5214    }
5215
5216  while (try_improve_iv_set (data, set))
5217    {
5218      if (dump_file && (dump_flags & TDF_DETAILS))
5219	{
5220	  fprintf (dump_file, "Improved to:\n");
5221	  iv_ca_dump (data, dump_file, set);
5222	}
5223    }
5224
5225  if (dump_file && (dump_flags & TDF_DETAILS))
5226    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5227
5228  for (i = 0; i < n_iv_uses (data); i++)
5229    {
5230      use = iv_use (data, i);
5231      use->selected = iv_ca_cand_for_use (set, use)->cand;
5232    }
5233
5234  return set;
5235}
5236
5237/* Creates a new induction variable corresponding to CAND.  */
5238
static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates have no iv to materialize.  */
  if (!cand->iv)
    return;

  /* Pick the insertion point for the increment according to the
     candidate's position.  */
  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      /* The original biv is reused; nothing to create.  */
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  /* Unshare so the candidate's own expressions stay intact.  */
  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
5280
5281/* Creates new induction variables described in SET.  */
5282
5283static void
5284create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5285{
5286  unsigned i;
5287  struct iv_cand *cand;
5288  bitmap_iterator bi;
5289
5290  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5291    {
5292      cand = iv_cand (data, i);
5293      create_new_iv (data, cand);
5294    }
5295}
5296
5297/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5298   is true, remove also the ssa name defined by the statement.  */
5299
5300static void
5301remove_statement (tree stmt, bool including_defined_name)
5302{
5303  if (TREE_CODE (stmt) == PHI_NODE)
5304    {
5305      if (!including_defined_name)
5306	{
5307	  /* Prevent the ssa name defined by the statement from being removed.  */
5308	  SET_PHI_RESULT (stmt, NULL);
5309	}
5310      remove_phi_node (stmt, NULL_TREE);
5311    }
5312  else
5313    {
5314      block_stmt_iterator bsi = bsi_for_stmt (stmt);
5315
5316      bsi_remove (&bsi);
5317    }
5318}
5319
5320/* Rewrites USE (definition of iv used in a nonlinear expression)
5321   using candidate CAND.  */
5322
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step means the increment is really a decrement.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  /* Isolate the operand that is not var_before; MINUS_EXPR only
	     allows var_before on the left.  */
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* A constant or the step itself does not depend on other loop
	 computations, so the statement may stay as is.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);

  /* Find the target to assign and the place to insert the computation.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Position the iterator on the last label of the block, so the
	 replacement assignment is inserted after the labels.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value in its block.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      /* Just replace the right-hand side of the assignment.  */
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}
5433
5434/* Replaces ssa name in index IDX by its basic variable.  Callback for
5435   for_each_index.  */
5436
5437static bool
5438idx_remove_ssa_names (tree base, tree *idx,
5439		      void *data ATTRIBUTE_UNUSED)
5440{
5441  tree *op;
5442
5443  if (TREE_CODE (*idx) == SSA_NAME)
5444    *idx = SSA_NAME_VAR (*idx);
5445
5446  if (TREE_CODE (base) == ARRAY_REF)
5447    {
5448      op = &TREE_OPERAND (base, 2);
5449      if (*op
5450	  && TREE_CODE (*op) == SSA_NAME)
5451	*op = SSA_NAME_VAR (*op);
5452      op = &TREE_OPERAND (base, 3);
5453      if (*op
5454	  && TREE_CODE (*op) == SSA_NAME)
5455	*op = SSA_NAME_VAR (*op);
5456    }
5457
5458  return true;
5459}
5460
5461/* Unshares REF and replaces ssa names inside it by their basic variables.  */
5462
static tree
unshare_and_remove_ssa_names (tree ref)
{
  /* Work on a copy so that the original reference is left untouched.  */
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}
5471
5472/* Extract the alias analysis info for the memory reference REF.  There are
5473   several ways how this information may be stored and what precisely is
5474   its semantics depending on the type of the reference, but there always is
5475   somewhere hidden one _DECL node that is used to determine the set of
5476   virtual operands for the reference.  The code below deciphers this jungle
5477   and extracts this single useful piece of information.  */
5478
static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree tag, sv;
  unsigned HOST_WIDE_INT offset, size;

  /* Look for the innermost component reference of ORIG that maps to
     subvariables; if found, the (unshared) component ref itself serves
     as the tag.  */
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    if (okay_component_ref_for_subvars (sv, &offset, &size))
      break;

  if (handled_component_p (sv))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* In case the base is a dereference of a pointer, first check its name
	 mem tag, and if it does not have one, use type mem tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      /* Fall back to the type mem tag of the base variable; it is
	 expected to exist for pointers dereferenced here.  */
      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->type_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      /* A plain decl: prefer its type mem tag, otherwise the decl
	 itself determines the virtual operands.  */
      tag = var_ann (var)->type_mem_tag;
      if (tag)
	return tag;

      return var;
    }
}
5528
5529/* Copies the reference information from OLD_REF to NEW_REF.  */
5530
5531static void
5532copy_ref_info (tree new_ref, tree old_ref)
5533{
5534  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5535    copy_mem_ref_info (new_ref, old_ref);
5536  else
5537    {
5538      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5539      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5540    }
5541}
5542
5543/* Rewrites USE (address that is an iv) using candidate CAND.  */
5544
static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  /* Express the address as an affine combination of CAND, then build a
     TARGET_MEM_REF from it and substitute it for the original operand.  */
  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
5560
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  /* A nonnull CP->VALUE means the iv in the condition can be eliminated:
     the condition is replaced by a comparison of CAND against BOUND.  */
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      /* Gimplify the (unshared) bound; any statements needed to compute
	 it are inserted just before the use's statement.  */
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  /* Find which operand of the condition holds the evolving iv: prefer
     operand 0, and fall back to operand 1 when operand 0 is not an ssa
     name or has a zero step.  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  /* Gimplify the replacement expression, reusing the base variable of the
     operand being replaced, and insert any needed statements before the
     condition.  */
  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}
5609
5610/* Ensure that operand *OP_P may be used at the end of EXIT without
5611   violating loop closed ssa form.  */
5612
5613static void
5614protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5615{
5616  basic_block def_bb;
5617  struct loop *def_loop;
5618  tree phi, use;
5619
5620  use = USE_FROM_PTR (op_p);
5621  if (TREE_CODE (use) != SSA_NAME)
5622    return;
5623
5624  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5625  if (!def_bb)
5626    return;
5627
5628  def_loop = def_bb->loop_father;
5629  if (flow_bb_inside_loop_p (def_loop, exit->dest))
5630    return;
5631
5632  /* Try finding a phi node that copies the value out of the loop.  */
5633  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5634    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5635      break;
5636
5637  if (!phi)
5638    {
5639      /* Create such a phi node.  */
5640      tree new_name = duplicate_ssa_name (use, NULL);
5641
5642      phi = create_phi_node (new_name, exit->dest);
5643      SSA_NAME_DEF_STMT (new_name) = phi;
5644      add_phi_arg (phi, use, exit);
5645    }
5646
5647  SET_USE (op_p, PHI_RESULT (phi));
5648}
5649
5650/* Ensure that operands of STMT may be used at the end of EXIT without
5651   violating loop closed ssa form.  */
5652
5653static void
5654protect_loop_closed_ssa_form (edge exit, tree stmt)
5655{
5656  ssa_op_iter iter;
5657  use_operand_p use_p;
5658
5659  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5660    protect_loop_closed_ssa_form_use (exit, use_p);
5661}
5662
5663/* STMTS compute a value of a phi argument OP on EXIT of a loop.  Arrange things
5664   so that they are emitted on the correct place, and so that the loop closed
5665   ssa form is preserved.  */
5666
5667void
5668compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5669{
5670  tree_stmt_iterator tsi;
5671  block_stmt_iterator bsi;
5672  tree phi, stmt, def, next;
5673
5674  if (!single_pred_p (exit->dest))
5675    split_loop_exit_edge (exit);
5676
5677  /* Ensure there is label in exit->dest, so that we can
5678     insert after it.  */
5679  tree_block_label (exit->dest);
5680  bsi = bsi_after_labels (exit->dest);
5681
5682  if (TREE_CODE (stmts) == STATEMENT_LIST)
5683    {
5684      for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5685        {
5686	  tree stmt = tsi_stmt (tsi);
5687	  bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5688	  protect_loop_closed_ssa_form (exit, stmt);
5689	}
5690    }
5691  else
5692    {
5693      bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5694      protect_loop_closed_ssa_form (exit, stmts);
5695    }
5696
5697  if (!op)
5698    return;
5699
5700  for (phi = phi_nodes (exit->dest); phi; phi = next)
5701    {
5702      next = PHI_CHAIN (phi);
5703
5704      if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5705	{
5706	  def = PHI_RESULT (phi);
5707	  remove_statement (phi, false);
5708	  stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5709			def, op);
5710	  SSA_NAME_DEF_STMT (def) = stmt;
5711	  bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5712	}
5713    }
5714}
5715
/* Rewrites the final value of USE (that is only needed outside of the loop)
   using candidate CAND.  */

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* Determine the ssa name defined by the use's statement.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit && !(exit->flags & EDGE_COMPLEX))
    {
      /* The loop has a single dominating exit; we can compute the final
	 value there.  */
      if (!cand->iv)
	{
	  /* CAND is a "pseudocandidate" for replacing the use by its
	     final value, stored in the cost pair.  */
	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
	  value = unshare_expr (cp->value);
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the exit phi arguments that used TGT to the computed
	 value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}
5785
5786/* Rewrites USE using candidate CAND.  */
5787
5788static void
5789rewrite_use (struct ivopts_data *data,
5790	     struct iv_use *use, struct iv_cand *cand)
5791{
5792  switch (use->type)
5793    {
5794      case USE_NONLINEAR_EXPR:
5795	rewrite_use_nonlinear_expr (data, use, cand);
5796	break;
5797
5798      case USE_OUTER:
5799	rewrite_use_outer (data, use, cand);
5800	break;
5801
5802      case USE_ADDRESS:
5803	rewrite_use_address (data, use, cand);
5804	break;
5805
5806      case USE_COMPARE:
5807	rewrite_use_compare (data, use, cand);
5808	break;
5809
5810      default:
5811	gcc_unreachable ();
5812    }
5813  update_stmt (use->stmt);
5814}
5815
5816/* Rewrite the uses using the selected induction variables.  */
5817
5818static void
5819rewrite_uses (struct ivopts_data *data)
5820{
5821  unsigned i;
5822  struct iv_cand *cand;
5823  struct iv_use *use;
5824
5825  for (i = 0; i < n_iv_uses (data); i++)
5826    {
5827      use = iv_use (data, i);
5828      cand = use->selected;
5829      gcc_assert (cand);
5830
5831      rewrite_use (data, use, cand);
5832    }
5833}
5834
5835/* Removes the ivs that are not used after rewriting.  */
5836
5837static void
5838remove_unused_ivs (struct ivopts_data *data)
5839{
5840  unsigned j;
5841  bitmap_iterator bi;
5842
5843  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5844    {
5845      struct version_info *info;
5846
5847      info = ver_info (data, j);
5848      if (info->iv
5849	  && !zero_p (info->iv->step)
5850	  && !info->inv_id
5851	  && !info->iv->have_use_for
5852	  && !info->preserve_biv)
5853	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5854    }
5855}
5856
5857/* Frees data allocated by the optimization of a single loop.  */
5858
5859static void
5860free_loop_data (struct ivopts_data *data)
5861{
5862  unsigned i, j;
5863  bitmap_iterator bi;
5864  tree obj;
5865
5866  htab_empty (data->niters);
5867
5868  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5869    {
5870      struct version_info *info;
5871
5872      info = ver_info (data, i);
5873      if (info->iv)
5874	free (info->iv);
5875      info->iv = NULL;
5876      info->has_nonlin_use = false;
5877      info->preserve_biv = false;
5878      info->inv_id = 0;
5879    }
5880  bitmap_clear (data->relevant);
5881  bitmap_clear (data->important_candidates);
5882
5883  for (i = 0; i < n_iv_uses (data); i++)
5884    {
5885      struct iv_use *use = iv_use (data, i);
5886
5887      free (use->iv);
5888      BITMAP_FREE (use->related_cands);
5889      for (j = 0; j < use->n_map_members; j++)
5890	if (use->cost_map[j].depends_on)
5891	  BITMAP_FREE (use->cost_map[j].depends_on);
5892      free (use->cost_map);
5893      free (use);
5894    }
5895  VEC_truncate (iv_use_p, data->iv_uses, 0);
5896
5897  for (i = 0; i < n_iv_cands (data); i++)
5898    {
5899      struct iv_cand *cand = iv_cand (data, i);
5900
5901      if (cand->iv)
5902	free (cand->iv);
5903      if (cand->depends_on)
5904	BITMAP_FREE (cand->depends_on);
5905      free (cand);
5906    }
5907  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5908
5909  if (data->version_info_size < num_ssa_names)
5910    {
5911      data->version_info_size = 2 * num_ssa_names;
5912      free (data->version_info);
5913      data->version_info = xcalloc (data->version_info_size,
5914				    sizeof (struct version_info));
5915    }
5916
5917  data->max_inv_id = 0;
5918
5919  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5920    SET_DECL_RTL (obj, NULL_RTX);
5921
5922  VEC_truncate (tree, decl_rtl_to_reset, 0);
5923}
5924
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  /* Release the per-loop aux data attached during initialization; loop 0
     is the function body pseudo-loop and carries none.  */
  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
	free (loops->parray[i]->aux);
	loops->parray[i]->aux = NULL;
      }

  /* Free per-loop scratch data first, since it references the structures
     destroyed below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}
5950
5951/* Optimizes the LOOP.  Returns true if anything changed.  */
5952
5953static bool
5954tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5955{
5956  bool changed = false;
5957  struct iv_ca *iv_ca;
5958  edge exit;
5959
5960  data->current_loop = loop;
5961
5962  if (dump_file && (dump_flags & TDF_DETAILS))
5963    {
5964      fprintf (dump_file, "Processing loop %d\n", loop->num);
5965
5966      exit = single_dom_exit (loop);
5967      if (exit)
5968	{
5969	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5970		   exit->src->index, exit->dest->index);
5971	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5972	  fprintf (dump_file, "\n");
5973	}
5974
5975      fprintf (dump_file, "\n");
5976    }
5977
5978  /* For each ssa name determines whether it behaves as an induction variable
5979     in some loop.  */
5980  if (!find_induction_variables (data))
5981    goto finish;
5982
5983  /* Finds interesting uses (item 1).  */
5984  find_interesting_uses (data);
5985  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5986    goto finish;
5987
5988  /* Finds candidates for the induction variables (item 2).  */
5989  find_iv_candidates (data);
5990
5991  /* Calculates the costs (item 3, part 1).  */
5992  determine_use_iv_costs (data);
5993  determine_iv_costs (data);
5994  determine_set_costs (data);
5995
5996  /* Find the optimal set of induction variables (item 3, part 2).  */
5997  iv_ca = find_optimal_iv_set (data);
5998  if (!iv_ca)
5999    goto finish;
6000  changed = true;
6001
6002  /* Create the new induction variables (item 4, part 1).  */
6003  create_new_ivs (data, iv_ca);
6004  iv_ca_free (&iv_ca);
6005
6006  /* Rewrite the uses (item 4, part 2).  */
6007  rewrite_uses (data);
6008
6009  /* Remove the ivs that are unused after rewriting.  */
6010  remove_unused_ivs (data);
6011
6012  /* We have changed the structure of induction variables; it might happen
6013     that definitions in the scev database refer to some of them that were
6014     eliminated.  */
6015  scev_reset ();
6016
6017finish:
6018  free_loop_data (data);
6019
6020  return changed;
6021}
6022
6023/* Main entry point.  Optimizes induction variables in LOOPS.  */
6024
6025void
6026tree_ssa_iv_optimize (struct loops *loops)
6027{
6028  struct loop *loop;
6029  struct ivopts_data data;
6030
6031  tree_ssa_iv_optimize_init (loops, &data);
6032
6033  /* Optimize the loops starting with the innermost ones.  */
6034  loop = loops->tree_root;
6035  while (loop->inner)
6036    loop = loop->inner;
6037
6038  /* Scan the loops, inner ones first.  */
6039  while (loop != loops->tree_root)
6040    {
6041      if (dump_file && (dump_flags & TDF_DETAILS))
6042	flow_loop_dump (loop, dump_file, NULL, 1);
6043
6044      tree_ssa_iv_optimize_loop (&data, loop);
6045
6046      if (loop->next)
6047	{
6048	  loop = loop->next;
6049	  while (loop->inner)
6050	    loop = loop->inner;
6051	}
6052      else
6053	loop = loop->outer;
6054    }
6055
6056  tree_ssa_iv_optimize_finalize (loops, &data);
6057}
6058