1/* Induction variable optimizations.
2   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING.  If not, write to the Free
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA.  */
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22   It optimizes just the basic linear induction variables (although adding
23   support for other types should not be too hard).  It includes the
24   optimizations commonly known as strength reduction, induction variable
25   coalescing and induction variable elimination.  It does it in the
26   following steps:
27
28   1) The interesting uses of induction variables are found.  This includes
29
30      -- uses of induction variables in non-linear expressions
31      -- addresses of arrays
32      -- comparisons of induction variables
33
34   2) Candidates for the induction variables are found.  This includes
35
36      -- old induction variables
37      -- the variables defined by expressions derived from the "interesting
38	 uses" above
39
40   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
41      cost function assigns a cost to sets of induction variables and consists
42      of three parts:
43
44      -- The use costs.  Each of the interesting uses chooses the best induction
45	 variable in the set and adds its cost to the sum.  The cost reflects
46	 the time spent on modifying the induction variables value to be usable
47	 for the given purpose (adding base and offset for arrays, etc.).
48      -- The variable costs.  Each of the variables has a cost assigned that
49	 reflects the costs associated with incrementing the value of the
50	 variable.  The original variables are somewhat preferred.
51      -- The set cost.  Depending on the size of the set, extra cost may be
52	 added to reflect register pressure.
53
54      All the costs are defined in a machine-specific way, using the target
55      hooks and machine descriptions to determine them.
56
57   4) The trees are transformed to use the new variables, the dead code is
58      removed.
59
60   All of this is done loop by loop.  Doing it globally is theoretically
61   possible, it might give a better performance and it might enable us
62   to decide costs more precisely, but getting all the interactions right
63   would be complicated.  */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
86#include "hashtab.h"
87#include "tree-chrec.h"
88#include "tree-scalar-evolution.h"
89#include "cfgloop.h"
90#include "params.h"
91#include "langhooks.h"
92
93/* The infinite cost.  */
94#define INFTY 10000000
95
96/* The expected number of loop iterations.  TODO -- use profiling instead of
97   this.  */
98#define AVG_LOOP_NITER(LOOP) 5
99
100
101/* Representation of the induction variable.  */
102struct iv
103{
104  tree base;		/* Initial value of the iv.  */
105  tree base_object;	/* A memory object to that the induction variable points.  */
106  tree step;		/* Step of the iv (constant only).  */
107  tree ssa_name;	/* The ssa name with the value.  */
108  bool biv_p;		/* Is it a biv?  */
109  bool have_use_for;	/* Do we already have a use for it?  */
110  unsigned use_id;	/* The identifier in the use if it is the case.  */
111};
112
113/* Per-ssa version information (induction variable descriptions, etc.).  */
114struct version_info
115{
116  tree name;		/* The ssa name.  */
117  struct iv *iv;	/* Induction variable description.  */
118  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
119			   an expression that is not an induction variable.  */
120  unsigned inv_id;	/* Id of an invariant.  */
121  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
122};
123
124/* Types of uses.  */
125enum use_type
126{
127  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
128  USE_ADDRESS,		/* Use in an address.  */
129  USE_COMPARE		/* Use is a compare.  */
130};
131
132/* The candidate - cost pair.  */
133struct cost_pair
134{
135  struct iv_cand *cand;	/* The candidate.  */
136  unsigned cost;	/* The cost.  */
137  bitmap depends_on;	/* The list of invariants that have to be
138			   preserved.  */
139  tree value;		/* For final value elimination, the expression for
140			   the final value of the iv.  For iv elimination,
141			   the new bound to compare with.  */
142};
143
144/* Use.  */
145struct iv_use
146{
147  unsigned id;		/* The id of the use.  */
148  enum use_type type;	/* Type of the use.  */
149  struct iv *iv;	/* The induction variable it is based on.  */
150  tree stmt;		/* Statement in that it occurs.  */
151  tree *op_p;		/* The place where it occurs.  */
152  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
153			   important ones.  */
154
155  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
156  struct cost_pair *cost_map;
157			/* The costs wrto the iv candidates.  */
158
159  struct iv_cand *selected;
160			/* The selected candidate.  */
161};
162
163/* The position where the iv is computed.  */
164enum iv_position
165{
166  IP_NORMAL,		/* At the end, just before the exit condition.  */
167  IP_END,		/* At the end of the latch block.  */
168  IP_ORIGINAL		/* The original biv.  */
169};
170
171/* The induction variable candidate.  */
172struct iv_cand
173{
174  unsigned id;		/* The number of the candidate.  */
175  bool important;	/* Whether this is an "important" candidate, i.e. such
176			   that it should be considered by all uses.  */
177  enum iv_position pos;	/* Where it is computed.  */
178  tree incremented_at;	/* For original biv, the statement where it is
179			   incremented.  */
180  tree var_before;	/* The variable used for it before increment.  */
181  tree var_after;	/* The variable used for it after increment.  */
182  struct iv *iv;	/* The value of the candidate.  NULL for
183			   "pseudocandidate" used to indicate the possibility
184			   to replace the final value of an iv by direct
185			   computation of the value.  */
186  unsigned cost;	/* Cost of the candidate.  */
187  bitmap depends_on;	/* The list of invariants that are used in step of the
188			   biv.  */
189};
190
191/* The data used by the induction variable optimizations.  */
192
193typedef struct iv_use *iv_use_p;
194DEF_VEC_P(iv_use_p);
195DEF_VEC_ALLOC_P(iv_use_p,heap);
196
197typedef struct iv_cand *iv_cand_p;
198DEF_VEC_P(iv_cand_p);
199DEF_VEC_ALLOC_P(iv_cand_p,heap);
200
201struct ivopts_data
202{
203  /* The currently optimized loop.  */
204  struct loop *current_loop;
205
206  /* Number of registers used in it.  */
207  unsigned regs_used;
208
209  /* Numbers of iterations for all exits of the current loop.  */
210  htab_t niters;
211
212  /* The size of version_info array allocated.  */
213  unsigned version_info_size;
214
215  /* The array of information for the ssa names.  */
216  struct version_info *version_info;
217
218  /* The bitmap of indices in version_info whose value was changed.  */
219  bitmap relevant;
220
221  /* The maximum invariant id.  */
222  unsigned max_inv_id;
223
224  /* The uses of induction variables.  */
225  VEC(iv_use_p,heap) *iv_uses;
226
227  /* The candidates.  */
228  VEC(iv_cand_p,heap) *iv_candidates;
229
230  /* A bitmap of important candidates.  */
231  bitmap important_candidates;
232
233  /* Whether to consider just related and important candidates when replacing a
234     use.  */
235  bool consider_all_candidates;
236};
237
238/* An assignment of iv candidates to uses.  */
239
240struct iv_ca
241{
242  /* The number of uses covered by the assignment.  */
243  unsigned upto;
244
245  /* Number of uses that cannot be expressed by the candidates in the set.  */
246  unsigned bad_uses;
247
248  /* Candidate assigned to a use, together with the related costs.  */
249  struct cost_pair **cand_for_use;
250
251  /* Number of times each candidate is used.  */
252  unsigned *n_cand_uses;
253
254  /* The candidates used.  */
255  bitmap cands;
256
257  /* The number of candidates in the set.  */
258  unsigned n_cands;
259
260  /* Total number of registers needed.  */
261  unsigned n_regs;
262
263  /* Total cost of expressing uses.  */
264  unsigned cand_use_cost;
265
266  /* Total cost of candidates.  */
267  unsigned cand_cost;
268
269  /* Number of times each invariant is used.  */
270  unsigned *n_invariant_uses;
271
272  /* Total cost of the assignment.  */
273  unsigned cost;
274};
275
276/* Difference of two iv candidate assignments.  */
277
278struct iv_ca_delta
279{
280  /* Changed use.  */
281  struct iv_use *use;
282
283  /* An old assignment (for rollback purposes).  */
284  struct cost_pair *old_cp;
285
286  /* A new assignment.  */
287  struct cost_pair *new_cp;
288
289  /* Next change in the list.  */
290  struct iv_ca_delta *next_change;
291};
292
293/* Bound on number of candidates below that all candidates are considered.  */
294
295#define CONSIDER_ALL_CANDIDATES_BOUND \
296  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
297
298/* If there are more iv occurrences, we just give up (it is quite unlikely that
299   optimizing such a loop would help, and it would take ages).  */
300
301#define MAX_CONSIDERED_USES \
302  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
303
304/* If there are at most this number of ivs in the set, try removing unnecessary
305   ivs from the set always.  */
306
307#define ALWAYS_PRUNE_CAND_SET_BOUND \
308  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
309
310/* The list of trees for that the decl_rtl field must be reset is stored
311   here.  */
312
313static VEC(tree,heap) *decl_rtl_to_reset;
314
315/* Number of uses recorded in DATA.  */
316
317static inline unsigned
318n_iv_uses (struct ivopts_data *data)
319{
320  return VEC_length (iv_use_p, data->iv_uses);
321}
322
323/* Ith use recorded in DATA.  */
324
325static inline struct iv_use *
326iv_use (struct ivopts_data *data, unsigned i)
327{
328  return VEC_index (iv_use_p, data->iv_uses, i);
329}
330
331/* Number of candidates recorded in DATA.  */
332
333static inline unsigned
334n_iv_cands (struct ivopts_data *data)
335{
336  return VEC_length (iv_cand_p, data->iv_candidates);
337}
338
339/* Ith candidate recorded in DATA.  */
340
341static inline struct iv_cand *
342iv_cand (struct ivopts_data *data, unsigned i)
343{
344  return VEC_index (iv_cand_p, data->iv_candidates, i);
345}
346
347/* The single loop exit if it dominates the latch, NULL otherwise.  */
348
349edge
350single_dom_exit (struct loop *loop)
351{
352  edge exit = loop->single_exit;
353
354  if (!exit)
355    return NULL;
356
357  if (!just_once_each_iteration_p (loop, exit->src))
358    return NULL;
359
360  return exit;
361}
362
363/* Dumps information about the induction variable IV to FILE.  */
364
365extern void dump_iv (FILE *, struct iv *);
366void
367dump_iv (FILE *file, struct iv *iv)
368{
369  if (iv->ssa_name)
370    {
371      fprintf (file, "ssa name ");
372      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
373      fprintf (file, "\n");
374    }
375
376  fprintf (file, "  type ");
377  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
378  fprintf (file, "\n");
379
380  if (iv->step)
381    {
382      fprintf (file, "  base ");
383      print_generic_expr (file, iv->base, TDF_SLIM);
384      fprintf (file, "\n");
385
386      fprintf (file, "  step ");
387      print_generic_expr (file, iv->step, TDF_SLIM);
388      fprintf (file, "\n");
389    }
390  else
391    {
392      fprintf (file, "  invariant ");
393      print_generic_expr (file, iv->base, TDF_SLIM);
394      fprintf (file, "\n");
395    }
396
397  if (iv->base_object)
398    {
399      fprintf (file, "  base object ");
400      print_generic_expr (file, iv->base_object, TDF_SLIM);
401      fprintf (file, "\n");
402    }
403
404  if (iv->biv_p)
405    fprintf (file, "  is a biv\n");
406}
407
408/* Dumps information about the USE to FILE.  */
409
410extern void dump_use (FILE *, struct iv_use *);
411void
412dump_use (FILE *file, struct iv_use *use)
413{
414  fprintf (file, "use %d\n", use->id);
415
416  switch (use->type)
417    {
418    case USE_NONLINEAR_EXPR:
419      fprintf (file, "  generic\n");
420      break;
421
422    case USE_ADDRESS:
423      fprintf (file, "  address\n");
424      break;
425
426    case USE_COMPARE:
427      fprintf (file, "  compare\n");
428      break;
429
430    default:
431      gcc_unreachable ();
432    }
433
434  fprintf (file, "  in statement ");
435  print_generic_expr (file, use->stmt, TDF_SLIM);
436  fprintf (file, "\n");
437
438  fprintf (file, "  at position ");
439  if (use->op_p)
440    print_generic_expr (file, *use->op_p, TDF_SLIM);
441  fprintf (file, "\n");
442
443  dump_iv (file, use->iv);
444
445  if (use->related_cands)
446    {
447      fprintf (file, "  related candidates ");
448      dump_bitmap (file, use->related_cands);
449    }
450}
451
452/* Dumps information about the uses to FILE.  */
453
454extern void dump_uses (FILE *, struct ivopts_data *);
455void
456dump_uses (FILE *file, struct ivopts_data *data)
457{
458  unsigned i;
459  struct iv_use *use;
460
461  for (i = 0; i < n_iv_uses (data); i++)
462    {
463      use = iv_use (data, i);
464
465      dump_use (file, use);
466      fprintf (file, "\n");
467    }
468}
469
470/* Dumps information about induction variable candidate CAND to FILE.  */
471
472extern void dump_cand (FILE *, struct iv_cand *);
473void
474dump_cand (FILE *file, struct iv_cand *cand)
475{
476  struct iv *iv = cand->iv;
477
478  fprintf (file, "candidate %d%s\n",
479	   cand->id, cand->important ? " (important)" : "");
480
481  if (cand->depends_on)
482    {
483      fprintf (file, "  depends on ");
484      dump_bitmap (file, cand->depends_on);
485    }
486
487  if (!iv)
488    {
489      fprintf (file, "  final value replacement\n");
490      return;
491    }
492
493  switch (cand->pos)
494    {
495    case IP_NORMAL:
496      fprintf (file, "  incremented before exit test\n");
497      break;
498
499    case IP_END:
500      fprintf (file, "  incremented at end\n");
501      break;
502
503    case IP_ORIGINAL:
504      fprintf (file, "  original biv\n");
505      break;
506    }
507
508  dump_iv (file, iv);
509}
510
511/* Returns the info for ssa version VER.  */
512
513static inline struct version_info *
514ver_info (struct ivopts_data *data, unsigned ver)
515{
516  return data->version_info + ver;
517}
518
519/* Returns the info for ssa name NAME.  */
520
521static inline struct version_info *
522name_info (struct ivopts_data *data, tree name)
523{
524  return ver_info (data, SSA_NAME_VERSION (name));
525}
526
527/* Checks whether there exists number X such that X * B = A, counting modulo
528   2^BITS.  */
529
530static bool
531divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
532	HOST_WIDE_INT *x)
533{
534  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
535  unsigned HOST_WIDE_INT inv, ex, val;
536  unsigned i;
537
538  a &= mask;
539  b &= mask;
540
541  /* First divide the whole equation by 2 as long as possible.  */
542  while (!(a & 1) && !(b & 1))
543    {
544      a >>= 1;
545      b >>= 1;
546      bits--;
547      mask >>= 1;
548    }
549
550  if (!(b & 1))
551    {
552      /* If b is still even, a is odd and there is no such x.  */
553      return false;
554    }
555
556  /* Find the inverse of b.  We compute it as
557     b^(2^(bits - 1) - 1) (mod 2^bits).  */
558  inv = 1;
559  ex = b;
560  for (i = 0; i < bits - 1; i++)
561    {
562      inv = (inv * ex) & mask;
563      ex = (ex * ex) & mask;
564    }
565
566  val = (a * inv) & mask;
567
568  gcc_assert (((val * b) & mask) == a);
569
570  if ((val >> (bits - 1)) & 1)
571    val |= ~mask;
572
573  *x = val;
574
575  return true;
576}
577
578/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
579   emitted in LOOP.  */
580
581static bool
582stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
583{
584  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
585
586  gcc_assert (bb);
587
588  if (sbb == loop->latch)
589    return true;
590
591  if (sbb != bb)
592    return false;
593
594  return stmt == last_stmt (bb);
595}
596
597/* Returns true if STMT if after the place where the original induction
598   variable CAND is incremented.  */
599
600static bool
601stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
602{
603  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
604  basic_block stmt_bb = bb_for_stmt (stmt);
605  block_stmt_iterator bsi;
606
607  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
608    return false;
609
610  if (stmt_bb != cand_bb)
611    return true;
612
613  /* Scan the block from the end, since the original ivs are usually
614     incremented at the end of the loop body.  */
615  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
616    {
617      if (bsi_stmt (bsi) == cand->incremented_at)
618	return false;
619      if (bsi_stmt (bsi) == stmt)
620	return true;
621    }
622}
623
624/* Returns true if STMT if after the place where the induction variable
625   CAND is incremented in LOOP.  */
626
627static bool
628stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
629{
630  switch (cand->pos)
631    {
632    case IP_END:
633      return false;
634
635    case IP_NORMAL:
636      return stmt_after_ip_normal_pos (loop, stmt);
637
638    case IP_ORIGINAL:
639      return stmt_after_ip_original_pos (cand, stmt);
640
641    default:
642      gcc_unreachable ();
643    }
644}
645
646/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
647
648static bool
649abnormal_ssa_name_p (tree exp)
650{
651  if (!exp)
652    return false;
653
654  if (TREE_CODE (exp) != SSA_NAME)
655    return false;
656
657  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
658}
659
660/* Returns false if BASE or INDEX contains a ssa name that occurs in an
661   abnormal phi node.  Callback for for_each_index.  */
662
663static bool
664idx_contains_abnormal_ssa_name_p (tree base, tree *index,
665				  void *data ATTRIBUTE_UNUSED)
666{
667  if (TREE_CODE (base) == ARRAY_REF)
668    {
669      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
670	return false;
671      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
672	return false;
673    }
674
675  return !abnormal_ssa_name_p (*index);
676}
677
678/* Returns true if EXPR contains a ssa name that occurs in an
679   abnormal phi node.  */
680
681bool
682contains_abnormal_ssa_name_p (tree expr)
683{
684  enum tree_code code;
685  enum tree_code_class class;
686
687  if (!expr)
688    return false;
689
690  code = TREE_CODE (expr);
691  class = TREE_CODE_CLASS (code);
692
693  if (code == SSA_NAME)
694    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
695
696  if (code == INTEGER_CST
697      || is_gimple_min_invariant (expr))
698    return false;
699
700  if (code == ADDR_EXPR)
701    return !for_each_index (&TREE_OPERAND (expr, 0),
702			    idx_contains_abnormal_ssa_name_p,
703			    NULL);
704
705  switch (class)
706    {
707    case tcc_binary:
708    case tcc_comparison:
709      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
710	return true;
711
712      /* Fallthru.  */
713    case tcc_unary:
714      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
715	return true;
716
717      break;
718
719    default:
720      gcc_unreachable ();
721    }
722
723  return false;
724}
725
726/* Element of the table in that we cache the numbers of iterations obtained
727   from exits of the loop.  */
728
729struct nfe_cache_elt
730{
731  /* The edge for that the number of iterations is cached.  */
732  edge exit;
733
734  /* Number of iterations corresponding to this exit, or NULL if it cannot be
735     determined.  */
736  tree niter;
737};
738
739/* Hash function for nfe_cache_elt E.  */
740
741static hashval_t
742nfe_hash (const void *e)
743{
744  const struct nfe_cache_elt *elt = e;
745
746  return htab_hash_pointer (elt->exit);
747}
748
749/* Equality function for nfe_cache_elt E1 and edge E2.  */
750
751static int
752nfe_eq (const void *e1, const void *e2)
753{
754  const struct nfe_cache_elt *elt1 = e1;
755
756  return elt1->exit == e2;
757}
758
759/*  Returns tree describing number of iterations determined from
760    EXIT of DATA->current_loop, or NULL if something goes wrong.  */
761
762static tree
763niter_for_exit (struct ivopts_data *data, edge exit)
764{
765  struct nfe_cache_elt *nfe_desc;
766  struct tree_niter_desc desc;
767  PTR *slot;
768
769  slot = htab_find_slot_with_hash (data->niters, exit,
770				   htab_hash_pointer (exit),
771				   INSERT);
772
773  if (!*slot)
774    {
775      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
776      nfe_desc->exit = exit;
777
778      /* Try to determine number of iterations.  We must know it
779	 unconditionally (i.e., without possibility of # of iterations
780	 being zero).  Also, we cannot safely work with ssa names that
781	 appear in phi nodes on abnormal edges, so that we do not create
782	 overlapping life ranges for them (PR 27283).  */
783      if (number_of_iterations_exit (data->current_loop,
784				     exit, &desc, true)
785	  && zero_p (desc.may_be_zero)
786     	  && !contains_abnormal_ssa_name_p (desc.niter))
787	nfe_desc->niter = desc.niter;
788      else
789	nfe_desc->niter = NULL_TREE;
790    }
791  else
792    nfe_desc = *slot;
793
794  return nfe_desc->niter;
795}
796
797/* Returns tree describing number of iterations determined from
798   single dominating exit of DATA->current_loop, or NULL if something
799   goes wrong.  */
800
801static tree
802niter_for_single_dom_exit (struct ivopts_data *data)
803{
804  edge exit = single_dom_exit (data->current_loop);
805
806  if (!exit)
807    return NULL;
808
809  return niter_for_exit (data, exit);
810}
811
812/* Initializes data structures used by the iv optimization pass, stored
813   in DATA.  */
814
815static void
816tree_ssa_iv_optimize_init (struct ivopts_data *data)
817{
818  data->version_info_size = 2 * num_ssa_names;
819  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
820  data->relevant = BITMAP_ALLOC (NULL);
821  data->important_candidates = BITMAP_ALLOC (NULL);
822  data->max_inv_id = 0;
823  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
824  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
825  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
826  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
827}
828
829/* Returns a memory object to that EXPR points.  In case we are able to
830   determine that it does not point to any such object, NULL is returned.  */
831
832static tree
833determine_base_object (tree expr)
834{
835  enum tree_code code = TREE_CODE (expr);
836  tree base, obj, op0, op1;
837
838  /* If this is a pointer casted to any type, we need to determine
839     the base object for the pointer; so handle conversions before
840     throwing away non-pointer expressions.  */
841  if (TREE_CODE (expr) == NOP_EXPR
842      || TREE_CODE (expr) == CONVERT_EXPR)
843    return determine_base_object (TREE_OPERAND (expr, 0));
844
845  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
846    return NULL_TREE;
847
848  switch (code)
849    {
850    case INTEGER_CST:
851      return NULL_TREE;
852
853    case ADDR_EXPR:
854      obj = TREE_OPERAND (expr, 0);
855      base = get_base_address (obj);
856
857      if (!base)
858	return expr;
859
860      if (TREE_CODE (base) == INDIRECT_REF)
861	return determine_base_object (TREE_OPERAND (base, 0));
862
863      return fold_convert (ptr_type_node,
864		           build_fold_addr_expr (base));
865
866    case PLUS_EXPR:
867    case MINUS_EXPR:
868      op0 = determine_base_object (TREE_OPERAND (expr, 0));
869      op1 = determine_base_object (TREE_OPERAND (expr, 1));
870
871      if (!op1)
872	return op0;
873
874      if (!op0)
875	return (code == PLUS_EXPR
876		? op1
877		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
878
879      return fold_build2 (code, ptr_type_node, op0, op1);
880
881    default:
882      return fold_convert (ptr_type_node, expr);
883    }
884}
885
886/* Allocates an induction variable with given initial value BASE and step STEP
887   for loop LOOP.  */
888
889static struct iv *
890alloc_iv (tree base, tree step)
891{
892  struct iv *iv = XCNEW (struct iv);
893
894  if (step && integer_zerop (step))
895    step = NULL_TREE;
896
897  iv->base = base;
898  iv->base_object = determine_base_object (base);
899  iv->step = step;
900  iv->biv_p = false;
901  iv->have_use_for = false;
902  iv->use_id = 0;
903  iv->ssa_name = NULL_TREE;
904
905  return iv;
906}
907
908/* Sets STEP and BASE for induction variable IV.  */
909
910static void
911set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
912{
913  struct version_info *info = name_info (data, iv);
914
915  gcc_assert (!info->iv);
916
917  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
918  info->iv = alloc_iv (base, step);
919  info->iv->ssa_name = iv;
920}
921
922/* Finds induction variable declaration for VAR.  */
923
924static struct iv *
925get_iv (struct ivopts_data *data, tree var)
926{
927  basic_block bb;
928
929  if (!name_info (data, var)->iv)
930    {
931      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
932
933      if (!bb
934	  || !flow_bb_inside_loop_p (data->current_loop, bb))
935	set_iv (data, var, var, NULL_TREE);
936    }
937
938  return name_info (data, var)->iv;
939}
940
941/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
942   not define a simple affine biv with nonzero step.  */
943
944static tree
945determine_biv_step (tree phi)
946{
947  struct loop *loop = bb_for_stmt (phi)->loop_father;
948  tree name = PHI_RESULT (phi);
949  affine_iv iv;
950
951  if (!is_gimple_reg (name))
952    return NULL_TREE;
953
954  if (!simple_iv (loop, phi, name, &iv, true))
955    return NULL_TREE;
956
957  return (zero_p (iv.step) ? NULL_TREE : iv.step);
958}
959
960/* Finds basic ivs.  */
961
962static bool
963find_bivs (struct ivopts_data *data)
964{
965  tree phi, step, type, base;
966  bool found = false;
967  struct loop *loop = data->current_loop;
968
969  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
970    {
971      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
972	continue;
973
974      step = determine_biv_step (phi);
975      if (!step)
976	continue;
977
978      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
979      base = expand_simple_operations (base);
980      if (contains_abnormal_ssa_name_p (base)
981	  || contains_abnormal_ssa_name_p (step))
982	continue;
983
984      type = TREE_TYPE (PHI_RESULT (phi));
985      base = fold_convert (type, base);
986      if (step)
987	step = fold_convert (type, step);
988
989      set_iv (data, PHI_RESULT (phi), base, step);
990      found = true;
991    }
992
993  return found;
994}
995
996/* Marks basic ivs.  */
997
998static void
999mark_bivs (struct ivopts_data *data)
1000{
1001  tree phi, var;
1002  struct iv *iv, *incr_iv;
1003  struct loop *loop = data->current_loop;
1004  basic_block incr_bb;
1005
1006  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1007    {
1008      iv = get_iv (data, PHI_RESULT (phi));
1009      if (!iv)
1010	continue;
1011
1012      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1013      incr_iv = get_iv (data, var);
1014      if (!incr_iv)
1015	continue;
1016
1017      /* If the increment is in the subloop, ignore it.  */
1018      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1019      if (incr_bb->loop_father != data->current_loop
1020	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1021	continue;
1022
1023      iv->biv_p = true;
1024      incr_iv->biv_p = true;
1025    }
1026}
1027
1028/* Checks whether STMT defines a linear induction variable and stores its
1029   parameters to IV.  */
1030
1031static bool
1032find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1033{
1034  tree lhs;
1035  struct loop *loop = data->current_loop;
1036
1037  iv->base = NULL_TREE;
1038  iv->step = NULL_TREE;
1039
1040  if (TREE_CODE (stmt) != MODIFY_EXPR)
1041    return false;
1042
1043  lhs = TREE_OPERAND (stmt, 0);
1044  if (TREE_CODE (lhs) != SSA_NAME)
1045    return false;
1046
1047  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1048    return false;
1049  iv->base = expand_simple_operations (iv->base);
1050
1051  if (contains_abnormal_ssa_name_p (iv->base)
1052      || contains_abnormal_ssa_name_p (iv->step))
1053    return false;
1054
1055  return true;
1056}
1057
1058/* Finds general ivs in statement STMT.  */
1059
1060static void
1061find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1062{
1063  affine_iv iv;
1064
1065  if (!find_givs_in_stmt_scev (data, stmt, &iv))
1066    return;
1067
1068  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1069}
1070
1071/* Finds general ivs in basic block BB.  */
1072
1073static void
1074find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1075{
1076  block_stmt_iterator bsi;
1077
1078  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1079    find_givs_in_stmt (data, bsi_stmt (bsi));
1080}
1081
1082/* Finds general ivs.  */
1083
1084static void
1085find_givs (struct ivopts_data *data)
1086{
1087  struct loop *loop = data->current_loop;
1088  basic_block *body = get_loop_body_in_dom_order (loop);
1089  unsigned i;
1090
1091  for (i = 0; i < loop->num_nodes; i++)
1092    find_givs_in_bb (data, body[i]);
1093  free (body);
1094}
1095
1096/* For each ssa name defined in LOOP determines whether it is an induction
1097   variable and if so, its initial value and step.  */
1098
1099static bool
1100find_induction_variables (struct ivopts_data *data)
1101{
1102  unsigned i;
1103  bitmap_iterator bi;
1104
1105  if (!find_bivs (data))
1106    return false;
1107
1108  find_givs (data);
1109  mark_bivs (data);
1110
1111  if (dump_file && (dump_flags & TDF_DETAILS))
1112    {
1113      tree niter = niter_for_single_dom_exit (data);
1114
1115      if (niter)
1116	{
1117	  fprintf (dump_file, "  number of iterations ");
1118	  print_generic_expr (dump_file, niter, TDF_SLIM);
1119	  fprintf (dump_file, "\n\n");
1120    	};
1121
1122      fprintf (dump_file, "Induction variables:\n\n");
1123
1124      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1125	{
1126	  if (ver_info (data, i)->iv)
1127	    dump_iv (dump_file, ver_info (data, i)->iv);
1128	}
1129    }
1130
1131  return true;
1132}
1133
1134/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1135
1136static struct iv_use *
1137record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1138	    tree stmt, enum use_type use_type)
1139{
1140  struct iv_use *use = XCNEW (struct iv_use);
1141
1142  use->id = n_iv_uses (data);
1143  use->type = use_type;
1144  use->iv = iv;
1145  use->stmt = stmt;
1146  use->op_p = use_p;
1147  use->related_cands = BITMAP_ALLOC (NULL);
1148
1149  /* To avoid showing ssa name in the dumps, if it was not reset by the
1150     caller.  */
1151  iv->ssa_name = NULL_TREE;
1152
1153  if (dump_file && (dump_flags & TDF_DETAILS))
1154    dump_use (dump_file, use);
1155
1156  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1157
1158  return use;
1159}
1160
1161/* Checks whether OP is a loop-level invariant and if so, records it.
1162   NONLINEAR_USE is true if the invariant is used in a way we do not
1163   handle specially.  */
1164
1165static void
1166record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1167{
1168  basic_block bb;
1169  struct version_info *info;
1170
1171  if (TREE_CODE (op) != SSA_NAME
1172      || !is_gimple_reg (op))
1173    return;
1174
1175  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1176  if (bb
1177      && flow_bb_inside_loop_p (data->current_loop, bb))
1178    return;
1179
1180  info = name_info (data, op);
1181  info->name = op;
1182  info->has_nonlin_use |= nonlinear_use;
1183  if (!info->inv_id)
1184    info->inv_id = ++data->max_inv_id;
1185  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1186}
1187
1188/* Checks whether the use OP is interesting and if so, records it.  */
1189
1190static struct iv_use *
1191find_interesting_uses_op (struct ivopts_data *data, tree op)
1192{
1193  struct iv *iv;
1194  struct iv *civ;
1195  tree stmt;
1196  struct iv_use *use;
1197
1198  if (TREE_CODE (op) != SSA_NAME)
1199    return NULL;
1200
1201  iv = get_iv (data, op);
1202  if (!iv)
1203    return NULL;
1204
1205  if (iv->have_use_for)
1206    {
1207      use = iv_use (data, iv->use_id);
1208
1209      gcc_assert (use->type == USE_NONLINEAR_EXPR);
1210      return use;
1211    }
1212
1213  if (zero_p (iv->step))
1214    {
1215      record_invariant (data, op, true);
1216      return NULL;
1217    }
1218  iv->have_use_for = true;
1219
1220  civ = XNEW (struct iv);
1221  *civ = *iv;
1222
1223  stmt = SSA_NAME_DEF_STMT (op);
1224  gcc_assert (TREE_CODE (stmt) == PHI_NODE
1225	      || TREE_CODE (stmt) == MODIFY_EXPR);
1226
1227  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1228  iv->use_id = use->id;
1229
1230  return use;
1231}
1232
1233/* Checks whether the condition *COND_P in STMT is interesting
1234   and if so, records it.  */
1235
1236static void
1237find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1238{
1239  tree *op0_p;
1240  tree *op1_p;
1241  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1242  struct iv const_iv;
1243  tree zero = integer_zero_node;
1244
1245  const_iv.step = NULL_TREE;
1246
1247  if (TREE_CODE (*cond_p) != SSA_NAME
1248      && !COMPARISON_CLASS_P (*cond_p))
1249    return;
1250
1251  if (TREE_CODE (*cond_p) == SSA_NAME)
1252    {
1253      op0_p = cond_p;
1254      op1_p = &zero;
1255    }
1256  else
1257    {
1258      op0_p = &TREE_OPERAND (*cond_p, 0);
1259      op1_p = &TREE_OPERAND (*cond_p, 1);
1260    }
1261
1262  if (TREE_CODE (*op0_p) == SSA_NAME)
1263    iv0 = get_iv (data, *op0_p);
1264  else
1265    iv0 = &const_iv;
1266
1267  if (TREE_CODE (*op1_p) == SSA_NAME)
1268    iv1 = get_iv (data, *op1_p);
1269  else
1270    iv1 = &const_iv;
1271
1272  if (/* When comparing with non-invariant value, we may not do any senseful
1273	 induction variable elimination.  */
1274      (!iv0 || !iv1)
1275      /* Eliminating condition based on two ivs would be nontrivial.
1276	 ??? TODO -- it is not really important to handle this case.  */
1277      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1278    {
1279      find_interesting_uses_op (data, *op0_p);
1280      find_interesting_uses_op (data, *op1_p);
1281      return;
1282    }
1283
1284  if (zero_p (iv0->step) && zero_p (iv1->step))
1285    {
1286      /* If both are invariants, this is a work for unswitching.  */
1287      return;
1288    }
1289
1290  civ = XNEW (struct iv);
1291  *civ = zero_p (iv0->step) ? *iv1: *iv0;
1292  record_use (data, cond_p, civ, stmt, USE_COMPARE);
1293}
1294
1295/* Returns true if expression EXPR is obviously invariant in LOOP,
1296   i.e. if all its operands are defined outside of the LOOP.  */
1297
1298bool
1299expr_invariant_in_loop_p (struct loop *loop, tree expr)
1300{
1301  basic_block def_bb;
1302  unsigned i, len;
1303
1304  if (is_gimple_min_invariant (expr))
1305    return true;
1306
1307  if (TREE_CODE (expr) == SSA_NAME)
1308    {
1309      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1310      if (def_bb
1311	  && flow_bb_inside_loop_p (loop, def_bb))
1312	return false;
1313
1314      return true;
1315    }
1316
1317  if (!EXPR_P (expr))
1318    return false;
1319
1320  len = TREE_CODE_LENGTH (TREE_CODE (expr));
1321  for (i = 0; i < len; i++)
1322    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1323      return false;
1324
1325  return true;
1326}
1327
1328/* Cumulates the steps of indices into DATA and replaces their values with the
1329   initial ones.  Returns false when the value of the index cannot be determined.
1330   Callback for for_each_index.  */
1331
1332struct ifs_ivopts_data
1333{
1334  struct ivopts_data *ivopts_data;
1335  tree stmt;
1336  tree *step_p;
1337};
1338
1339static bool
1340idx_find_step (tree base, tree *idx, void *data)
1341{
1342  struct ifs_ivopts_data *dta = data;
1343  struct iv *iv;
1344  tree step, iv_base, iv_step, lbound, off;
1345  struct loop *loop = dta->ivopts_data->current_loop;
1346
1347  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1348      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1349    return false;
1350
1351  /* If base is a component ref, require that the offset of the reference
1352     be invariant.  */
1353  if (TREE_CODE (base) == COMPONENT_REF)
1354    {
1355      off = component_ref_field_offset (base);
1356      return expr_invariant_in_loop_p (loop, off);
1357    }
1358
1359  /* If base is array, first check whether we will be able to move the
1360     reference out of the loop (in order to take its address in strength
1361     reduction).  In order for this to work we need both lower bound
1362     and step to be loop invariants.  */
1363  if (TREE_CODE (base) == ARRAY_REF)
1364    {
1365      step = array_ref_element_size (base);
1366      lbound = array_ref_low_bound (base);
1367
1368      if (!expr_invariant_in_loop_p (loop, step)
1369	  || !expr_invariant_in_loop_p (loop, lbound))
1370	return false;
1371    }
1372
1373  if (TREE_CODE (*idx) != SSA_NAME)
1374    return true;
1375
1376  iv = get_iv (dta->ivopts_data, *idx);
1377  if (!iv)
1378    return false;
1379
1380  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1381	  *&x[0], which is not folded and does not trigger the
1382	  ARRAY_REF path below.  */
1383  *idx = iv->base;
1384
1385  if (!iv->step)
1386    return true;
1387
1388  if (TREE_CODE (base) == ARRAY_REF)
1389    {
1390      step = array_ref_element_size (base);
1391
1392      /* We only handle addresses whose step is an integer constant.  */
1393      if (TREE_CODE (step) != INTEGER_CST)
1394	return false;
1395    }
1396  else
1397    /* The step for pointer arithmetics already is 1 byte.  */
1398    step = build_int_cst (sizetype, 1);
1399
1400  iv_base = iv->base;
1401  iv_step = iv->step;
1402  if (!convert_affine_scev (dta->ivopts_data->current_loop,
1403			    sizetype, &iv_base, &iv_step, dta->stmt,
1404			    false))
1405    {
1406      /* The index might wrap.  */
1407      return false;
1408    }
1409
1410  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1411
1412  if (!*dta->step_p)
1413    *dta->step_p = step;
1414  else
1415    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1416
1417  return true;
1418}
1419
1420/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1421   object is passed to it in DATA.  */
1422
1423static bool
1424idx_record_use (tree base, tree *idx,
1425		void *data)
1426{
1427  find_interesting_uses_op (data, *idx);
1428  if (TREE_CODE (base) == ARRAY_REF)
1429    {
1430      find_interesting_uses_op (data, array_ref_element_size (base));
1431      find_interesting_uses_op (data, array_ref_low_bound (base));
1432    }
1433  return true;
1434}
1435
1436/* Returns true if memory reference REF may be unaligned.  */
1437
1438static bool
1439may_be_unaligned_p (tree ref)
1440{
1441  tree base;
1442  tree base_type;
1443  HOST_WIDE_INT bitsize;
1444  HOST_WIDE_INT bitpos;
1445  tree toffset;
1446  enum machine_mode mode;
1447  int unsignedp, volatilep;
1448  unsigned base_align;
1449
1450  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451     thus they are not misaligned.  */
1452  if (TREE_CODE (ref) == TARGET_MEM_REF)
1453    return false;
1454
1455  /* The test below is basically copy of what expr.c:normal_inner_ref
1456     does to check whether the object must be loaded by parts when
1457     STRICT_ALIGNMENT is true.  */
1458  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1459			      &unsignedp, &volatilep, true);
1460  base_type = TREE_TYPE (base);
1461  base_align = TYPE_ALIGN (base_type);
1462
1463  if (mode != BLKmode
1464      && (base_align < GET_MODE_ALIGNMENT (mode)
1465	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1466	  || bitpos % BITS_PER_UNIT != 0))
1467    return true;
1468
1469  return false;
1470}
1471
1472/* Return true if EXPR may be non-addressable.   */
1473
1474static bool
1475may_be_nonaddressable_p (tree expr)
1476{
1477  switch (TREE_CODE (expr))
1478    {
1479    case COMPONENT_REF:
1480      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1481	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1482
1483    case ARRAY_REF:
1484    case ARRAY_RANGE_REF:
1485      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1486
1487    case VIEW_CONVERT_EXPR:
1488      /* This kind of view-conversions may wrap non-addressable objects
1489	 and make them look addressable.  After some processing the
1490	 non-addressability may be uncovered again, causing ADDR_EXPRs
1491	 of inappropriate objects to be built.  */
1492      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1493	     && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1494
1495    default:
1496      break;
1497    }
1498
1499  return false;
1500}
1501
1502/* Finds addresses in *OP_P inside STMT.  */
1503
1504static void
1505find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1506{
1507  tree base = *op_p, step = NULL;
1508  struct iv *civ;
1509  struct ifs_ivopts_data ifs_ivopts_data;
1510
1511  /* Do not play with volatile memory references.  A bit too conservative,
1512     perhaps, but safe.  */
1513  if (stmt_ann (stmt)->has_volatile_ops)
1514    goto fail;
1515
1516  /* Ignore bitfields for now.  Not really something terribly complicated
1517     to handle.  TODO.  */
1518  if (TREE_CODE (base) == BIT_FIELD_REF)
1519    goto fail;
1520
1521  if (may_be_nonaddressable_p (base))
1522    goto fail;
1523
1524  if (STRICT_ALIGNMENT
1525      && may_be_unaligned_p (base))
1526    goto fail;
1527
1528  base = unshare_expr (base);
1529
1530  if (TREE_CODE (base) == TARGET_MEM_REF)
1531    {
1532      tree type = build_pointer_type (TREE_TYPE (base));
1533      tree astep;
1534
1535      if (TMR_BASE (base)
1536	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1537	{
1538	  civ = get_iv (data, TMR_BASE (base));
1539	  if (!civ)
1540	    goto fail;
1541
1542	  TMR_BASE (base) = civ->base;
1543	  step = civ->step;
1544	}
1545      if (TMR_INDEX (base)
1546	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1547	{
1548	  civ = get_iv (data, TMR_INDEX (base));
1549	  if (!civ)
1550	    goto fail;
1551
1552	  TMR_INDEX (base) = civ->base;
1553	  astep = civ->step;
1554
1555	  if (astep)
1556	    {
1557	      if (TMR_STEP (base))
1558		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1559
1560	      if (step)
1561		step = fold_build2 (PLUS_EXPR, type, step, astep);
1562	      else
1563		step = astep;
1564	    }
1565	}
1566
1567      if (zero_p (step))
1568	goto fail;
1569      base = tree_mem_ref_addr (type, base);
1570    }
1571  else
1572    {
1573      ifs_ivopts_data.ivopts_data = data;
1574      ifs_ivopts_data.stmt = stmt;
1575      ifs_ivopts_data.step_p = &step;
1576      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1577	  || zero_p (step))
1578	goto fail;
1579
1580      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1581      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1582
1583      base = build_fold_addr_expr (base);
1584
1585      /* Substituting bases of IVs into the base expression might
1586	 have caused folding opportunities.  */
1587      if (TREE_CODE (base) == ADDR_EXPR)
1588	{
1589	  tree *ref = &TREE_OPERAND (base, 0);
1590	  while (handled_component_p (*ref))
1591	    ref = &TREE_OPERAND (*ref, 0);
1592	  if (TREE_CODE (*ref) == INDIRECT_REF)
1593	    *ref = fold_indirect_ref (*ref);
1594	}
1595    }
1596
1597  civ = alloc_iv (base, step);
1598  record_use (data, op_p, civ, stmt, USE_ADDRESS);
1599  return;
1600
1601fail:
1602  for_each_index (op_p, idx_record_use, data);
1603}
1604
1605/* Finds and records invariants used in STMT.  */
1606
1607static void
1608find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609{
1610  ssa_op_iter iter;
1611  use_operand_p use_p;
1612  tree op;
1613
1614  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615    {
1616      op = USE_FROM_PTR (use_p);
1617      record_invariant (data, op, false);
1618    }
1619}
1620
1621/* Finds interesting uses of induction variables in the statement STMT.  */
1622
1623static void
1624find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625{
1626  struct iv *iv;
1627  tree op, lhs, rhs;
1628  ssa_op_iter iter;
1629  use_operand_p use_p;
1630
1631  find_invariants_stmt (data, stmt);
1632
1633  if (TREE_CODE (stmt) == COND_EXPR)
1634    {
1635      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1636      return;
1637    }
1638
1639  if (TREE_CODE (stmt) == MODIFY_EXPR)
1640    {
1641      lhs = TREE_OPERAND (stmt, 0);
1642      rhs = TREE_OPERAND (stmt, 1);
1643
1644      if (TREE_CODE (lhs) == SSA_NAME)
1645	{
1646	  /* If the statement defines an induction variable, the uses are not
1647	     interesting by themselves.  */
1648
1649	  iv = get_iv (data, lhs);
1650
1651	  if (iv && !zero_p (iv->step))
1652	    return;
1653	}
1654
1655      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656	{
1657	case tcc_comparison:
1658	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1659	  return;
1660
1661	case tcc_reference:
1662	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1663	  if (REFERENCE_CLASS_P (lhs))
1664	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1665	  return;
1666
1667	default: ;
1668	}
1669
1670      if (REFERENCE_CLASS_P (lhs)
1671	  && is_gimple_val (rhs))
1672	{
1673	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1674	  find_interesting_uses_op (data, rhs);
1675	  return;
1676	}
1677
1678      /* TODO -- we should also handle address uses of type
1679
1680	 memory = call (whatever);
1681
1682	 and
1683
1684	 call (memory).  */
1685    }
1686
1687  if (TREE_CODE (stmt) == PHI_NODE
1688      && bb_for_stmt (stmt) == data->current_loop->header)
1689    {
1690      lhs = PHI_RESULT (stmt);
1691      iv = get_iv (data, lhs);
1692
1693      if (iv && !zero_p (iv->step))
1694	return;
1695    }
1696
1697  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698    {
1699      op = USE_FROM_PTR (use_p);
1700
1701      if (TREE_CODE (op) != SSA_NAME)
1702	continue;
1703
1704      iv = get_iv (data, op);
1705      if (!iv)
1706	continue;
1707
1708      find_interesting_uses_op (data, op);
1709    }
1710}
1711
1712/* Finds interesting uses of induction variables outside of loops
1713   on loop exit edge EXIT.  */
1714
1715static void
1716find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717{
1718  tree phi, def;
1719
1720  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721    {
1722      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1723      find_interesting_uses_op (data, def);
1724    }
1725}
1726
1727/* Finds uses of the induction variables that are interesting.  */
1728
1729static void
1730find_interesting_uses (struct ivopts_data *data)
1731{
1732  basic_block bb;
1733  block_stmt_iterator bsi;
1734  tree phi;
1735  basic_block *body = get_loop_body (data->current_loop);
1736  unsigned i;
1737  struct version_info *info;
1738  edge e;
1739
1740  if (dump_file && (dump_flags & TDF_DETAILS))
1741    fprintf (dump_file, "Uses:\n\n");
1742
1743  for (i = 0; i < data->current_loop->num_nodes; i++)
1744    {
1745      edge_iterator ei;
1746      bb = body[i];
1747
1748      FOR_EACH_EDGE (e, ei, bb->succs)
1749	if (e->dest != EXIT_BLOCK_PTR
1750	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1751	  find_interesting_uses_outside (data, e);
1752
1753      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1754	find_interesting_uses_stmt (data, phi);
1755      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1756	find_interesting_uses_stmt (data, bsi_stmt (bsi));
1757    }
1758
1759  if (dump_file && (dump_flags & TDF_DETAILS))
1760    {
1761      bitmap_iterator bi;
1762
1763      fprintf (dump_file, "\n");
1764
1765      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766	{
1767	  info = ver_info (data, i);
1768	  if (info->inv_id)
1769	    {
1770	      fprintf (dump_file, "  ");
1771	      print_generic_expr (dump_file, info->name, TDF_SLIM);
1772	      fprintf (dump_file, " is invariant (%d)%s\n",
1773		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774	    }
1775	}
1776
1777      fprintf (dump_file, "\n");
1778    }
1779
1780  free (body);
1781}
1782
1783/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1784   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1785   we are at the top-level of the processed address.  */
1786
1787static tree
1788strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1789		unsigned HOST_WIDE_INT *offset)
1790{
1791  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1792  enum tree_code code;
1793  tree type, orig_type = TREE_TYPE (expr);
1794  unsigned HOST_WIDE_INT off0, off1, st;
1795  tree orig_expr = expr;
1796
1797  STRIP_NOPS (expr);
1798
1799  type = TREE_TYPE (expr);
1800  code = TREE_CODE (expr);
1801  *offset = 0;
1802
1803  switch (code)
1804    {
1805    case INTEGER_CST:
1806      if (!cst_and_fits_in_hwi (expr)
1807	  || zero_p (expr))
1808	return orig_expr;
1809
1810      *offset = int_cst_value (expr);
1811      return build_int_cst (orig_type, 0);
1812
1813    case PLUS_EXPR:
1814    case MINUS_EXPR:
1815      op0 = TREE_OPERAND (expr, 0);
1816      op1 = TREE_OPERAND (expr, 1);
1817
1818      op0 = strip_offset_1 (op0, false, false, &off0);
1819      op1 = strip_offset_1 (op1, false, false, &off1);
1820
1821      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1822      if (op0 == TREE_OPERAND (expr, 0)
1823	  && op1 == TREE_OPERAND (expr, 1))
1824	return orig_expr;
1825
1826      if (zero_p (op1))
1827	expr = op0;
1828      else if (zero_p (op0))
1829	{
1830	  if (code == PLUS_EXPR)
1831	    expr = op1;
1832	  else
1833	    expr = fold_build1 (NEGATE_EXPR, type, op1);
1834	}
1835      else
1836	expr = fold_build2 (code, type, op0, op1);
1837
1838      return fold_convert (orig_type, expr);
1839
1840    case ARRAY_REF:
1841      if (!inside_addr)
1842	return orig_expr;
1843
1844      step = array_ref_element_size (expr);
1845      if (!cst_and_fits_in_hwi (step))
1846	break;
1847
1848      st = int_cst_value (step);
1849      op1 = TREE_OPERAND (expr, 1);
1850      op1 = strip_offset_1 (op1, false, false, &off1);
1851      *offset = off1 * st;
1852
1853      if (top_compref
1854	  && zero_p (op1))
1855	{
1856	  /* Strip the component reference completely.  */
1857	  op0 = TREE_OPERAND (expr, 0);
1858	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1859	  *offset += off0;
1860	  return op0;
1861	}
1862      break;
1863
1864    case COMPONENT_REF:
1865      if (!inside_addr)
1866	return orig_expr;
1867
1868      tmp = component_ref_field_offset (expr);
1869      if (top_compref
1870	  && cst_and_fits_in_hwi (tmp))
1871	{
1872	  /* Strip the component reference completely.  */
1873	  op0 = TREE_OPERAND (expr, 0);
1874	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1875	  *offset = off0 + int_cst_value (tmp);
1876	  return op0;
1877	}
1878      break;
1879
1880    case ADDR_EXPR:
1881      op0 = TREE_OPERAND (expr, 0);
1882      op0 = strip_offset_1 (op0, true, true, &off0);
1883      *offset += off0;
1884
1885      if (op0 == TREE_OPERAND (expr, 0))
1886	return orig_expr;
1887
1888      expr = build_fold_addr_expr (op0);
1889      return fold_convert (orig_type, expr);
1890
1891    case INDIRECT_REF:
1892      inside_addr = false;
1893      break;
1894
1895    default:
1896      return orig_expr;
1897    }
1898
1899  /* Default handling of expressions for that we want to recurse into
1900     the first operand.  */
1901  op0 = TREE_OPERAND (expr, 0);
1902  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1903  *offset += off0;
1904
1905  if (op0 == TREE_OPERAND (expr, 0)
1906      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1907    return orig_expr;
1908
1909  expr = copy_node (expr);
1910  TREE_OPERAND (expr, 0) = op0;
1911  if (op1)
1912    TREE_OPERAND (expr, 1) = op1;
1913
1914  /* Inside address, we might strip the top level component references,
1915     thus changing type of the expression.  Handling of ADDR_EXPR
1916     will fix that.  */
1917  expr = fold_convert (orig_type, expr);
1918
1919  return expr;
1920}
1921
1922/* Strips constant offsets from EXPR and stores them to OFFSET.  */
1923
1924static tree
1925strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926{
1927  return strip_offset_1 (expr, false, false, offset);
1928}
1929
1930/* Returns variant of TYPE that can be used as base for different uses.
1931   We return unsigned type with the same precision, which avoids problems
1932   with overflows.  */
1933
1934static tree
1935generic_type_for (tree type)
1936{
1937  if (POINTER_TYPE_P (type))
1938    return unsigned_type_for (type);
1939
1940  if (TYPE_UNSIGNED (type))
1941    return type;
1942
1943  return unsigned_type_for (type);
1944}
1945
1946/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1947   the bitmap to that we should store it.  */
1948
1949static struct ivopts_data *fd_ivopts_data;
1950static tree
1951find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952{
1953  bitmap *depends_on = data;
1954  struct version_info *info;
1955
1956  if (TREE_CODE (*expr_p) != SSA_NAME)
1957    return NULL_TREE;
1958  info = name_info (fd_ivopts_data, *expr_p);
1959
1960  if (!info->inv_id || info->has_nonlin_use)
1961    return NULL_TREE;
1962
1963  if (!*depends_on)
1964    *depends_on = BITMAP_ALLOC (NULL);
1965  bitmap_set_bit (*depends_on, info->inv_id);
1966
1967  return NULL_TREE;
1968}
1969
1970/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1971   position to POS.  If USE is not NULL, the candidate is set as related to
1972   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1973   replacement of the final value of the iv by a direct computation.  */
1974
1975static struct iv_cand *
1976add_candidate_1 (struct ivopts_data *data,
1977		 tree base, tree step, bool important, enum iv_position pos,
1978		 struct iv_use *use, tree incremented_at)
1979{
1980  unsigned i;
1981  struct iv_cand *cand = NULL;
1982  tree type, orig_type;
1983
1984  if (base)
1985    {
1986      orig_type = TREE_TYPE (base);
1987      type = generic_type_for (orig_type);
1988      if (type != orig_type)
1989	{
1990	  base = fold_convert (type, base);
1991	  if (step)
1992	    step = fold_convert (type, step);
1993	}
1994    }
1995
1996  for (i = 0; i < n_iv_cands (data); i++)
1997    {
1998      cand = iv_cand (data, i);
1999
2000      if (cand->pos != pos)
2001	continue;
2002
2003      if (cand->incremented_at != incremented_at)
2004	continue;
2005
2006      if (!cand->iv)
2007	{
2008	  if (!base && !step)
2009	    break;
2010
2011	  continue;
2012	}
2013
2014      if (!base && !step)
2015	continue;
2016
2017      if (!operand_equal_p (base, cand->iv->base, 0))
2018	continue;
2019
2020      if (zero_p (cand->iv->step))
2021	{
2022	  if (zero_p (step))
2023	    break;
2024	}
2025      else
2026	{
2027	  if (step && operand_equal_p (step, cand->iv->step, 0))
2028	    break;
2029	}
2030    }
2031
2032  if (i == n_iv_cands (data))
2033    {
2034      cand = XCNEW (struct iv_cand);
2035      cand->id = i;
2036
2037      if (!base && !step)
2038	cand->iv = NULL;
2039      else
2040	cand->iv = alloc_iv (base, step);
2041
2042      cand->pos = pos;
2043      if (pos != IP_ORIGINAL && cand->iv)
2044	{
2045	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2046	  cand->var_after = cand->var_before;
2047	}
2048      cand->important = important;
2049      cand->incremented_at = incremented_at;
2050      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051
2052      if (step
2053	  && TREE_CODE (step) != INTEGER_CST)
2054	{
2055	  fd_ivopts_data = data;
2056	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
2057	}
2058
2059      if (dump_file && (dump_flags & TDF_DETAILS))
2060	dump_cand (dump_file, cand);
2061    }
2062
2063  if (important && !cand->important)
2064    {
2065      cand->important = true;
2066      if (dump_file && (dump_flags & TDF_DETAILS))
2067	fprintf (dump_file, "Candidate %d is important\n", cand->id);
2068    }
2069
2070  if (use)
2071    {
2072      bitmap_set_bit (use->related_cands, i);
2073      if (dump_file && (dump_flags & TDF_DETAILS))
2074	fprintf (dump_file, "Candidate %d is related to use %d\n",
2075		 cand->id, use->id);
2076    }
2077
2078  return cand;
2079}
2080
2081/* Returns true if incrementing the induction variable at the end of the LOOP
2082   is allowed.
2083
2084   The purpose is to avoid splitting latch edge with a biv increment, thus
2085   creating a jump, possibly confusing other optimization passes and leaving
2086   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2087   is not available (so we do not have a better alternative), or if the latch
2088   edge is already nonempty.  */
2089
2090static bool
2091allow_ip_end_pos_p (struct loop *loop)
2092{
2093  if (!ip_normal_pos (loop))
2094    return true;
2095
2096  if (!empty_block_p (ip_end_pos (loop)))
2097    return true;
2098
2099  return false;
2100}
2101
2102/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2103   position to POS.  If USE is not NULL, the candidate is set as related to
2104   it.  The candidate computation is scheduled on all available positions.  */
2105
2106static void
2107add_candidate (struct ivopts_data *data,
2108	       tree base, tree step, bool important, struct iv_use *use)
2109{
2110  if (ip_normal_pos (data->current_loop))
2111    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2112  if (ip_end_pos (data->current_loop)
2113      && allow_ip_end_pos_p (data->current_loop))
2114    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2115}
2116
2117/* Add a standard "0 + 1 * iteration" iv candidate for a
2118   type with SIZE bits.  */
2119
2120static void
2121add_standard_iv_candidates_for_size (struct ivopts_data *data,
2122				     unsigned int size)
2123{
2124  tree type = lang_hooks.types.type_for_size (size, true);
2125  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2126		 true, NULL);
2127}
2128
2129/* Adds standard iv candidates.  */
2130
2131static void
2132add_standard_iv_candidates (struct ivopts_data *data)
2133{
2134  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135
2136  /* The same for a double-integer type if it is still fast enough.  */
2137  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2138    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139}
2140
2141
2142/* Adds candidates bases on the old induction variable IV.  */
2143
2144static void
2145add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146{
2147  tree phi, def;
2148  struct iv_cand *cand;
2149
2150  add_candidate (data, iv->base, iv->step, true, NULL);
2151
2152  /* The same, but with initial value zero.  */
2153  add_candidate (data,
2154		 build_int_cst (TREE_TYPE (iv->base), 0),
2155		 iv->step, true, NULL);
2156
2157  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2158  if (TREE_CODE (phi) == PHI_NODE)
2159    {
2160      /* Additionally record the possibility of leaving the original iv
2161	 untouched.  */
2162      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2163      cand = add_candidate_1 (data,
2164			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
2165			      SSA_NAME_DEF_STMT (def));
2166      cand->var_before = iv->ssa_name;
2167      cand->var_after = def;
2168    }
2169}
2170
2171/* Adds candidates based on the old induction variables.  */
2172
2173static void
2174add_old_ivs_candidates (struct ivopts_data *data)
2175{
2176  unsigned i;
2177  struct iv *iv;
2178  bitmap_iterator bi;
2179
2180  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181    {
2182      iv = ver_info (data, i)->iv;
2183      if (iv && iv->biv_p && !zero_p (iv->step))
2184	add_old_iv_candidates (data, iv);
2185    }
2186}
2187
2188/* Adds candidates based on the value of the induction variable IV and USE.  */
2189
2190static void
2191add_iv_value_candidates (struct ivopts_data *data,
2192			 struct iv *iv, struct iv_use *use)
2193{
2194  unsigned HOST_WIDE_INT offset;
2195  tree base;
2196
2197  add_candidate (data, iv->base, iv->step, false, use);
2198
2199  /* The same, but with initial value zero.  Make such variable important,
2200     since it is generic enough so that possibly many uses may be based
2201     on it.  */
2202  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2203		 iv->step, true, use);
2204
2205  /* Third, try removing the constant offset.  */
2206  base = strip_offset (iv->base, &offset);
2207  if (offset)
2208    add_candidate (data, base, iv->step, false, use);
2209}
2210
2211/* Adds candidates based on the uses.  */
2212
2213static void
2214add_derived_ivs_candidates (struct ivopts_data *data)
2215{
2216  unsigned i;
2217
2218  for (i = 0; i < n_iv_uses (data); i++)
2219    {
2220      struct iv_use *use = iv_use (data, i);
2221
2222      if (!use)
2223	continue;
2224
2225      switch (use->type)
2226	{
2227	case USE_NONLINEAR_EXPR:
2228	case USE_COMPARE:
2229	case USE_ADDRESS:
2230	  /* Just add the ivs based on the value of the iv used here.  */
2231	  add_iv_value_candidates (data, use->iv, use);
2232	  break;
2233
2234	default:
2235	  gcc_unreachable ();
2236	}
2237    }
2238}
2239
2240/* Record important candidates and add them to related_cands bitmaps
2241   if needed.  */
2242
2243static void
2244record_important_candidates (struct ivopts_data *data)
2245{
2246  unsigned i;
2247  struct iv_use *use;
2248
2249  for (i = 0; i < n_iv_cands (data); i++)
2250    {
2251      struct iv_cand *cand = iv_cand (data, i);
2252
2253      if (cand->important)
2254	bitmap_set_bit (data->important_candidates, i);
2255    }
2256
2257  data->consider_all_candidates = (n_iv_cands (data)
2258				   <= CONSIDER_ALL_CANDIDATES_BOUND);
2259
2260  if (data->consider_all_candidates)
2261    {
2262      /* We will not need "related_cands" bitmaps in this case,
2263	 so release them to decrease peak memory consumption.  */
2264      for (i = 0; i < n_iv_uses (data); i++)
2265	{
2266	  use = iv_use (data, i);
2267	  BITMAP_FREE (use->related_cands);
2268	}
2269    }
2270  else
2271    {
2272      /* Add important candidates to the related_cands bitmaps.  */
2273      for (i = 0; i < n_iv_uses (data); i++)
2274	bitmap_ior_into (iv_use (data, i)->related_cands,
2275			 data->important_candidates);
2276    }
2277}
2278
2279/* Finds the candidates for the induction variables.  */
2280
2281static void
2282find_iv_candidates (struct ivopts_data *data)
2283{
2284  /* Add commonly used ivs.  */
2285  add_standard_iv_candidates (data);
2286
2287  /* Add old induction variables.  */
2288  add_old_ivs_candidates (data);
2289
2290  /* Add induction variables derived from uses.  */
2291  add_derived_ivs_candidates (data);
2292
2293  /* Record the important candidates.  */
2294  record_important_candidates (data);
2295}
2296
2297/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298   If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299   we allocate a simple list to every use.  */
2300
2301static void
2302alloc_use_cost_map (struct ivopts_data *data)
2303{
2304  unsigned i, size, s, j;
2305
2306  for (i = 0; i < n_iv_uses (data); i++)
2307    {
2308      struct iv_use *use = iv_use (data, i);
2309      bitmap_iterator bi;
2310
2311      if (data->consider_all_candidates)
2312	size = n_iv_cands (data);
2313      else
2314	{
2315	  s = 0;
2316	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2317	    {
2318	      s++;
2319	    }
2320
2321	  /* Round up to the power of two, so that moduling by it is fast.  */
2322	  for (size = 1; size < s; size <<= 1)
2323	    continue;
2324	}
2325
2326      use->n_map_members = size;
2327      use->cost_map = XCNEWVEC (struct cost_pair, size);
2328    }
2329}
2330
2331/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2332   on invariants DEPENDS_ON and that the value used in expressing it
2333   is VALUE.*/
2334
2335static void
2336set_use_iv_cost (struct ivopts_data *data,
2337		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2338		 bitmap depends_on, tree value)
2339{
2340  unsigned i, s;
2341
2342  if (cost == INFTY)
2343    {
2344      BITMAP_FREE (depends_on);
2345      return;
2346    }
2347
2348  if (data->consider_all_candidates)
2349    {
2350      use->cost_map[cand->id].cand = cand;
2351      use->cost_map[cand->id].cost = cost;
2352      use->cost_map[cand->id].depends_on = depends_on;
2353      use->cost_map[cand->id].value = value;
2354      return;
2355    }
2356
2357  /* n_map_members is a power of two, so this computes modulo.  */
2358  s = cand->id & (use->n_map_members - 1);
2359  for (i = s; i < use->n_map_members; i++)
2360    if (!use->cost_map[i].cand)
2361      goto found;
2362  for (i = 0; i < s; i++)
2363    if (!use->cost_map[i].cand)
2364      goto found;
2365
2366  gcc_unreachable ();
2367
2368found:
2369  use->cost_map[i].cand = cand;
2370  use->cost_map[i].cost = cost;
2371  use->cost_map[i].depends_on = depends_on;
2372  use->cost_map[i].value = value;
2373}
2374
2375/* Gets cost of (USE, CANDIDATE) pair.  */
2376
2377static struct cost_pair *
2378get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2379		 struct iv_cand *cand)
2380{
2381  unsigned i, s;
2382  struct cost_pair *ret;
2383
2384  if (!cand)
2385    return NULL;
2386
2387  if (data->consider_all_candidates)
2388    {
2389      ret = use->cost_map + cand->id;
2390      if (!ret->cand)
2391	return NULL;
2392
2393      return ret;
2394    }
2395
2396  /* n_map_members is a power of two, so this computes modulo.  */
2397  s = cand->id & (use->n_map_members - 1);
2398  for (i = s; i < use->n_map_members; i++)
2399    if (use->cost_map[i].cand == cand)
2400      return use->cost_map + i;
2401
2402  for (i = 0; i < s; i++)
2403    if (use->cost_map[i].cand == cand)
2404      return use->cost_map + i;
2405
2406  return NULL;
2407}
2408
2409/* Returns estimate on cost of computing SEQ.  */
2410
2411static unsigned
2412seq_cost (rtx seq)
2413{
2414  unsigned cost = 0;
2415  rtx set;
2416
2417  for (; seq; seq = NEXT_INSN (seq))
2418    {
2419      set = single_set (seq);
2420      if (set)
2421	cost += rtx_cost (set, SET);
2422      else
2423	cost++;
2424    }
2425
2426  return cost;
2427}
2428
2429/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2430static rtx
2431produce_memory_decl_rtl (tree obj, int *regno)
2432{
2433  rtx x;
2434
2435  gcc_assert (obj);
2436  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2437    {
2438      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2439      x = gen_rtx_SYMBOL_REF (Pmode, name);
2440    }
2441  else
2442    x = gen_raw_REG (Pmode, (*regno)++);
2443
2444  return gen_rtx_MEM (DECL_MODE (obj), x);
2445}
2446
2447/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2448   walk_tree.  DATA contains the actual fake register number.  */
2449
2450static tree
2451prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2452{
2453  tree obj = NULL_TREE;
2454  rtx x = NULL_RTX;
2455  int *regno = data;
2456
2457  switch (TREE_CODE (*expr_p))
2458    {
2459    case ADDR_EXPR:
2460      for (expr_p = &TREE_OPERAND (*expr_p, 0);
2461	   handled_component_p (*expr_p);
2462	   expr_p = &TREE_OPERAND (*expr_p, 0))
2463	continue;
2464      obj = *expr_p;
2465      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2466        x = produce_memory_decl_rtl (obj, regno);
2467      break;
2468
2469    case SSA_NAME:
2470      *ws = 0;
2471      obj = SSA_NAME_VAR (*expr_p);
2472      if (!DECL_RTL_SET_P (obj))
2473	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2474      break;
2475
2476    case VAR_DECL:
2477    case PARM_DECL:
2478    case RESULT_DECL:
2479      *ws = 0;
2480      obj = *expr_p;
2481
2482      if (DECL_RTL_SET_P (obj))
2483	break;
2484
2485      if (DECL_MODE (obj) == BLKmode)
2486	x = produce_memory_decl_rtl (obj, regno);
2487      else
2488	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2489
2490      break;
2491
2492    default:
2493      break;
2494    }
2495
2496  if (x)
2497    {
2498      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2499      SET_DECL_RTL (obj, x);
2500    }
2501
2502  return NULL_TREE;
2503}
2504
2505/* Determines cost of the computation of EXPR.  */
2506
2507static unsigned
2508computation_cost (tree expr)
2509{
2510  rtx seq, rslt;
2511  tree type = TREE_TYPE (expr);
2512  unsigned cost;
2513  /* Avoid using hard regs in ways which may be unsupported.  */
2514  int regno = LAST_VIRTUAL_REGISTER + 1;
2515
2516  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2517  start_sequence ();
2518  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2519  seq = get_insns ();
2520  end_sequence ();
2521
2522  cost = seq_cost (seq);
2523  if (MEM_P (rslt))
2524    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2525
2526  return cost;
2527}
2528
2529/* Returns variable containing the value of candidate CAND at statement AT.  */
2530
2531static tree
2532var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2533{
2534  if (stmt_after_increment (loop, cand, stmt))
2535    return cand->var_after;
2536  else
2537    return cand->var_before;
2538}
2539
2540/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2541   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2542
2543int
2544tree_int_cst_sign_bit (tree t)
2545{
2546  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2547  unsigned HOST_WIDE_INT w;
2548
2549  if (bitno < HOST_BITS_PER_WIDE_INT)
2550    w = TREE_INT_CST_LOW (t);
2551  else
2552    {
2553      w = TREE_INT_CST_HIGH (t);
2554      bitno -= HOST_BITS_PER_WIDE_INT;
2555    }
2556
2557  return (w >> bitno) & 1;
2558}
2559
2560/* If we can prove that TOP = cst * BOT for some constant cst,
2561   store cst to MUL and return true.  Otherwise return false.
2562   The returned value is always sign-extended, regardless of the
2563   signedness of TOP and BOT.  */
2564
2565static bool
2566constant_multiple_of (tree top, tree bot, double_int *mul)
2567{
2568  tree mby;
2569  enum tree_code code;
2570  double_int res, p0, p1;
2571  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2572
2573  STRIP_NOPS (top);
2574  STRIP_NOPS (bot);
2575
2576  if (operand_equal_p (top, bot, 0))
2577    {
2578      *mul = double_int_one;
2579      return true;
2580    }
2581
2582  code = TREE_CODE (top);
2583  switch (code)
2584    {
2585    case MULT_EXPR:
2586      mby = TREE_OPERAND (top, 1);
2587      if (TREE_CODE (mby) != INTEGER_CST)
2588	return false;
2589
2590      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2591	return false;
2592
2593      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2594			      precision);
2595      return true;
2596
2597    case PLUS_EXPR:
2598    case MINUS_EXPR:
2599      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2600	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2601	return false;
2602
2603      if (code == MINUS_EXPR)
2604	p1 = double_int_neg (p1);
2605      *mul = double_int_sext (double_int_add (p0, p1), precision);
2606      return true;
2607
2608    case INTEGER_CST:
2609      if (TREE_CODE (bot) != INTEGER_CST)
2610	return false;
2611
2612      p0 = double_int_sext (tree_to_double_int (bot), precision);
2613      p1 = double_int_sext (tree_to_double_int (top), precision);
2614      if (double_int_zero_p (p1))
2615	return false;
2616      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2617			      precision);
2618      return double_int_zero_p (res);
2619
2620    default:
2621      return false;
2622    }
2623}
2624
2625/* Sets COMB to CST.  */
2626
2627static void
2628aff_combination_const (struct affine_tree_combination *comb, tree type,
2629		       unsigned HOST_WIDE_INT cst)
2630{
2631  unsigned prec = TYPE_PRECISION (type);
2632
2633  comb->type = type;
2634  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2635
2636  comb->n = 0;
2637  comb->rest = NULL_TREE;
2638  comb->offset = cst & comb->mask;
2639}
2640
2641/* Sets COMB to single element ELT.  */
2642
2643static void
2644aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2645{
2646  unsigned prec = TYPE_PRECISION (type);
2647
2648  comb->type = type;
2649  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2650
2651  comb->n = 1;
2652  comb->elts[0] = elt;
2653  comb->coefs[0] = 1;
2654  comb->rest = NULL_TREE;
2655  comb->offset = 0;
2656}
2657
2658/* Scales COMB by SCALE.  */
2659
2660static void
2661aff_combination_scale (struct affine_tree_combination *comb,
2662		       unsigned HOST_WIDE_INT scale)
2663{
2664  unsigned i, j;
2665
2666  if (scale == 1)
2667    return;
2668
2669  if (scale == 0)
2670    {
2671      aff_combination_const (comb, comb->type, 0);
2672      return;
2673    }
2674
2675  comb->offset = (scale * comb->offset) & comb->mask;
2676  for (i = 0, j = 0; i < comb->n; i++)
2677    {
2678      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2679      comb->elts[j] = comb->elts[i];
2680      if (comb->coefs[j] != 0)
2681	j++;
2682    }
2683  comb->n = j;
2684
2685  if (comb->rest)
2686    {
2687      if (comb->n < MAX_AFF_ELTS)
2688	{
2689	  comb->coefs[comb->n] = scale;
2690	  comb->elts[comb->n] = comb->rest;
2691	  comb->rest = NULL_TREE;
2692	  comb->n++;
2693	}
2694      else
2695	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2696				  build_int_cst_type (comb->type, scale));
2697    }
2698}
2699
2700/* Adds ELT * SCALE to COMB.  */
2701
2702static void
2703aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2704			 unsigned HOST_WIDE_INT scale)
2705{
2706  unsigned i;
2707
2708  if (scale == 0)
2709    return;
2710
2711  for (i = 0; i < comb->n; i++)
2712    if (operand_equal_p (comb->elts[i], elt, 0))
2713      {
2714	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2715	if (comb->coefs[i])
2716	  return;
2717
2718	comb->n--;
2719	comb->coefs[i] = comb->coefs[comb->n];
2720	comb->elts[i] = comb->elts[comb->n];
2721
2722	if (comb->rest)
2723	  {
2724	    gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2725	    comb->coefs[comb->n] = 1;
2726	    comb->elts[comb->n] = comb->rest;
2727	    comb->rest = NULL_TREE;
2728	    comb->n++;
2729	  }
2730	return;
2731      }
2732  if (comb->n < MAX_AFF_ELTS)
2733    {
2734      comb->coefs[comb->n] = scale;
2735      comb->elts[comb->n] = elt;
2736      comb->n++;
2737      return;
2738    }
2739
2740  if (scale == 1)
2741    elt = fold_convert (comb->type, elt);
2742  else
2743    elt = fold_build2 (MULT_EXPR, comb->type,
2744		       fold_convert (comb->type, elt),
2745		       build_int_cst_type (comb->type, scale));
2746
2747  if (comb->rest)
2748    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2749  else
2750    comb->rest = elt;
2751}
2752
2753/* Adds COMB2 to COMB1.  */
2754
2755static void
2756aff_combination_add (struct affine_tree_combination *comb1,
2757		     struct affine_tree_combination *comb2)
2758{
2759  unsigned i;
2760
2761  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2762  for (i = 0; i < comb2->n; i++)
2763    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2764  if (comb2->rest)
2765    aff_combination_add_elt (comb1, comb2->rest, 1);
2766}
2767
2768/* Convert COMB to TYPE.  */
2769
2770static void
2771aff_combination_convert (tree type, struct affine_tree_combination *comb)
2772{
2773  unsigned prec = TYPE_PRECISION (type);
2774  unsigned i;
2775
2776  /* If the precision of both types is the same, it suffices to change the type
2777     of the whole combination -- the elements are allowed to have another type
2778     equivalent wrto STRIP_NOPS.  */
2779  if (prec == TYPE_PRECISION (comb->type))
2780    {
2781      comb->type = type;
2782      return;
2783    }
2784
2785  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2786  comb->offset = comb->offset & comb->mask;
2787
2788  /* The type of the elements can be different from comb->type only as
2789     much as what STRIP_NOPS would remove.  We can just directly cast
2790     to TYPE.  */
2791  for (i = 0; i < comb->n; i++)
2792    comb->elts[i] = fold_convert (type, comb->elts[i]);
2793  if (comb->rest)
2794    comb->rest = fold_convert (type, comb->rest);
2795
2796  comb->type = type;
2797}
2798
2799/* Splits EXPR into an affine combination of parts.  */
2800
2801static void
2802tree_to_aff_combination (tree expr, tree type,
2803			 struct affine_tree_combination *comb)
2804{
2805  struct affine_tree_combination tmp;
2806  enum tree_code code;
2807  tree cst, core, toffset;
2808  HOST_WIDE_INT bitpos, bitsize;
2809  enum machine_mode mode;
2810  int unsignedp, volatilep;
2811
2812  STRIP_NOPS (expr);
2813
2814  code = TREE_CODE (expr);
2815  switch (code)
2816    {
2817    case INTEGER_CST:
2818      aff_combination_const (comb, type, int_cst_value (expr));
2819      return;
2820
2821    case PLUS_EXPR:
2822    case MINUS_EXPR:
2823      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2824      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2825      if (code == MINUS_EXPR)
2826	aff_combination_scale (&tmp, -1);
2827      aff_combination_add (comb, &tmp);
2828      return;
2829
2830    case MULT_EXPR:
2831      cst = TREE_OPERAND (expr, 1);
2832      if (TREE_CODE (cst) != INTEGER_CST)
2833	break;
2834      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2835      aff_combination_scale (comb, int_cst_value (cst));
2836      return;
2837
2838    case NEGATE_EXPR:
2839      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2840      aff_combination_scale (comb, -1);
2841      return;
2842
2843    case ADDR_EXPR:
2844      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2845				  &toffset, &mode, &unsignedp, &volatilep,
2846				  false);
2847      if (bitpos % BITS_PER_UNIT != 0)
2848	break;
2849      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2850      core = build_fold_addr_expr (core);
2851      if (TREE_CODE (core) == ADDR_EXPR)
2852	aff_combination_add_elt (comb, core, 1);
2853      else
2854	{
2855	  tree_to_aff_combination (core, type, &tmp);
2856	  aff_combination_add (comb, &tmp);
2857	}
2858      if (toffset)
2859	{
2860	  tree_to_aff_combination (toffset, type, &tmp);
2861	  aff_combination_add (comb, &tmp);
2862	}
2863      return;
2864
2865    default:
2866      break;
2867    }
2868
2869  aff_combination_elt (comb, type, expr);
2870}
2871
2872/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2873
2874static tree
2875add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2876		 unsigned HOST_WIDE_INT mask)
2877{
2878  enum tree_code code;
2879
2880  scale &= mask;
2881  elt = fold_convert (type, elt);
2882
2883  if (scale == 1)
2884    {
2885      if (!expr)
2886	return elt;
2887
2888      return fold_build2 (PLUS_EXPR, type, expr, elt);
2889    }
2890
2891  if (scale == mask)
2892    {
2893      if (!expr)
2894	return fold_build1 (NEGATE_EXPR, type, elt);
2895
2896      return fold_build2 (MINUS_EXPR, type, expr, elt);
2897    }
2898
2899  if (!expr)
2900    return fold_build2 (MULT_EXPR, type, elt,
2901			build_int_cst_type (type, scale));
2902
2903  if ((scale | (mask >> 1)) == mask)
2904    {
2905      /* Scale is negative.  */
2906      code = MINUS_EXPR;
2907      scale = (-scale) & mask;
2908    }
2909  else
2910    code = PLUS_EXPR;
2911
2912  elt = fold_build2 (MULT_EXPR, type, elt,
2913		     build_int_cst_type (type, scale));
2914  return fold_build2 (code, type, expr, elt);
2915}
2916
2917/* Copies the tree elements of COMB to ensure that they are not shared.  */
2918
2919static void
2920unshare_aff_combination (struct affine_tree_combination *comb)
2921{
2922  unsigned i;
2923
2924  for (i = 0; i < comb->n; i++)
2925    comb->elts[i] = unshare_expr (comb->elts[i]);
2926  if (comb->rest)
2927    comb->rest = unshare_expr (comb->rest);
2928}
2929
2930/* Makes tree from the affine combination COMB.  */
2931
2932static tree
2933aff_combination_to_tree (struct affine_tree_combination *comb)
2934{
2935  tree type = comb->type;
2936  tree expr = comb->rest;
2937  unsigned i;
2938  unsigned HOST_WIDE_INT off, sgn;
2939
2940  if (comb->n == 0 && comb->offset == 0)
2941    {
2942      if (expr)
2943	{
2944	  /* Handle the special case produced by get_computation_aff when
2945	     the type does not fit in HOST_WIDE_INT.  */
2946	  return fold_convert (type, expr);
2947	}
2948      else
2949	return build_int_cst (type, 0);
2950    }
2951
2952  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2953
2954  for (i = 0; i < comb->n; i++)
2955    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2956			    comb->mask);
2957
2958  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2959    {
2960      /* Offset is negative.  */
2961      off = (-comb->offset) & comb->mask;
2962      sgn = comb->mask;
2963    }
2964  else
2965    {
2966      off = comb->offset;
2967      sgn = 1;
2968    }
2969  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2970			  comb->mask);
2971}
2972
2973/* Folds EXPR using the affine expressions framework.  */
2974
2975static tree
2976fold_affine_expr (tree expr)
2977{
2978  tree type = TREE_TYPE (expr);
2979  struct affine_tree_combination comb;
2980
2981  if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2982    return expr;
2983
2984  tree_to_aff_combination (expr, type, &comb);
2985  return aff_combination_to_tree (&comb);
2986}
2987
2988/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989   same precision that is at least as wide as the precision of TYPE, stores
2990   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991   type of A and B.  */
2992
2993static tree
2994determine_common_wider_type (tree *a, tree *b)
2995{
2996  tree wider_type = NULL;
2997  tree suba, subb;
2998  tree atype = TREE_TYPE (*a);
2999
3000  if ((TREE_CODE (*a) == NOP_EXPR
3001       || TREE_CODE (*a) == CONVERT_EXPR))
3002    {
3003      suba = TREE_OPERAND (*a, 0);
3004      wider_type = TREE_TYPE (suba);
3005      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3006	return atype;
3007    }
3008  else
3009    return atype;
3010
3011  if ((TREE_CODE (*b) == NOP_EXPR
3012       || TREE_CODE (*b) == CONVERT_EXPR))
3013    {
3014      subb = TREE_OPERAND (*b, 0);
3015      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3016	return atype;
3017    }
3018  else
3019    return atype;
3020
3021  *a = suba;
3022  *b = subb;
3023  return wider_type;
3024}
3025
3026/* Determines the expression by that USE is expressed from induction variable
3027   CAND at statement AT in LOOP.  The expression is stored in a decomposed
3028   form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3029
3030static bool
3031get_computation_aff (struct loop *loop,
3032		     struct iv_use *use, struct iv_cand *cand, tree at,
3033		     struct affine_tree_combination *aff)
3034{
3035  tree ubase = use->iv->base;
3036  tree ustep = use->iv->step;
3037  tree cbase = cand->iv->base;
3038  tree cstep = cand->iv->step;
3039  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3040  tree common_type;
3041  tree uutype;
3042  tree expr, delta;
3043  tree ratio;
3044  unsigned HOST_WIDE_INT ustepi, cstepi;
3045  HOST_WIDE_INT ratioi;
3046  struct affine_tree_combination cbase_aff, expr_aff;
3047  tree cstep_orig = cstep, ustep_orig = ustep;
3048  double_int rat;
3049
3050  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3051    {
3052      /* We do not have a precision to express the values of use.  */
3053      return false;
3054    }
3055
3056  expr = var_at_stmt (loop, cand, at);
3057
3058  if (TREE_TYPE (expr) != ctype)
3059    {
3060      /* This may happen with the original ivs.  */
3061      expr = fold_convert (ctype, expr);
3062    }
3063
3064  if (TYPE_UNSIGNED (utype))
3065    uutype = utype;
3066  else
3067    {
3068      uutype = unsigned_type_for (utype);
3069      ubase = fold_convert (uutype, ubase);
3070      ustep = fold_convert (uutype, ustep);
3071    }
3072
3073  if (uutype != ctype)
3074    {
3075      expr = fold_convert (uutype, expr);
3076      cbase = fold_convert (uutype, cbase);
3077      cstep = fold_convert (uutype, cstep);
3078
3079      /* If the conversion is not noop, we must take it into account when
3080	 considering the value of the step.  */
3081      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3082	cstep_orig = cstep;
3083    }
3084
3085  if (cst_and_fits_in_hwi (cstep_orig)
3086      && cst_and_fits_in_hwi (ustep_orig))
3087    {
3088      ustepi = int_cst_value (ustep_orig);
3089      cstepi = int_cst_value (cstep_orig);
3090
3091      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3092	{
3093	  /* TODO maybe consider case when ustep divides cstep and the ratio is
3094	     a power of 2 (so that the division is fast to execute)?  We would
3095	     need to be much more careful with overflows etc. then.  */
3096	  return false;
3097	}
3098
3099      ratio = build_int_cst_type (uutype, ratioi);
3100    }
3101  else
3102    {
3103      if (!constant_multiple_of (ustep_orig, cstep_orig, &rat))
3104	return false;
3105      ratio = double_int_to_tree (uutype, rat);
3106
3107      /* Ratioi is only used to detect special cases when the multiplicative
3108	 factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3109	 set it to 0.  */
3110      if (double_int_fits_in_shwi_p (rat))
3111	ratioi = double_int_to_shwi (rat);
3112      else
3113	ratioi = 0;
3114    }
3115
3116  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117     type, we achieve better folding by computing their difference in this
3118     wider type, and cast the result to UUTYPE.  We do not need to worry about
3119     overflows, as all the arithmetics will in the end be performed in UUTYPE
3120     anyway.  */
3121  common_type = determine_common_wider_type (&ubase, &cbase);
3122
3123  /* We may need to shift the value if we are after the increment.  */
3124  if (stmt_after_increment (loop, cand, at))
3125    {
3126      if (uutype != common_type)
3127	cstep = fold_convert (common_type, cstep);
3128      cbase = fold_build2 (PLUS_EXPR, common_type, cbase, cstep);
3129    }
3130
3131  /* use = ubase - ratio * cbase + ratio * var.
3132
3133     In general case ubase + ratio * (var - cbase) could be better (one less
3134     multiplication), but often it is possible to eliminate redundant parts
3135     of computations from (ubase - ratio * cbase) term, and if it does not
3136     happen, fold is able to apply the distributive law to obtain this form
3137     anyway.  */
3138
3139  if (TYPE_PRECISION (common_type) > HOST_BITS_PER_WIDE_INT)
3140    {
3141      /* Let's compute in trees and just return the result in AFF.  This case
3142	 should not be very common, and fold itself is not that bad either,
3143	 so making the aff. functions more complicated to handle this case
3144	 is not that urgent.  */
3145      if (ratioi == 1)
3146	{
3147	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, cbase);
3148	  if (uutype != common_type)
3149	    delta = fold_convert (uutype, delta);
3150	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3151	}
3152      else if (ratioi == -1)
3153	{
3154	  delta = fold_build2 (PLUS_EXPR, common_type, ubase, cbase);
3155	  if (uutype != common_type)
3156	    delta = fold_convert (uutype, delta);
3157	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3158	}
3159      else
3160	{
3161	  delta = fold_build2 (MULT_EXPR, common_type, cbase, ratio);
3162	  delta = fold_build2 (MINUS_EXPR, common_type, ubase, delta);
3163	  if (uutype != common_type)
3164	    delta = fold_convert (uutype, delta);
3165	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3166	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3167	}
3168
3169      aff->type = uutype;
3170      aff->n = 0;
3171      aff->offset = 0;
3172      aff->mask = 0;
3173      aff->rest = expr;
3174      return true;
3175    }
3176
3177  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3178     possible to compute ratioi.  */
3179  gcc_assert (ratioi);
3180
3181  tree_to_aff_combination (ubase, common_type, aff);
3182  tree_to_aff_combination (cbase, common_type, &cbase_aff);
3183  tree_to_aff_combination (expr, uutype, &expr_aff);
3184  aff_combination_scale (&cbase_aff, -ratioi);
3185  aff_combination_scale (&expr_aff, ratioi);
3186  aff_combination_add (aff, &cbase_aff);
3187  if (common_type != uutype)
3188    aff_combination_convert (uutype, aff);
3189  aff_combination_add (aff, &expr_aff);
3190
3191  return true;
3192}
3193
3194/* Determines the expression by that USE is expressed from induction variable
3195   CAND at statement AT in LOOP.  The computation is unshared.  */
3196
3197static tree
3198get_computation_at (struct loop *loop,
3199		    struct iv_use *use, struct iv_cand *cand, tree at)
3200{
3201  struct affine_tree_combination aff;
3202  tree type = TREE_TYPE (use->iv->base);
3203
3204  if (!get_computation_aff (loop, use, cand, at, &aff))
3205    return NULL_TREE;
3206  unshare_aff_combination (&aff);
3207  return fold_convert (type, aff_combination_to_tree (&aff));
3208}
3209
3210/* Determines the expression by that USE is expressed from induction variable
3211   CAND in LOOP.  The computation is unshared.  */
3212
3213static tree
3214get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3215{
3216  return get_computation_at (loop, use, cand, use->stmt);
3217}
3218
3219/* Returns cost of addition in MODE.  */
3220
3221static unsigned
3222add_cost (enum machine_mode mode)
3223{
3224  static unsigned costs[NUM_MACHINE_MODES];
3225  rtx seq;
3226  unsigned cost;
3227
3228  if (costs[mode])
3229    return costs[mode];
3230
3231  start_sequence ();
3232  force_operand (gen_rtx_fmt_ee (PLUS, mode,
3233				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3235		 NULL_RTX);
3236  seq = get_insns ();
3237  end_sequence ();
3238
3239  cost = seq_cost (seq);
3240  if (!cost)
3241    cost = 1;
3242
3243  costs[mode] = cost;
3244
3245  if (dump_file && (dump_flags & TDF_DETAILS))
3246    fprintf (dump_file, "Addition in %s costs %d\n",
3247	     GET_MODE_NAME (mode), cost);
3248  return cost;
3249}
3250
3251/* Entry in a hashtable of already known costs for multiplication.  */
3252struct mbc_entry
3253{
3254  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
3255  enum machine_mode mode;	/* In mode.  */
3256  unsigned cost;		/* The cost.  */
3257};
3258
3259/* Counts hash value for the ENTRY.  */
3260
3261static hashval_t
3262mbc_entry_hash (const void *entry)
3263{
3264  const struct mbc_entry *e = entry;
3265
3266  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3267}
3268
3269/* Compares the hash table entries ENTRY1 and ENTRY2.  */
3270
3271static int
3272mbc_entry_eq (const void *entry1, const void *entry2)
3273{
3274  const struct mbc_entry *e1 = entry1;
3275  const struct mbc_entry *e2 = entry2;
3276
3277  return (e1->mode == e2->mode
3278	  && e1->cst == e2->cst);
3279}
3280
3281/* Returns cost of multiplication by constant CST in MODE.  */
3282
3283unsigned
3284multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3285{
3286  static htab_t costs;
3287  struct mbc_entry **cached, act;
3288  rtx seq;
3289  unsigned cost;
3290
3291  if (!costs)
3292    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3293
3294  act.mode = mode;
3295  act.cst = cst;
3296  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3297  if (*cached)
3298    return (*cached)->cost;
3299
3300  *cached = XNEW (struct mbc_entry);
3301  (*cached)->mode = mode;
3302  (*cached)->cst = cst;
3303
3304  start_sequence ();
3305  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3306	       gen_int_mode (cst, mode), NULL_RTX, 0);
3307  seq = get_insns ();
3308  end_sequence ();
3309
3310  cost = seq_cost (seq);
3311
3312  if (dump_file && (dump_flags & TDF_DETAILS))
3313    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3314	     (int) cst, GET_MODE_NAME (mode), cost);
3315
3316  (*cached)->cost = cost;
3317
3318  return cost;
3319}
3320
3321/* Returns true if multiplying by RATIO is allowed in address.  */
3322
3323bool
3324multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3325{
3326#define MAX_RATIO 128
3327  static sbitmap valid_mult;
3328
3329  if (!valid_mult)
3330    {
3331      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3332      rtx addr;
3333      HOST_WIDE_INT i;
3334
3335      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3336      sbitmap_zero (valid_mult);
3337      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3338      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3339	{
3340	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
3341	  if (memory_address_p (Pmode, addr))
3342	    SET_BIT (valid_mult, i + MAX_RATIO);
3343	}
3344
3345      if (dump_file && (dump_flags & TDF_DETAILS))
3346	{
3347	  fprintf (dump_file, "  allowed multipliers:");
3348	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3349	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
3350	      fprintf (dump_file, " %d", (int) i);
3351	  fprintf (dump_file, "\n");
3352	  fprintf (dump_file, "\n");
3353	}
3354    }
3355
3356  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3357    return false;
3358
3359  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3360}
3361
3362/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3363   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3364   variable is omitted.  The created memory accesses MODE.
3365
3366   TODO -- there must be some better way.  This all is quite crude.  */
3367
3368static unsigned
3369get_address_cost (bool symbol_present, bool var_present,
3370		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3371{
3372  static bool initialized = false;
3373  static HOST_WIDE_INT rat, off;
3374  static HOST_WIDE_INT min_offset, max_offset;
3375  static unsigned costs[2][2][2][2];
3376  unsigned cost, acost;
3377  bool offset_p, ratio_p;
3378  HOST_WIDE_INT s_offset;
3379  unsigned HOST_WIDE_INT mask;
3380  unsigned bits;
3381
3382  if (!initialized)
3383    {
3384      HOST_WIDE_INT i;
3385      int old_cse_not_expected;
3386      unsigned sym_p, var_p, off_p, rat_p, add_c;
3387      rtx seq, addr, base;
3388      rtx reg0, reg1;
3389
3390      initialized = true;
3391
3392      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3393
3394      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3395      for (i = 1; i <= 1 << 20; i <<= 1)
3396	{
3397	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
3398	  if (!memory_address_p (Pmode, addr))
3399	    break;
3400	}
3401      max_offset = i >> 1;
3402      off = max_offset;
3403
3404      for (i = 1; i <= 1 << 20; i <<= 1)
3405	{
3406	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3407	  if (!memory_address_p (Pmode, addr))
3408	    break;
3409	}
3410      min_offset = -(i >> 1);
3411
3412      if (dump_file && (dump_flags & TDF_DETAILS))
3413	{
3414	  fprintf (dump_file, "get_address_cost:\n");
3415	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3416	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3417	}
3418
3419      rat = 1;
3420      for (i = 2; i <= MAX_RATIO; i++)
3421	if (multiplier_allowed_in_address_p (i))
3422	  {
3423	    rat = i;
3424	    break;
3425	  }
3426
3427      /* Compute the cost of various addressing modes.  */
3428      acost = 0;
3429      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3430      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3431
3432      for (i = 0; i < 16; i++)
3433	{
3434	  sym_p = i & 1;
3435	  var_p = (i >> 1) & 1;
3436	  off_p = (i >> 2) & 1;
3437	  rat_p = (i >> 3) & 1;
3438
3439	  addr = reg0;
3440	  if (rat_p)
3441	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3442
3443	  if (var_p)
3444	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3445
3446	  if (sym_p)
3447	    {
3448	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3449	      if (off_p)
3450		base = gen_rtx_fmt_e (CONST, Pmode,
3451				      gen_rtx_fmt_ee (PLUS, Pmode,
3452						      base,
3453						      gen_int_mode (off, Pmode)));
3454	    }
3455	  else if (off_p)
3456	    base = gen_int_mode (off, Pmode);
3457	  else
3458	    base = NULL_RTX;
3459
3460	  if (base)
3461	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3462
3463	  start_sequence ();
3464	  /* To avoid splitting addressing modes, pretend that no cse will
3465	     follow.  */
3466	  old_cse_not_expected = cse_not_expected;
3467	  cse_not_expected = true;
3468	  addr = memory_address (Pmode, addr);
3469	  cse_not_expected = old_cse_not_expected;
3470	  seq = get_insns ();
3471	  end_sequence ();
3472
3473	  acost = seq_cost (seq);
3474	  acost += address_cost (addr, Pmode);
3475
3476	  if (!acost)
3477	    acost = 1;
3478	  costs[sym_p][var_p][off_p][rat_p] = acost;
3479	}
3480
3481      /* On some targets, it is quite expensive to load symbol to a register,
3482	 which makes addresses that contain symbols look much more expensive.
3483	 However, the symbol will have to be loaded in any case before the
3484	 loop (and quite likely we have it in register already), so it does not
3485	 make much sense to penalize them too heavily.  So make some final
3486         tweaks for the SYMBOL_PRESENT modes:
3487
3488         If VAR_PRESENT is false, and the mode obtained by changing symbol to
3489	 var is cheaper, use this mode with small penalty.
3490	 If VAR_PRESENT is true, try whether the mode with
3491	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3492	 if this is the case, use it.  */
3493      add_c = add_cost (Pmode);
3494      for (i = 0; i < 8; i++)
3495	{
3496	  var_p = i & 1;
3497	  off_p = (i >> 1) & 1;
3498	  rat_p = (i >> 2) & 1;
3499
3500	  acost = costs[0][1][off_p][rat_p] + 1;
3501	  if (var_p)
3502	    acost += add_c;
3503
3504	  if (acost < costs[1][var_p][off_p][rat_p])
3505	    costs[1][var_p][off_p][rat_p] = acost;
3506	}
3507
3508      if (dump_file && (dump_flags & TDF_DETAILS))
3509	{
3510	  fprintf (dump_file, "Address costs:\n");
3511
3512	  for (i = 0; i < 16; i++)
3513	    {
3514	      sym_p = i & 1;
3515	      var_p = (i >> 1) & 1;
3516	      off_p = (i >> 2) & 1;
3517	      rat_p = (i >> 3) & 1;
3518
3519	      fprintf (dump_file, "  ");
3520	      if (sym_p)
3521		fprintf (dump_file, "sym + ");
3522	      if (var_p)
3523		fprintf (dump_file, "var + ");
3524	      if (off_p)
3525		fprintf (dump_file, "cst + ");
3526	      if (rat_p)
3527		fprintf (dump_file, "rat * ");
3528
3529	      acost = costs[sym_p][var_p][off_p][rat_p];
3530	      fprintf (dump_file, "index costs %d\n", acost);
3531	    }
3532	  fprintf (dump_file, "\n");
3533	}
3534    }
3535
3536  bits = GET_MODE_BITSIZE (Pmode);
3537  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3538  offset &= mask;
3539  if ((offset >> (bits - 1) & 1))
3540    offset |= ~mask;
3541  s_offset = offset;
3542
3543  cost = 0;
3544  offset_p = (s_offset != 0
3545	      && min_offset <= s_offset && s_offset <= max_offset);
3546  ratio_p = (ratio != 1
3547	     && multiplier_allowed_in_address_p (ratio));
3548
3549  if (ratio != 1 && !ratio_p)
3550    cost += multiply_by_cost (ratio, Pmode);
3551
3552  if (s_offset && !offset_p && !symbol_present)
3553    {
3554      cost += add_cost (Pmode);
3555      var_present = true;
3556    }
3557
3558  acost = costs[symbol_present][var_present][offset_p][ratio_p];
3559  return cost + acost;
3560}
3561
3562/* Estimates cost of forcing expression EXPR into a variable.  */
3563
3564unsigned
3565force_expr_to_var_cost (tree expr)
3566{
3567  static bool costs_initialized = false;
3568  static unsigned integer_cost;
3569  static unsigned symbol_cost;
3570  static unsigned address_cost;
3571  tree op0, op1;
3572  unsigned cost0, cost1, cost;
3573  enum machine_mode mode;
3574
3575  if (!costs_initialized)
3576    {
3577      tree var = create_tmp_var_raw (integer_type_node, "test_var");
3578      rtx x = gen_rtx_MEM (DECL_MODE (var),
3579			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3580      tree addr;
3581      tree type = build_pointer_type (integer_type_node);
3582
3583      integer_cost = computation_cost (build_int_cst (integer_type_node,
3584						      2000));
3585
3586      SET_DECL_RTL (var, x);
3587      TREE_STATIC (var) = 1;
3588      addr = build1 (ADDR_EXPR, type, var);
3589      symbol_cost = computation_cost (addr) + 1;
3590
3591      address_cost
3592	= computation_cost (build2 (PLUS_EXPR, type,
3593				    addr,
3594				    build_int_cst (type, 2000))) + 1;
3595      if (dump_file && (dump_flags & TDF_DETAILS))
3596	{
3597	  fprintf (dump_file, "force_expr_to_var_cost:\n");
3598	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3599	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3600	  fprintf (dump_file, "  address %d\n", (int) address_cost);
3601	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3602	  fprintf (dump_file, "\n");
3603	}
3604
3605      costs_initialized = true;
3606    }
3607
3608  STRIP_NOPS (expr);
3609
3610  if (SSA_VAR_P (expr))
3611    return 0;
3612
3613  if (TREE_INVARIANT (expr))
3614    {
3615      if (TREE_CODE (expr) == INTEGER_CST)
3616	return integer_cost;
3617
3618      if (TREE_CODE (expr) == ADDR_EXPR)
3619	{
3620	  tree obj = TREE_OPERAND (expr, 0);
3621
3622	  if (TREE_CODE (obj) == VAR_DECL
3623	      || TREE_CODE (obj) == PARM_DECL
3624	      || TREE_CODE (obj) == RESULT_DECL)
3625	    return symbol_cost;
3626	}
3627
3628      return address_cost;
3629    }
3630
3631  switch (TREE_CODE (expr))
3632    {
3633    case PLUS_EXPR:
3634    case MINUS_EXPR:
3635    case MULT_EXPR:
3636      op0 = TREE_OPERAND (expr, 0);
3637      op1 = TREE_OPERAND (expr, 1);
3638      STRIP_NOPS (op0);
3639      STRIP_NOPS (op1);
3640
3641      if (is_gimple_val (op0))
3642	cost0 = 0;
3643      else
3644	cost0 = force_expr_to_var_cost (op0);
3645
3646      if (is_gimple_val (op1))
3647	cost1 = 0;
3648      else
3649	cost1 = force_expr_to_var_cost (op1);
3650
3651      break;
3652
3653    default:
3654      /* Just an arbitrary value, FIXME.  */
3655      return target_spill_cost;
3656    }
3657
3658  mode = TYPE_MODE (TREE_TYPE (expr));
3659  switch (TREE_CODE (expr))
3660    {
3661    case PLUS_EXPR:
3662    case MINUS_EXPR:
3663      cost = add_cost (mode);
3664      break;
3665
3666    case MULT_EXPR:
3667      if (cst_and_fits_in_hwi (op0))
3668	cost = multiply_by_cost (int_cst_value (op0), mode);
3669      else if (cst_and_fits_in_hwi (op1))
3670	cost = multiply_by_cost (int_cst_value (op1), mode);
3671      else
3672	return target_spill_cost;
3673      break;
3674
3675    default:
3676      gcc_unreachable ();
3677    }
3678
3679  cost += cost0;
3680  cost += cost1;
3681
3682  /* Bound the cost by target_spill_cost.  The parts of complicated
3683     computations often are either loop invariant or at least can
3684     be shared between several iv uses, so letting this grow without
3685     limits would not give reasonable results.  */
3686  return cost < target_spill_cost ? cost : target_spill_cost;
3687}
3688
3689/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3690   invariants the computation depends on.  */
3691
3692static unsigned
3693force_var_cost (struct ivopts_data *data,
3694		tree expr, bitmap *depends_on)
3695{
3696  if (depends_on)
3697    {
3698      fd_ivopts_data = data;
3699      walk_tree (&expr, find_depends, depends_on, NULL);
3700    }
3701
3702  return force_expr_to_var_cost (expr);
3703}
3704
3705/* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3706   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3707   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3708   invariants the computation depends on.  */
3709
3710static unsigned
3711split_address_cost (struct ivopts_data *data,
3712		    tree addr, bool *symbol_present, bool *var_present,
3713		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3714{
3715  tree core;
3716  HOST_WIDE_INT bitsize;
3717  HOST_WIDE_INT bitpos;
3718  tree toffset;
3719  enum machine_mode mode;
3720  int unsignedp, volatilep;
3721
3722  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3723			      &unsignedp, &volatilep, false);
3724
3725  if (toffset != 0
3726      || bitpos % BITS_PER_UNIT != 0
3727      || TREE_CODE (core) != VAR_DECL)
3728    {
3729      *symbol_present = false;
3730      *var_present = true;
3731      fd_ivopts_data = data;
3732      walk_tree (&addr, find_depends, depends_on, NULL);
3733      return target_spill_cost;
3734    }
3735
3736  *offset += bitpos / BITS_PER_UNIT;
3737  if (TREE_STATIC (core)
3738      || DECL_EXTERNAL (core))
3739    {
3740      *symbol_present = true;
3741      *var_present = false;
3742      return 0;
3743    }
3744
3745  *symbol_present = false;
3746  *var_present = true;
3747  return 0;
3748}
3749
3750/* Estimates cost of expressing difference of addresses E1 - E2 as
3751   var + symbol + offset.  The value of offset is added to OFFSET,
3752   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3753   part is missing.  DEPENDS_ON is a set of the invariants the computation
3754   depends on.  */
3755
3756static unsigned
3757ptr_difference_cost (struct ivopts_data *data,
3758		     tree e1, tree e2, bool *symbol_present, bool *var_present,
3759		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3760{
3761  HOST_WIDE_INT diff = 0;
3762  unsigned cost;
3763
3764  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3765
3766  if (ptr_difference_const (e1, e2, &diff))
3767    {
3768      *offset += diff;
3769      *symbol_present = false;
3770      *var_present = false;
3771      return 0;
3772    }
3773
3774  if (e2 == integer_zero_node)
3775    return split_address_cost (data, TREE_OPERAND (e1, 0),
3776			       symbol_present, var_present, offset, depends_on);
3777
3778  *symbol_present = false;
3779  *var_present = true;
3780
3781  cost = force_var_cost (data, e1, depends_on);
3782  cost += force_var_cost (data, e2, depends_on);
3783  cost += add_cost (Pmode);
3784
3785  return cost;
3786}
3787
3788/* Estimates cost of expressing difference E1 - E2 as
3789   var + symbol + offset.  The value of offset is added to OFFSET,
3790   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3791   part is missing.  DEPENDS_ON is a set of the invariants the computation
3792   depends on.  */
3793
3794static unsigned
3795difference_cost (struct ivopts_data *data,
3796		 tree e1, tree e2, bool *symbol_present, bool *var_present,
3797		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3798{
3799  unsigned cost;
3800  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3801  unsigned HOST_WIDE_INT off1, off2;
3802
3803  e1 = strip_offset (e1, &off1);
3804  e2 = strip_offset (e2, &off2);
3805  *offset += off1 - off2;
3806
3807  STRIP_NOPS (e1);
3808  STRIP_NOPS (e2);
3809
3810  if (TREE_CODE (e1) == ADDR_EXPR)
3811    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3812				depends_on);
3813  *symbol_present = false;
3814
3815  if (operand_equal_p (e1, e2, 0))
3816    {
3817      *var_present = false;
3818      return 0;
3819    }
3820  *var_present = true;
3821  if (zero_p (e2))
3822    return force_var_cost (data, e1, depends_on);
3823
3824  if (zero_p (e1))
3825    {
3826      cost = force_var_cost (data, e2, depends_on);
3827      cost += multiply_by_cost (-1, mode);
3828
3829      return cost;
3830    }
3831
3832  cost = force_var_cost (data, e1, depends_on);
3833  cost += force_var_cost (data, e2, depends_on);
3834  cost += add_cost (mode);
3835
3836  return cost;
3837}
3838
3839/* Determines the cost of the computation by that USE is expressed
3840   from induction variable CAND.  If ADDRESS_P is true, we just need
3841   to create an address from it, otherwise we want to get it into
3842   register.  A set of invariants we depend on is stored in
3843   DEPENDS_ON.  AT is the statement at that the value is computed.  */
3844
3845static unsigned
3846get_computation_cost_at (struct ivopts_data *data,
3847			 struct iv_use *use, struct iv_cand *cand,
3848			 bool address_p, bitmap *depends_on, tree at)
3849{
3850  tree ubase = use->iv->base, ustep = use->iv->step;
3851  tree cbase, cstep;
3852  tree utype = TREE_TYPE (ubase), ctype;
3853  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3854  HOST_WIDE_INT ratio, aratio;
3855  bool var_present, symbol_present;
3856  unsigned cost = 0, n_sums;
3857
3858  *depends_on = NULL;
3859
3860  /* Only consider real candidates.  */
3861  if (!cand->iv)
3862    return INFTY;
3863
3864  cbase = cand->iv->base;
3865  cstep = cand->iv->step;
3866  ctype = TREE_TYPE (cbase);
3867
3868  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3869    {
3870      /* We do not have a precision to express the values of use.  */
3871      return INFTY;
3872    }
3873
3874  if (address_p)
3875    {
3876      /* Do not try to express address of an object with computation based
3877	 on address of a different object.  This may cause problems in rtl
3878	 level alias analysis (that does not expect this to be happening,
3879	 as this is illegal in C), and would be unlikely to be useful
3880	 anyway.  */
3881      if (use->iv->base_object
3882	  && cand->iv->base_object
3883	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3884	return INFTY;
3885    }
3886
3887  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3888    {
3889      /* TODO -- add direct handling of this case.  */
3890      goto fallback;
3891    }
3892
3893  /* CSTEPI is removed from the offset in case statement is after the
3894     increment.  If the step is not constant, we use zero instead.
3895     This is a bit imprecise (there is the extra addition), but
3896     redundancy elimination is likely to transform the code so that
3897     it uses value of the variable before increment anyway,
3898     so it is not that much unrealistic.  */
3899  if (cst_and_fits_in_hwi (cstep))
3900    cstepi = int_cst_value (cstep);
3901  else
3902    cstepi = 0;
3903
3904  if (cst_and_fits_in_hwi (ustep)
3905      && cst_and_fits_in_hwi (cstep))
3906    {
3907      ustepi = int_cst_value (ustep);
3908
3909      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3910	return INFTY;
3911    }
3912  else
3913    {
3914      double_int rat;
3915
3916      if (!constant_multiple_of (ustep, cstep, &rat))
3917	return INFTY;
3918
3919      if (double_int_fits_in_shwi_p (rat))
3920	ratio = double_int_to_shwi (rat);
3921      else
3922	return INFTY;
3923    }
3924
3925  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3926     or ratio == 1, it is better to handle this like
3927
3928     ubase - ratio * cbase + ratio * var
3929
3930     (also holds in the case ratio == -1, TODO.  */
3931
3932  if (cst_and_fits_in_hwi (cbase))
3933    {
3934      offset = - ratio * int_cst_value (cbase);
3935      cost += difference_cost (data,
3936			       ubase, integer_zero_node,
3937			       &symbol_present, &var_present, &offset,
3938			       depends_on);
3939    }
3940  else if (ratio == 1)
3941    {
3942      cost += difference_cost (data,
3943			       ubase, cbase,
3944			       &symbol_present, &var_present, &offset,
3945			       depends_on);
3946    }
3947  else
3948    {
3949      cost += force_var_cost (data, cbase, depends_on);
3950      cost += add_cost (TYPE_MODE (ctype));
3951      cost += difference_cost (data,
3952			       ubase, integer_zero_node,
3953			       &symbol_present, &var_present, &offset,
3954			       depends_on);
3955    }
3956
3957  /* If we are after the increment, the value of the candidate is higher by
3958     one iteration.  */
3959  if (stmt_after_increment (data->current_loop, cand, at))
3960    offset -= ratio * cstepi;
3961
3962  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3963     (symbol/var/const parts may be omitted).  If we are looking for an address,
3964     find the cost of addressing this.  */
3965  if (address_p)
3966    return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3967
3968  /* Otherwise estimate the costs for computing the expression.  */
3969  aratio = ratio > 0 ? ratio : -ratio;
3970  if (!symbol_present && !var_present && !offset)
3971    {
3972      if (ratio != 1)
3973	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3974
3975      return cost;
3976    }
3977
3978  if (aratio != 1)
3979    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3980
3981  n_sums = 1;
3982  if (var_present
3983      /* Symbol + offset should be compile-time computable.  */
3984      && (symbol_present || offset))
3985    n_sums++;
3986
3987  return cost + n_sums * add_cost (TYPE_MODE (ctype));
3988
3989fallback:
3990  {
3991    /* Just get the expression, expand it and measure the cost.  */
3992    tree comp = get_computation_at (data->current_loop, use, cand, at);
3993
3994    if (!comp)
3995      return INFTY;
3996
3997    if (address_p)
3998      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3999
4000    return computation_cost (comp);
4001  }
4002}
4003
4004/* Determines the cost of the computation by that USE is expressed
4005   from induction variable CAND.  If ADDRESS_P is true, we just need
4006   to create an address from it, otherwise we want to get it into
4007   register.  A set of invariants we depend on is stored in
4008   DEPENDS_ON.  */
4009
4010static unsigned
4011get_computation_cost (struct ivopts_data *data,
4012		      struct iv_use *use, struct iv_cand *cand,
4013		      bool address_p, bitmap *depends_on)
4014{
4015  return get_computation_cost_at (data,
4016				  use, cand, address_p, depends_on, use->stmt);
4017}
4018
4019/* Determines cost of basing replacement of USE on CAND in a generic
4020   expression.  */
4021
4022static bool
4023determine_use_iv_cost_generic (struct ivopts_data *data,
4024			       struct iv_use *use, struct iv_cand *cand)
4025{
4026  bitmap depends_on;
4027  unsigned cost;
4028
4029  /* The simple case first -- if we need to express value of the preserved
4030     original biv, the cost is 0.  This also prevents us from counting the
4031     cost of increment twice -- once at this use and once in the cost of
4032     the candidate.  */
4033  if (cand->pos == IP_ORIGINAL
4034      && cand->incremented_at == use->stmt)
4035    {
4036      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4037      return true;
4038    }
4039
4040  cost = get_computation_cost (data, use, cand, false, &depends_on);
4041  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4042
4043  return cost != INFTY;
4044}
4045
4046/* Determines cost of basing replacement of USE on CAND in an address.  */
4047
4048static bool
4049determine_use_iv_cost_address (struct ivopts_data *data,
4050			       struct iv_use *use, struct iv_cand *cand)
4051{
4052  bitmap depends_on;
4053  unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
4054
4055  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4056
4057  return cost != INFTY;
4058}
4059
4060/* Computes value of induction variable IV in iteration NITER.  */
4061
4062static tree
4063iv_value (struct iv *iv, tree niter)
4064{
4065  tree val;
4066  tree type = TREE_TYPE (iv->base);
4067
4068  niter = fold_convert (type, niter);
4069  val = fold_build2 (MULT_EXPR, type, iv->step, niter);
4070
4071  return fold_build2 (PLUS_EXPR, type, iv->base, val);
4072}
4073
4074/* Computes value of candidate CAND at position AT in iteration NITER.  */
4075
4076static tree
4077cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
4078{
4079  tree val = iv_value (cand->iv, niter);
4080  tree type = TREE_TYPE (cand->iv->base);
4081
4082  if (stmt_after_increment (loop, cand, at))
4083    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
4084
4085  return val;
4086}
4087
4088/* Returns period of induction variable iv.  */
4089
4090static tree
4091iv_period (struct iv *iv)
4092{
4093  tree step = iv->step, period, type;
4094  tree pow2div;
4095
4096  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4097
4098  /* Period of the iv is gcd (step, type range).  Since type range is power
4099     of two, it suffices to determine the maximum power of two that divides
4100     step.  */
4101  pow2div = num_ending_zeros (step);
4102  type = unsigned_type_for (TREE_TYPE (step));
4103
4104  period = build_low_bits_mask (type,
4105				(TYPE_PRECISION (type)
4106				 - tree_low_cst (pow2div, 1)));
4107
4108  return period;
4109}
4110
4111/* Returns the comparison operator used when eliminating the iv USE.  */
4112
4113static enum tree_code
4114iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4115{
4116  struct loop *loop = data->current_loop;
4117  basic_block ex_bb;
4118  edge exit;
4119
4120  ex_bb = bb_for_stmt (use->stmt);
4121  exit = EDGE_SUCC (ex_bb, 0);
4122  if (flow_bb_inside_loop_p (loop, exit->dest))
4123    exit = EDGE_SUCC (ex_bb, 1);
4124
4125  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4126}
4127
4128/* Check whether it is possible to express the condition in USE by comparison
4129   of candidate CAND.  If so, store the value compared with to BOUND.  */
4130
4131static bool
4132may_eliminate_iv (struct ivopts_data *data,
4133		  struct iv_use *use, struct iv_cand *cand, tree *bound)
4134{
4135  basic_block ex_bb;
4136  edge exit;
4137  tree nit, nit_type;
4138  tree wider_type, period, per_type;
4139  struct loop *loop = data->current_loop;
4140
4141  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4142    return false;
4143
4144  /* For now works only for exits that dominate the loop latch.  TODO -- extend
4145     for other conditions inside loop body.  */
4146  ex_bb = bb_for_stmt (use->stmt);
4147  if (use->stmt != last_stmt (ex_bb)
4148      || TREE_CODE (use->stmt) != COND_EXPR)
4149    return false;
4150  if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4151    return false;
4152
4153  exit = EDGE_SUCC (ex_bb, 0);
4154  if (flow_bb_inside_loop_p (loop, exit->dest))
4155    exit = EDGE_SUCC (ex_bb, 1);
4156  if (flow_bb_inside_loop_p (loop, exit->dest))
4157    return false;
4158
4159  nit = niter_for_exit (data, exit);
4160  if (!nit)
4161    return false;
4162
4163  nit_type = TREE_TYPE (nit);
4164
4165  /* Determine whether we may use the variable to test whether niter iterations
4166     elapsed.  This is the case iff the period of the induction variable is
4167     greater than the number of iterations.  */
4168  period = iv_period (cand->iv);
4169  if (!period)
4170    return false;
4171  per_type = TREE_TYPE (period);
4172
4173  wider_type = TREE_TYPE (period);
4174  if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4175    wider_type = per_type;
4176  else
4177    wider_type = nit_type;
4178
4179  if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4180				      fold_convert (wider_type, period),
4181				      fold_convert (wider_type, nit))))
4182    return false;
4183
4184  *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4185  return true;
4186}
4187
4188/* Determines cost of basing replacement of USE on CAND in a condition.  */
4189
4190static bool
4191determine_use_iv_cost_condition (struct ivopts_data *data,
4192				 struct iv_use *use, struct iv_cand *cand)
4193{
4194  tree bound = NULL_TREE, op, cond;
4195  bitmap depends_on = NULL;
4196  unsigned cost;
4197
4198  /* Only consider real candidates.  */
4199  if (!cand->iv)
4200    {
4201      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4202      return false;
4203    }
4204
4205  if (may_eliminate_iv (data, use, cand, &bound))
4206    {
4207      cost = force_var_cost (data, bound, &depends_on);
4208
4209      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4210      return cost != INFTY;
4211    }
4212
4213  /* The induction variable elimination failed; just express the original
4214     giv.  If it is compared with an invariant, note that we cannot get
4215     rid of it.  */
4216  cost = get_computation_cost (data, use, cand, false, &depends_on);
4217
4218  cond = *use->op_p;
4219  if (TREE_CODE (cond) != SSA_NAME)
4220    {
4221      op = TREE_OPERAND (cond, 0);
4222      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4223	op = TREE_OPERAND (cond, 1);
4224      if (TREE_CODE (op) == SSA_NAME)
4225	{
4226	  op = get_iv (data, op)->base;
4227	  fd_ivopts_data = data;
4228	  walk_tree (&op, find_depends, &depends_on, NULL);
4229	}
4230    }
4231
4232  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4233  return cost != INFTY;
4234}
4235
4236/* Determines cost of basing replacement of USE on CAND.  Returns false
4237   if USE cannot be based on CAND.  */
4238
4239static bool
4240determine_use_iv_cost (struct ivopts_data *data,
4241		       struct iv_use *use, struct iv_cand *cand)
4242{
4243  switch (use->type)
4244    {
4245    case USE_NONLINEAR_EXPR:
4246      return determine_use_iv_cost_generic (data, use, cand);
4247
4248    case USE_ADDRESS:
4249      return determine_use_iv_cost_address (data, use, cand);
4250
4251    case USE_COMPARE:
4252      return determine_use_iv_cost_condition (data, use, cand);
4253
4254    default:
4255      gcc_unreachable ();
4256    }
4257}
4258
4259/* Determines costs of basing the use of the iv on an iv candidate.  */
4260
4261static void
4262determine_use_iv_costs (struct ivopts_data *data)
4263{
4264  unsigned i, j;
4265  struct iv_use *use;
4266  struct iv_cand *cand;
4267  bitmap to_clear = BITMAP_ALLOC (NULL);
4268
4269  alloc_use_cost_map (data);
4270
4271  for (i = 0; i < n_iv_uses (data); i++)
4272    {
4273      use = iv_use (data, i);
4274
4275      if (data->consider_all_candidates)
4276	{
4277	  for (j = 0; j < n_iv_cands (data); j++)
4278	    {
4279	      cand = iv_cand (data, j);
4280	      determine_use_iv_cost (data, use, cand);
4281	    }
4282	}
4283      else
4284	{
4285	  bitmap_iterator bi;
4286
4287	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4288	    {
4289	      cand = iv_cand (data, j);
4290	      if (!determine_use_iv_cost (data, use, cand))
4291		bitmap_set_bit (to_clear, j);
4292	    }
4293
4294	  /* Remove the candidates for that the cost is infinite from
4295	     the list of related candidates.  */
4296	  bitmap_and_compl_into (use->related_cands, to_clear);
4297	  bitmap_clear (to_clear);
4298	}
4299    }
4300
4301  BITMAP_FREE (to_clear);
4302
4303  if (dump_file && (dump_flags & TDF_DETAILS))
4304    {
4305      fprintf (dump_file, "Use-candidate costs:\n");
4306
4307      for (i = 0; i < n_iv_uses (data); i++)
4308	{
4309	  use = iv_use (data, i);
4310
4311	  fprintf (dump_file, "Use %d:\n", i);
4312	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
4313	  for (j = 0; j < use->n_map_members; j++)
4314	    {
4315	      if (!use->cost_map[j].cand
4316		  || use->cost_map[j].cost == INFTY)
4317		continue;
4318
4319	      fprintf (dump_file, "  %d\t%d\t",
4320		       use->cost_map[j].cand->id,
4321		       use->cost_map[j].cost);
4322	      if (use->cost_map[j].depends_on)
4323		bitmap_print (dump_file,
4324			      use->cost_map[j].depends_on, "","");
4325	      fprintf (dump_file, "\n");
4326	    }
4327
4328	  fprintf (dump_file, "\n");
4329	}
4330      fprintf (dump_file, "\n");
4331    }
4332}
4333
4334/* Determines cost of the candidate CAND.  */
4335
4336static void
4337determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4338{
4339  unsigned cost_base, cost_step;
4340  tree base;
4341
4342  if (!cand->iv)
4343    {
4344      cand->cost = 0;
4345      return;
4346    }
4347
4348  /* There are two costs associated with the candidate -- its increment
4349     and its initialization.  The second is almost negligible for any loop
4350     that rolls enough, so we take it just very little into account.  */
4351
4352  base = cand->iv->base;
4353  cost_base = force_var_cost (data, base, NULL);
4354  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4355
4356  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4357
4358  /* Prefer the original iv unless we may gain something by replacing it;
4359     this is not really relevant for artificial ivs created by other
4360     passes.  */
4361  if (cand->pos == IP_ORIGINAL
4362      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4363    cand->cost--;
4364
4365  /* Prefer not to insert statements into latch unless there are some
4366     already (so that we do not create unnecessary jumps).  */
4367  if (cand->pos == IP_END
4368      && empty_block_p (ip_end_pos (data->current_loop)))
4369    cand->cost++;
4370}
4371
4372/* Determines costs of computation of the candidates.  */
4373
4374static void
4375determine_iv_costs (struct ivopts_data *data)
4376{
4377  unsigned i;
4378
4379  if (dump_file && (dump_flags & TDF_DETAILS))
4380    {
4381      fprintf (dump_file, "Candidate costs:\n");
4382      fprintf (dump_file, "  cand\tcost\n");
4383    }
4384
4385  for (i = 0; i < n_iv_cands (data); i++)
4386    {
4387      struct iv_cand *cand = iv_cand (data, i);
4388
4389      determine_iv_cost (data, cand);
4390
4391      if (dump_file && (dump_flags & TDF_DETAILS))
4392	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4393    }
4394
4395if (dump_file && (dump_flags & TDF_DETAILS))
4396      fprintf (dump_file, "\n");
4397}
4398
4399/* Calculates cost for having SIZE induction variables.  */
4400
4401static unsigned
4402ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4403{
4404  return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4405}
4406
4407/* For each size of the induction variable set determine the penalty.  */
4408
4409static void
4410determine_set_costs (struct ivopts_data *data)
4411{
4412  unsigned j, n;
4413  tree phi, op;
4414  struct loop *loop = data->current_loop;
4415  bitmap_iterator bi;
4416
4417  /* We use the following model (definitely improvable, especially the
4418     cost function -- TODO):
4419
4420     We estimate the number of registers available (using MD data), name it A.
4421
4422     We estimate the number of registers used by the loop, name it U.  This
4423     number is obtained as the number of loop phi nodes (not counting virtual
4424     registers and bivs) + the number of variables from outside of the loop.
4425
4426     We set a reserve R (free regs that are used for temporary computations,
4427     etc.).  For now the reserve is a constant 3.
4428
4429     Let I be the number of induction variables.
4430
4431     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4432	make a lot of ivs without a reason).
4433     -- if A - R < U + I <= A, the cost is I * PRES_COST
4434     -- if U + I > A, the cost is I * PRES_COST and
4435        number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4436
4437  if (dump_file && (dump_flags & TDF_DETAILS))
4438    {
4439      fprintf (dump_file, "Global costs:\n");
4440      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4441      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4442      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4443      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4444    }
4445
4446  n = 0;
4447  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4448    {
4449      op = PHI_RESULT (phi);
4450
4451      if (!is_gimple_reg (op))
4452	continue;
4453
4454      if (get_iv (data, op))
4455	continue;
4456
4457      n++;
4458    }
4459
4460  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4461    {
4462      struct version_info *info = ver_info (data, j);
4463
4464      if (info->inv_id && info->has_nonlin_use)
4465	n++;
4466    }
4467
4468  data->regs_used = n;
4469  if (dump_file && (dump_flags & TDF_DETAILS))
4470    fprintf (dump_file, "  regs_used %d\n", n);
4471
4472  if (dump_file && (dump_flags & TDF_DETAILS))
4473    {
4474      fprintf (dump_file, "  cost for size:\n");
4475      fprintf (dump_file, "  ivs\tcost\n");
4476      for (j = 0; j <= 2 * target_avail_regs; j++)
4477	fprintf (dump_file, "  %d\t%d\n", j,
4478		 ivopts_global_cost_for_size (data, j));
4479      fprintf (dump_file, "\n");
4480    }
4481}
4482
4483/* Returns true if A is a cheaper cost pair than B.  */
4484
4485static bool
4486cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4487{
4488  if (!a)
4489    return false;
4490
4491  if (!b)
4492    return true;
4493
4494  if (a->cost < b->cost)
4495    return true;
4496
4497  if (a->cost > b->cost)
4498    return false;
4499
4500  /* In case the costs are the same, prefer the cheaper candidate.  */
4501  if (a->cand->cost < b->cand->cost)
4502    return true;
4503
4504  return false;
4505}
4506
4507/* Computes the cost field of IVS structure.  */
4508
4509static void
4510iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4511{
4512  unsigned cost = 0;
4513
4514  cost += ivs->cand_use_cost;
4515  cost += ivs->cand_cost;
4516  cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4517
4518  ivs->cost = cost;
4519}
4520
4521/* Remove invariants in set INVS to set IVS.  */
4522
4523static void
4524iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4525{
4526  bitmap_iterator bi;
4527  unsigned iid;
4528
4529  if (!invs)
4530    return;
4531
4532  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533    {
4534      ivs->n_invariant_uses[iid]--;
4535      if (ivs->n_invariant_uses[iid] == 0)
4536	ivs->n_regs--;
4537    }
4538}
4539
4540/* Set USE not to be expressed by any candidate in IVS.  */
4541
4542static void
4543iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4544		 struct iv_use *use)
4545{
4546  unsigned uid = use->id, cid;
4547  struct cost_pair *cp;
4548
4549  cp = ivs->cand_for_use[uid];
4550  if (!cp)
4551    return;
4552  cid = cp->cand->id;
4553
4554  ivs->bad_uses++;
4555  ivs->cand_for_use[uid] = NULL;
4556  ivs->n_cand_uses[cid]--;
4557
4558  if (ivs->n_cand_uses[cid] == 0)
4559    {
4560      bitmap_clear_bit (ivs->cands, cid);
4561      /* Do not count the pseudocandidates.  */
4562      if (cp->cand->iv)
4563	ivs->n_regs--;
4564      ivs->n_cands--;
4565      ivs->cand_cost -= cp->cand->cost;
4566
4567      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4568    }
4569
4570  ivs->cand_use_cost -= cp->cost;
4571
4572  iv_ca_set_remove_invariants (ivs, cp->depends_on);
4573  iv_ca_recount_cost (data, ivs);
4574}
4575
4576/* Add invariants in set INVS to set IVS.  */
4577
4578static void
4579iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4580{
4581  bitmap_iterator bi;
4582  unsigned iid;
4583
4584  if (!invs)
4585    return;
4586
4587  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4588    {
4589      ivs->n_invariant_uses[iid]++;
4590      if (ivs->n_invariant_uses[iid] == 1)
4591	ivs->n_regs++;
4592    }
4593}
4594
4595/* Set cost pair for USE in set IVS to CP.  */
4596
4597static void
4598iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4599	      struct iv_use *use, struct cost_pair *cp)
4600{
4601  unsigned uid = use->id, cid;
4602
4603  if (ivs->cand_for_use[uid] == cp)
4604    return;
4605
4606  if (ivs->cand_for_use[uid])
4607    iv_ca_set_no_cp (data, ivs, use);
4608
4609  if (cp)
4610    {
4611      cid = cp->cand->id;
4612
4613      ivs->bad_uses--;
4614      ivs->cand_for_use[uid] = cp;
4615      ivs->n_cand_uses[cid]++;
4616      if (ivs->n_cand_uses[cid] == 1)
4617	{
4618	  bitmap_set_bit (ivs->cands, cid);
4619	  /* Do not count the pseudocandidates.  */
4620	  if (cp->cand->iv)
4621	    ivs->n_regs++;
4622	  ivs->n_cands++;
4623	  ivs->cand_cost += cp->cand->cost;
4624
4625	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4626	}
4627
4628      ivs->cand_use_cost += cp->cost;
4629      iv_ca_set_add_invariants (ivs, cp->depends_on);
4630      iv_ca_recount_cost (data, ivs);
4631    }
4632}
4633
4634/* Extend set IVS by expressing USE by some of the candidates in it
4635   if possible.  */
4636
4637static void
4638iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4639	       struct iv_use *use)
4640{
4641  struct cost_pair *best_cp = NULL, *cp;
4642  bitmap_iterator bi;
4643  unsigned i;
4644
4645  gcc_assert (ivs->upto >= use->id);
4646
4647  if (ivs->upto == use->id)
4648    {
4649      ivs->upto++;
4650      ivs->bad_uses++;
4651    }
4652
4653  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4654    {
4655      cp = get_use_iv_cost (data, use, iv_cand (data, i));
4656
4657      if (cheaper_cost_pair (cp, best_cp))
4658	best_cp = cp;
4659    }
4660
4661  iv_ca_set_cp (data, ivs, use, best_cp);
4662}
4663
4664/* Get cost for assignment IVS.  */
4665
4666static unsigned
4667iv_ca_cost (struct iv_ca *ivs)
4668{
4669  return (ivs->bad_uses ? INFTY : ivs->cost);
4670}
4671
4672/* Returns true if all dependences of CP are among invariants in IVS.  */
4673
4674static bool
4675iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4676{
4677  unsigned i;
4678  bitmap_iterator bi;
4679
4680  if (!cp->depends_on)
4681    return true;
4682
4683  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4684    {
4685      if (ivs->n_invariant_uses[i] == 0)
4686	return false;
4687    }
4688
4689  return true;
4690}
4691
4692/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4693   it before NEXT_CHANGE.  */
4694
4695static struct iv_ca_delta *
4696iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4697		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4698{
4699  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4700
4701  change->use = use;
4702  change->old_cp = old_cp;
4703  change->new_cp = new_cp;
4704  change->next_change = next_change;
4705
4706  return change;
4707}
4708
4709/* Joins two lists of changes L1 and L2.  Destructive -- old lists
4710   are rewritten.  */
4711
4712static struct iv_ca_delta *
4713iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4714{
4715  struct iv_ca_delta *last;
4716
4717  if (!l2)
4718    return l1;
4719
4720  if (!l1)
4721    return l2;
4722
4723  for (last = l1; last->next_change; last = last->next_change)
4724    continue;
4725  last->next_change = l2;
4726
4727  return l1;
4728}
4729
4730/* Returns candidate by that USE is expressed in IVS.  */
4731
4732static struct cost_pair *
4733iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4734{
4735  return ivs->cand_for_use[use->id];
4736}
4737
4738/* Reverse the list of changes DELTA, forming the inverse to it.  */
4739
4740static struct iv_ca_delta *
4741iv_ca_delta_reverse (struct iv_ca_delta *delta)
4742{
4743  struct iv_ca_delta *act, *next, *prev = NULL;
4744  struct cost_pair *tmp;
4745
4746  for (act = delta; act; act = next)
4747    {
4748      next = act->next_change;
4749      act->next_change = prev;
4750      prev = act;
4751
4752      tmp = act->old_cp;
4753      act->old_cp = act->new_cp;
4754      act->new_cp = tmp;
4755    }
4756
4757  return prev;
4758}
4759
4760/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4761   reverted instead.  */
4762
4763static void
4764iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4765		    struct iv_ca_delta *delta, bool forward)
4766{
4767  struct cost_pair *from, *to;
4768  struct iv_ca_delta *act;
4769
4770  if (!forward)
4771    delta = iv_ca_delta_reverse (delta);
4772
4773  for (act = delta; act; act = act->next_change)
4774    {
4775      from = act->old_cp;
4776      to = act->new_cp;
4777      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4778      iv_ca_set_cp (data, ivs, act->use, to);
4779    }
4780
4781  if (!forward)
4782    iv_ca_delta_reverse (delta);
4783}
4784
4785/* Returns true if CAND is used in IVS.  */
4786
4787static bool
4788iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4789{
4790  return ivs->n_cand_uses[cand->id] > 0;
4791}
4792
4793/* Returns number of induction variable candidates in the set IVS.  */
4794
4795static unsigned
4796iv_ca_n_cands (struct iv_ca *ivs)
4797{
4798  return ivs->n_cands;
4799}
4800
4801/* Free the list of changes DELTA.  */
4802
4803static void
4804iv_ca_delta_free (struct iv_ca_delta **delta)
4805{
4806  struct iv_ca_delta *act, *next;
4807
4808  for (act = *delta; act; act = next)
4809    {
4810      next = act->next_change;
4811      free (act);
4812    }
4813
4814  *delta = NULL;
4815}
4816
4817/* Allocates new iv candidates assignment.  */
4818
4819static struct iv_ca *
4820iv_ca_new (struct ivopts_data *data)
4821{
4822  struct iv_ca *nw = XNEW (struct iv_ca);
4823
4824  nw->upto = 0;
4825  nw->bad_uses = 0;
4826  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4827  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4828  nw->cands = BITMAP_ALLOC (NULL);
4829  nw->n_cands = 0;
4830  nw->n_regs = 0;
4831  nw->cand_use_cost = 0;
4832  nw->cand_cost = 0;
4833  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4834  nw->cost = 0;
4835
4836  return nw;
4837}
4838
4839/* Free memory occupied by the set IVS.  */
4840
4841static void
4842iv_ca_free (struct iv_ca **ivs)
4843{
4844  free ((*ivs)->cand_for_use);
4845  free ((*ivs)->n_cand_uses);
4846  BITMAP_FREE ((*ivs)->cands);
4847  free ((*ivs)->n_invariant_uses);
4848  free (*ivs);
4849  *ivs = NULL;
4850}
4851
4852/* Dumps IVS to FILE.  */
4853
4854static void
4855iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4856{
4857  const char *pref = "  invariants ";
4858  unsigned i;
4859
4860  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4861  bitmap_print (file, ivs->cands, "  candidates ","\n");
4862
4863  for (i = 1; i <= data->max_inv_id; i++)
4864    if (ivs->n_invariant_uses[i])
4865      {
4866	fprintf (file, "%s%d", pref, i);
4867	pref = ", ";
4868      }
4869  fprintf (file, "\n");
4870}
4871
4872/* Try changing candidate in IVS to CAND for each use.  Return cost of the
4873   new set, and store differences in DELTA.  Number of induction variables
4874   in the new set is stored to N_IVS.  */
4875
4876static unsigned
4877iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4878	      struct iv_cand *cand, struct iv_ca_delta **delta,
4879	      unsigned *n_ivs)
4880{
4881  unsigned i, cost;
4882  struct iv_use *use;
4883  struct cost_pair *old_cp, *new_cp;
4884
4885  *delta = NULL;
4886  for (i = 0; i < ivs->upto; i++)
4887    {
4888      use = iv_use (data, i);
4889      old_cp = iv_ca_cand_for_use (ivs, use);
4890
4891      if (old_cp
4892	  && old_cp->cand == cand)
4893	continue;
4894
4895      new_cp = get_use_iv_cost (data, use, cand);
4896      if (!new_cp)
4897	continue;
4898
4899      if (!iv_ca_has_deps (ivs, new_cp))
4900	continue;
4901
4902      if (!cheaper_cost_pair (new_cp, old_cp))
4903	continue;
4904
4905      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4906    }
4907
4908  iv_ca_delta_commit (data, ivs, *delta, true);
4909  cost = iv_ca_cost (ivs);
4910  if (n_ivs)
4911    *n_ivs = iv_ca_n_cands (ivs);
4912  iv_ca_delta_commit (data, ivs, *delta, false);
4913
4914  return cost;
4915}
4916
4917/* Try narrowing set IVS by removing CAND.  Return the cost of
4918   the new set and store the differences in DELTA.  */
4919
4920static unsigned
4921iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4922	      struct iv_cand *cand, struct iv_ca_delta **delta)
4923{
4924  unsigned i, ci;
4925  struct iv_use *use;
4926  struct cost_pair *old_cp, *new_cp, *cp;
4927  bitmap_iterator bi;
4928  struct iv_cand *cnd;
4929  unsigned cost;
4930
4931  *delta = NULL;
4932  for (i = 0; i < n_iv_uses (data); i++)
4933    {
4934      use = iv_use (data, i);
4935
4936      old_cp = iv_ca_cand_for_use (ivs, use);
4937      if (old_cp->cand != cand)
4938	continue;
4939
4940      new_cp = NULL;
4941
4942      if (data->consider_all_candidates)
4943	{
4944	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4945	    {
4946	      if (ci == cand->id)
4947		continue;
4948
4949	      cnd = iv_cand (data, ci);
4950
4951	      cp = get_use_iv_cost (data, use, cnd);
4952	      if (!cp)
4953		continue;
4954	      if (!iv_ca_has_deps (ivs, cp))
4955		continue;
4956
4957	      if (!cheaper_cost_pair (cp, new_cp))
4958		continue;
4959
4960	      new_cp = cp;
4961	    }
4962	}
4963      else
4964	{
4965	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4966	    {
4967	      if (ci == cand->id)
4968		continue;
4969
4970	      cnd = iv_cand (data, ci);
4971
4972	      cp = get_use_iv_cost (data, use, cnd);
4973	      if (!cp)
4974		continue;
4975	      if (!iv_ca_has_deps (ivs, cp))
4976		continue;
4977
4978	      if (!cheaper_cost_pair (cp, new_cp))
4979		continue;
4980
4981	      new_cp = cp;
4982	    }
4983	}
4984
4985      if (!new_cp)
4986	{
4987	  iv_ca_delta_free (delta);
4988	  return INFTY;
4989	}
4990
4991      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4992    }
4993
4994  iv_ca_delta_commit (data, ivs, *delta, true);
4995  cost = iv_ca_cost (ivs);
4996  iv_ca_delta_commit (data, ivs, *delta, false);
4997
4998  return cost;
4999}
5000
5001/* Try optimizing the set of candidates IVS by removing candidates different
5002   from to EXCEPT_CAND from it.  Return cost of the new set, and store
5003   differences in DELTA.  */
5004
5005static unsigned
5006iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5007	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
5008{
5009  bitmap_iterator bi;
5010  struct iv_ca_delta *act_delta, *best_delta;
5011  unsigned i, best_cost, acost;
5012  struct iv_cand *cand;
5013
5014  best_delta = NULL;
5015  best_cost = iv_ca_cost (ivs);
5016
5017  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5018    {
5019      cand = iv_cand (data, i);
5020
5021      if (cand == except_cand)
5022	continue;
5023
5024      acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5025
5026      if (acost < best_cost)
5027	{
5028	  best_cost = acost;
5029	  iv_ca_delta_free (&best_delta);
5030	  best_delta = act_delta;
5031	}
5032      else
5033	iv_ca_delta_free (&act_delta);
5034    }
5035
5036  if (!best_delta)
5037    {
5038      *delta = NULL;
5039      return best_cost;
5040    }
5041
5042  /* Recurse to possibly remove other unnecessary ivs.  */
5043  iv_ca_delta_commit (data, ivs, best_delta, true);
5044  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5045  iv_ca_delta_commit (data, ivs, best_delta, false);
5046  *delta = iv_ca_delta_join (best_delta, *delta);
5047  return best_cost;
5048}
5049
5050/* Tries to extend the sets IVS in the best possible way in order
5051   to express the USE.  */
5052
5053static bool
5054try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5055		  struct iv_use *use)
5056{
5057  unsigned best_cost, act_cost;
5058  unsigned i;
5059  bitmap_iterator bi;
5060  struct iv_cand *cand;
5061  struct iv_ca_delta *best_delta = NULL, *act_delta;
5062  struct cost_pair *cp;
5063
5064  iv_ca_add_use (data, ivs, use);
5065  best_cost = iv_ca_cost (ivs);
5066
5067  cp = iv_ca_cand_for_use (ivs, use);
5068  if (cp)
5069    {
5070      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5071      iv_ca_set_no_cp (data, ivs, use);
5072    }
5073
5074  /* First try important candidates.  Only if it fails, try the specific ones.
5075     Rationale -- in loops with many variables the best choice often is to use
5076     just one generic biv.  If we added here many ivs specific to the uses,
5077     the optimization algorithm later would be likely to get stuck in a local
5078     minimum, thus causing us to create too many ivs.  The approach from
5079     few ivs to more seems more likely to be successful -- starting from few
5080     ivs, replacing an expensive use by a specific iv should always be a
5081     win.  */
5082  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5083    {
5084      cand = iv_cand (data, i);
5085
5086      if (iv_ca_cand_used_p (ivs, cand))
5087	continue;
5088
5089      cp = get_use_iv_cost (data, use, cand);
5090      if (!cp)
5091	continue;
5092
5093      iv_ca_set_cp (data, ivs, use, cp);
5094      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5095      iv_ca_set_no_cp (data, ivs, use);
5096      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5097
5098      if (act_cost < best_cost)
5099	{
5100	  best_cost = act_cost;
5101
5102	  iv_ca_delta_free (&best_delta);
5103	  best_delta = act_delta;
5104	}
5105      else
5106	iv_ca_delta_free (&act_delta);
5107    }
5108
5109  if (best_cost == INFTY)
5110    {
5111      for (i = 0; i < use->n_map_members; i++)
5112	{
5113	  cp = use->cost_map + i;
5114	  cand = cp->cand;
5115	  if (!cand)
5116	    continue;
5117
5118	  /* Already tried this.  */
5119	  if (cand->important)
5120	    continue;
5121
5122	  if (iv_ca_cand_used_p (ivs, cand))
5123	    continue;
5124
5125	  act_delta = NULL;
5126	  iv_ca_set_cp (data, ivs, use, cp);
5127	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5128	  iv_ca_set_no_cp (data, ivs, use);
5129	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5130				       cp, act_delta);
5131
5132	  if (act_cost < best_cost)
5133	    {
5134	      best_cost = act_cost;
5135
5136	      if (best_delta)
5137		iv_ca_delta_free (&best_delta);
5138	      best_delta = act_delta;
5139	    }
5140	  else
5141	    iv_ca_delta_free (&act_delta);
5142	}
5143    }
5144
5145  iv_ca_delta_commit (data, ivs, best_delta, true);
5146  iv_ca_delta_free (&best_delta);
5147
5148  return (best_cost != INFTY);
5149}
5150
5151/* Finds an initial assignment of candidates to uses.  */
5152
5153static struct iv_ca *
5154get_initial_solution (struct ivopts_data *data)
5155{
5156  struct iv_ca *ivs = iv_ca_new (data);
5157  unsigned i;
5158
5159  for (i = 0; i < n_iv_uses (data); i++)
5160    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5161      {
5162	iv_ca_free (&ivs);
5163	return NULL;
5164      }
5165
5166  return ivs;
5167}
5168
5169/* Tries to improve set of induction variables IVS.  */
5170
5171static bool
5172try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5173{
5174  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5175  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5176  struct iv_cand *cand;
5177
5178  /* Try extending the set of induction variables by one.  */
5179  for (i = 0; i < n_iv_cands (data); i++)
5180    {
5181      cand = iv_cand (data, i);
5182
5183      if (iv_ca_cand_used_p (ivs, cand))
5184	continue;
5185
5186      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5187      if (!act_delta)
5188	continue;
5189
5190      /* If we successfully added the candidate and the set is small enough,
5191	 try optimizing it by removing other candidates.  */
5192      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5193      	{
5194	  iv_ca_delta_commit (data, ivs, act_delta, true);
5195	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5196	  iv_ca_delta_commit (data, ivs, act_delta, false);
5197	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5198	}
5199
5200      if (acost < best_cost)
5201	{
5202	  best_cost = acost;
5203	  iv_ca_delta_free (&best_delta);
5204	  best_delta = act_delta;
5205	}
5206      else
5207	iv_ca_delta_free (&act_delta);
5208    }
5209
5210  if (!best_delta)
5211    {
5212      /* Try removing the candidates from the set instead.  */
5213      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5214
5215      /* Nothing more we can do.  */
5216      if (!best_delta)
5217	return false;
5218    }
5219
5220  iv_ca_delta_commit (data, ivs, best_delta, true);
5221  gcc_assert (best_cost == iv_ca_cost (ivs));
5222  iv_ca_delta_free (&best_delta);
5223  return true;
5224}
5225
5226/* Attempts to find the optimal set of induction variables.  We do simple
5227   greedy heuristic -- we try to replace at most one candidate in the selected
5228   solution and remove the unused ivs while this improves the cost.  */
5229
5230static struct iv_ca *
5231find_optimal_iv_set (struct ivopts_data *data)
5232{
5233  unsigned i;
5234  struct iv_ca *set;
5235  struct iv_use *use;
5236
5237  /* Get the initial solution.  */
5238  set = get_initial_solution (data);
5239  if (!set)
5240    {
5241      if (dump_file && (dump_flags & TDF_DETAILS))
5242	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5243      return NULL;
5244    }
5245
5246  if (dump_file && (dump_flags & TDF_DETAILS))
5247    {
5248      fprintf (dump_file, "Initial set of candidates:\n");
5249      iv_ca_dump (data, dump_file, set);
5250    }
5251
5252  while (try_improve_iv_set (data, set))
5253    {
5254      if (dump_file && (dump_flags & TDF_DETAILS))
5255	{
5256	  fprintf (dump_file, "Improved to:\n");
5257	  iv_ca_dump (data, dump_file, set);
5258	}
5259    }
5260
5261  if (dump_file && (dump_flags & TDF_DETAILS))
5262    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5263
5264  for (i = 0; i < n_iv_uses (data); i++)
5265    {
5266      use = iv_use (data, i);
5267      use->selected = iv_ca_cand_for_use (set, use)->cand;
5268    }
5269
5270  return set;
5271}
5272
5273/* Creates a new induction variable corresponding to CAND.  */
5274
5275static void
5276create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5277{
5278  block_stmt_iterator incr_pos;
5279  tree base;
5280  bool after = false;
5281
5282  if (!cand->iv)
5283    return;
5284
5285  switch (cand->pos)
5286    {
5287    case IP_NORMAL:
5288      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5289      break;
5290
5291    case IP_END:
5292      incr_pos = bsi_last (ip_end_pos (data->current_loop));
5293      after = true;
5294      break;
5295
5296    case IP_ORIGINAL:
5297      /* Mark that the iv is preserved.  */
5298      name_info (data, cand->var_before)->preserve_biv = true;
5299      name_info (data, cand->var_after)->preserve_biv = true;
5300
5301      /* Rewrite the increment so that it uses var_before directly.  */
5302      find_interesting_uses_op (data, cand->var_after)->selected = cand;
5303
5304      return;
5305    }
5306
5307  gimple_add_tmp_var (cand->var_before);
5308  add_referenced_var (cand->var_before);
5309
5310  base = unshare_expr (cand->iv->base);
5311
5312  create_iv (base, unshare_expr (cand->iv->step),
5313	     cand->var_before, data->current_loop,
5314	     &incr_pos, after, &cand->var_before, &cand->var_after);
5315}
5316
5317/* Creates new induction variables described in SET.  */
5318
5319static void
5320create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5321{
5322  unsigned i;
5323  struct iv_cand *cand;
5324  bitmap_iterator bi;
5325
5326  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5327    {
5328      cand = iv_cand (data, i);
5329      create_new_iv (data, cand);
5330    }
5331}
5332
5333/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5334   is true, remove also the ssa name defined by the statement.  */
5335
5336static void
5337remove_statement (tree stmt, bool including_defined_name)
5338{
5339  if (TREE_CODE (stmt) == PHI_NODE)
5340    {
5341      if (!including_defined_name)
5342	{
5343	  /* Prevent the ssa name defined by the statement from being removed.  */
5344	  SET_PHI_RESULT (stmt, NULL);
5345	}
5346      remove_phi_node (stmt, NULL_TREE);
5347    }
5348  else
5349    {
5350      block_stmt_iterator bsi = bsi_for_stmt (stmt);
5351
5352      bsi_remove (&bsi, true);
5353    }
5354}
5355
5356/* Rewrites USE (definition of iv used in a nonlinear expression)
5357   using candidate CAND.  */
5358
5359static void
5360rewrite_use_nonlinear_expr (struct ivopts_data *data,
5361			    struct iv_use *use, struct iv_cand *cand)
5362{
5363  tree comp;
5364  tree op, stmts, tgt, ass;
5365  block_stmt_iterator bsi, pbsi;
5366
5367  /* An important special case -- if we are asked to express value of
5368     the original iv by itself, just exit; there is no need to
5369     introduce a new computation (that might also need casting the
5370     variable to unsigned and back).  */
5371  if (cand->pos == IP_ORIGINAL
5372      && cand->incremented_at == use->stmt)
5373    {
5374      tree step, ctype, utype;
5375      enum tree_code incr_code = PLUS_EXPR;
5376
5377      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5378      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5379
5380      step = cand->iv->step;
5381      ctype = TREE_TYPE (step);
5382      utype = TREE_TYPE (cand->var_after);
5383      if (TREE_CODE (step) == NEGATE_EXPR)
5384	{
5385	  incr_code = MINUS_EXPR;
5386	  step = TREE_OPERAND (step, 0);
5387	}
5388
5389      /* Check whether we may leave the computation unchanged.
5390	 This is the case only if it does not rely on other
5391	 computations in the loop -- otherwise, the computation
5392	 we rely upon may be removed in remove_unused_ivs,
5393	 thus leading to ICE.  */
5394      op = TREE_OPERAND (use->stmt, 1);
5395      if (TREE_CODE (op) == PLUS_EXPR
5396	  || TREE_CODE (op) == MINUS_EXPR)
5397	{
5398	  if (TREE_OPERAND (op, 0) == cand->var_before)
5399	    op = TREE_OPERAND (op, 1);
5400	  else if (TREE_CODE (op) == PLUS_EXPR
5401		   && TREE_OPERAND (op, 1) == cand->var_before)
5402	    op = TREE_OPERAND (op, 0);
5403	  else
5404	    op = NULL_TREE;
5405	}
5406      else
5407	op = NULL_TREE;
5408
5409      if (op
5410	  && (TREE_CODE (op) == INTEGER_CST
5411	      || operand_equal_p (op, step, 0)))
5412	return;
5413
5414      /* Otherwise, add the necessary computations to express
5415	 the iv.  */
5416      op = fold_convert (ctype, cand->var_before);
5417      comp = fold_convert (utype,
5418			   build2 (incr_code, ctype, op,
5419				   unshare_expr (step)));
5420    }
5421  else
5422    comp = get_computation (data->current_loop, use, cand);
5423
5424  switch (TREE_CODE (use->stmt))
5425    {
5426    case PHI_NODE:
5427      tgt = PHI_RESULT (use->stmt);
5428
5429      /* If we should keep the biv, do not replace it.  */
5430      if (name_info (data, tgt)->preserve_biv)
5431	return;
5432
5433      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5434      while (!bsi_end_p (pbsi)
5435	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5436	{
5437	  bsi = pbsi;
5438	  bsi_next (&pbsi);
5439	}
5440      break;
5441
5442    case MODIFY_EXPR:
5443      tgt = TREE_OPERAND (use->stmt, 0);
5444      bsi = bsi_for_stmt (use->stmt);
5445      break;
5446
5447    default:
5448      gcc_unreachable ();
5449    }
5450
5451  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5452
5453  if (TREE_CODE (use->stmt) == PHI_NODE)
5454    {
5455      if (stmts)
5456	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5457      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5458      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5459      remove_statement (use->stmt, false);
5460      SSA_NAME_DEF_STMT (tgt) = ass;
5461    }
5462  else
5463    {
5464      if (stmts)
5465	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5466      TREE_OPERAND (use->stmt, 1) = op;
5467    }
5468}
5469
5470/* Replaces ssa name in index IDX by its basic variable.  Callback for
5471   for_each_index.  */
5472
5473static bool
5474idx_remove_ssa_names (tree base, tree *idx,
5475		      void *data ATTRIBUTE_UNUSED)
5476{
5477  tree *op;
5478
5479  if (TREE_CODE (*idx) == SSA_NAME)
5480    *idx = SSA_NAME_VAR (*idx);
5481
5482  if (TREE_CODE (base) == ARRAY_REF)
5483    {
5484      op = &TREE_OPERAND (base, 2);
5485      if (*op
5486	  && TREE_CODE (*op) == SSA_NAME)
5487	*op = SSA_NAME_VAR (*op);
5488      op = &TREE_OPERAND (base, 3);
5489      if (*op
5490	  && TREE_CODE (*op) == SSA_NAME)
5491	*op = SSA_NAME_VAR (*op);
5492    }
5493
5494  return true;
5495}
5496
5497/* Unshares REF and replaces ssa names inside it by their basic variables.  */
5498
5499static tree
5500unshare_and_remove_ssa_names (tree ref)
5501{
5502  ref = unshare_expr (ref);
5503  for_each_index (&ref, idx_remove_ssa_names, NULL);
5504
5505  return ref;
5506}
5507
5508/* Extract the alias analysis info for the memory reference REF.  There are
5509   several ways how this information may be stored and what precisely is
5510   its semantics depending on the type of the reference, but there always is
5511   somewhere hidden one _DECL node that is used to determine the set of
5512   virtual operands for the reference.  The code below deciphers this jungle
5513   and extracts this single useful piece of information.  */
5514
5515static tree
5516get_ref_tag (tree ref, tree orig)
5517{
5518  tree var = get_base_address (ref);
5519  tree aref = NULL_TREE, tag, sv;
5520  HOST_WIDE_INT offset, size, maxsize;
5521
5522  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5523    {
5524      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5525      if (ref)
5526	break;
5527    }
5528
5529  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5530    return unshare_expr (sv);
5531
5532  if (!var)
5533    return NULL_TREE;
5534
5535  if (TREE_CODE (var) == INDIRECT_REF)
5536    {
5537      /* If the base is a dereference of a pointer, first check its name memory
5538	 tag.  If it does not have one, use its symbol memory tag.  */
5539      var = TREE_OPERAND (var, 0);
5540      if (TREE_CODE (var) != SSA_NAME)
5541	return NULL_TREE;
5542
5543      if (SSA_NAME_PTR_INFO (var))
5544	{
5545	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5546	  if (tag)
5547	    return tag;
5548	}
5549
5550      var = SSA_NAME_VAR (var);
5551      tag = var_ann (var)->symbol_mem_tag;
5552      gcc_assert (tag != NULL_TREE);
5553      return tag;
5554    }
5555  else
5556    {
5557      if (!DECL_P (var))
5558	return NULL_TREE;
5559
5560      tag = var_ann (var)->symbol_mem_tag;
5561      if (tag)
5562	return tag;
5563
5564      return var;
5565    }
5566}
5567
5568/* Copies the reference information from OLD_REF to NEW_REF.  */
5569
5570static void
5571copy_ref_info (tree new_ref, tree old_ref)
5572{
5573  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5574    copy_mem_ref_info (new_ref, old_ref);
5575  else
5576    {
5577      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5578      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5579    }
5580}
5581
5582/* Rewrites USE (address that is an iv) using candidate CAND.  */
5583
5584static void
5585rewrite_use_address (struct ivopts_data *data,
5586		     struct iv_use *use, struct iv_cand *cand)
5587{
5588  struct affine_tree_combination aff;
5589  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5590  tree ref;
5591
5592  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5593  unshare_aff_combination (&aff);
5594
5595  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5596  copy_ref_info (ref, *use->op_p);
5597  *use->op_p = ref;
5598}
5599
5600/* Rewrites USE (the condition such that one of the arguments is an iv) using
5601   candidate CAND.  */
5602
5603static void
5604rewrite_use_compare (struct ivopts_data *data,
5605		     struct iv_use *use, struct iv_cand *cand)
5606{
5607  tree comp;
5608  tree *op_p, cond, op, stmts, bound;
5609  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5610  enum tree_code compare;
5611  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5612
5613  bound = cp->value;
5614  if (bound)
5615    {
5616      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5617      tree var_type = TREE_TYPE (var);
5618
5619      compare = iv_elimination_compare (data, use);
5620      bound = fold_convert (var_type, bound);
5621      op = force_gimple_operand (unshare_expr (bound), &stmts,
5622				 true, NULL_TREE);
5623
5624      if (stmts)
5625	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5626
5627      *use->op_p = build2 (compare, boolean_type_node, var, op);
5628      update_stmt (use->stmt);
5629      return;
5630    }
5631
5632  /* The induction variable elimination failed; just express the original
5633     giv.  */
5634  comp = get_computation (data->current_loop, use, cand);
5635
5636  cond = *use->op_p;
5637  op_p = &TREE_OPERAND (cond, 0);
5638  if (TREE_CODE (*op_p) != SSA_NAME
5639      || zero_p (get_iv (data, *op_p)->step))
5640    op_p = &TREE_OPERAND (cond, 1);
5641
5642  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5643  if (stmts)
5644    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5645
5646  *op_p = op;
5647}
5648
5649/* Rewrites USE using candidate CAND.  */
5650
5651static void
5652rewrite_use (struct ivopts_data *data,
5653	     struct iv_use *use, struct iv_cand *cand)
5654{
5655  switch (use->type)
5656    {
5657      case USE_NONLINEAR_EXPR:
5658	rewrite_use_nonlinear_expr (data, use, cand);
5659	break;
5660
5661      case USE_ADDRESS:
5662	rewrite_use_address (data, use, cand);
5663	break;
5664
5665      case USE_COMPARE:
5666	rewrite_use_compare (data, use, cand);
5667	break;
5668
5669      default:
5670	gcc_unreachable ();
5671    }
5672  mark_new_vars_to_rename (use->stmt);
5673}
5674
5675/* Rewrite the uses using the selected induction variables.  */
5676
5677static void
5678rewrite_uses (struct ivopts_data *data)
5679{
5680  unsigned i;
5681  struct iv_cand *cand;
5682  struct iv_use *use;
5683
5684  for (i = 0; i < n_iv_uses (data); i++)
5685    {
5686      use = iv_use (data, i);
5687      cand = use->selected;
5688      gcc_assert (cand);
5689
5690      rewrite_use (data, use, cand);
5691    }
5692}
5693
5694/* Removes the ivs that are not used after rewriting.  */
5695
5696static void
5697remove_unused_ivs (struct ivopts_data *data)
5698{
5699  unsigned j;
5700  bitmap_iterator bi;
5701
5702  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5703    {
5704      struct version_info *info;
5705
5706      info = ver_info (data, j);
5707      if (info->iv
5708	  && !zero_p (info->iv->step)
5709	  && !info->inv_id
5710	  && !info->iv->have_use_for
5711	  && !info->preserve_biv)
5712	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5713    }
5714}
5715
5716/* Frees data allocated by the optimization of a single loop.  */
5717
5718static void
5719free_loop_data (struct ivopts_data *data)
5720{
5721  unsigned i, j;
5722  bitmap_iterator bi;
5723  tree obj;
5724
5725  htab_empty (data->niters);
5726
5727  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5728    {
5729      struct version_info *info;
5730
5731      info = ver_info (data, i);
5732      if (info->iv)
5733	free (info->iv);
5734      info->iv = NULL;
5735      info->has_nonlin_use = false;
5736      info->preserve_biv = false;
5737      info->inv_id = 0;
5738    }
5739  bitmap_clear (data->relevant);
5740  bitmap_clear (data->important_candidates);
5741
5742  for (i = 0; i < n_iv_uses (data); i++)
5743    {
5744      struct iv_use *use = iv_use (data, i);
5745
5746      free (use->iv);
5747      BITMAP_FREE (use->related_cands);
5748      for (j = 0; j < use->n_map_members; j++)
5749	if (use->cost_map[j].depends_on)
5750	  BITMAP_FREE (use->cost_map[j].depends_on);
5751      free (use->cost_map);
5752      free (use);
5753    }
5754  VEC_truncate (iv_use_p, data->iv_uses, 0);
5755
5756  for (i = 0; i < n_iv_cands (data); i++)
5757    {
5758      struct iv_cand *cand = iv_cand (data, i);
5759
5760      if (cand->iv)
5761	free (cand->iv);
5762      if (cand->depends_on)
5763	BITMAP_FREE (cand->depends_on);
5764      free (cand);
5765    }
5766  VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5767
5768  if (data->version_info_size < num_ssa_names)
5769    {
5770      data->version_info_size = 2 * num_ssa_names;
5771      free (data->version_info);
5772      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5773    }
5774
5775  data->max_inv_id = 0;
5776
5777  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5778    SET_DECL_RTL (obj, NULL_RTX);
5779
5780  VEC_truncate (tree, decl_rtl_to_reset, 0);
5781}
5782
5783/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5784   loop tree.  */
5785
5786static void
5787tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5788{
5789  free_loop_data (data);
5790  free (data->version_info);
5791  BITMAP_FREE (data->relevant);
5792  BITMAP_FREE (data->important_candidates);
5793  htab_delete (data->niters);
5794
5795  VEC_free (tree, heap, decl_rtl_to_reset);
5796  VEC_free (iv_use_p, heap, data->iv_uses);
5797  VEC_free (iv_cand_p, heap, data->iv_candidates);
5798}
5799
5800/* Optimizes the LOOP.  Returns true if anything changed.  */
5801
5802static bool
5803tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5804{
5805  bool changed = false;
5806  struct iv_ca *iv_ca;
5807  edge exit;
5808
5809  data->current_loop = loop;
5810
5811  if (dump_file && (dump_flags & TDF_DETAILS))
5812    {
5813      fprintf (dump_file, "Processing loop %d\n", loop->num);
5814
5815      exit = single_dom_exit (loop);
5816      if (exit)
5817	{
5818	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5819		   exit->src->index, exit->dest->index);
5820	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5821	  fprintf (dump_file, "\n");
5822	}
5823
5824      fprintf (dump_file, "\n");
5825    }
5826
5827  /* For each ssa name determines whether it behaves as an induction variable
5828     in some loop.  */
5829  if (!find_induction_variables (data))
5830    goto finish;
5831
5832  /* Finds interesting uses (item 1).  */
5833  find_interesting_uses (data);
5834  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5835    goto finish;
5836
5837  /* Finds candidates for the induction variables (item 2).  */
5838  find_iv_candidates (data);
5839
5840  /* Calculates the costs (item 3, part 1).  */
5841  determine_use_iv_costs (data);
5842  determine_iv_costs (data);
5843  determine_set_costs (data);
5844
5845  /* Find the optimal set of induction variables (item 3, part 2).  */
5846  iv_ca = find_optimal_iv_set (data);
5847  if (!iv_ca)
5848    goto finish;
5849  changed = true;
5850
5851  /* Create the new induction variables (item 4, part 1).  */
5852  create_new_ivs (data, iv_ca);
5853  iv_ca_free (&iv_ca);
5854
5855  /* Rewrite the uses (item 4, part 2).  */
5856  rewrite_uses (data);
5857
5858  /* Remove the ivs that are unused after rewriting.  */
5859  remove_unused_ivs (data);
5860
5861  /* We have changed the structure of induction variables; it might happen
5862     that definitions in the scev database refer to some of them that were
5863     eliminated.  */
5864  scev_reset ();
5865
5866finish:
5867  free_loop_data (data);
5868
5869  return changed;
5870}
5871
5872/* Main entry point.  Optimizes induction variables in LOOPS.  */
5873
5874void
5875tree_ssa_iv_optimize (struct loops *loops)
5876{
5877  struct loop *loop;
5878  struct ivopts_data data;
5879
5880  tree_ssa_iv_optimize_init (&data);
5881
5882  /* Optimize the loops starting with the innermost ones.  */
5883  loop = loops->tree_root;
5884  while (loop->inner)
5885    loop = loop->inner;
5886
5887  /* Scan the loops, inner ones first.  */
5888  while (loop != loops->tree_root)
5889    {
5890      if (dump_file && (dump_flags & TDF_DETAILS))
5891	flow_loop_dump (loop, dump_file, NULL, 1);
5892
5893      tree_ssa_iv_optimize_loop (&data, loop);
5894
5895      if (loop->next)
5896	{
5897	  loop = loop->next;
5898	  while (loop->inner)
5899	    loop = loop->inner;
5900	}
5901      else
5902	loop = loop->outer;
5903    }
5904
5905  tree_ssa_iv_optimize_finalize (&data);
5906}
5907