1169689Skan/*  Loop transformation code generation
2169689Skan    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3169689Skan    Contributed by Daniel Berlin <dberlin@dberlin.org>
4169689Skan
5169689Skan    This file is part of GCC.
6169689Skan
7169689Skan    GCC is free software; you can redistribute it and/or modify it under
8169689Skan    the terms of the GNU General Public License as published by the Free
9169689Skan    Software Foundation; either version 2, or (at your option) any later
10169689Skan    version.
11169689Skan
12169689Skan    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13169689Skan    WARRANTY; without even the implied warranty of MERCHANTABILITY or
14169689Skan    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15169689Skan    for more details.
16169689Skan
17169689Skan    You should have received a copy of the GNU General Public License
18169689Skan    along with GCC; see the file COPYING.  If not, write to the Free
19169689Skan    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20169689Skan    02110-1301, USA.  */
21169689Skan
22169689Skan#include "config.h"
23169689Skan#include "system.h"
24169689Skan#include "coretypes.h"
25169689Skan#include "tm.h"
26169689Skan#include "ggc.h"
27169689Skan#include "tree.h"
28169689Skan#include "target.h"
29169689Skan#include "rtl.h"
30169689Skan#include "basic-block.h"
31169689Skan#include "diagnostic.h"
32169689Skan#include "tree-flow.h"
33169689Skan#include "tree-dump.h"
34169689Skan#include "timevar.h"
35169689Skan#include "cfgloop.h"
36169689Skan#include "expr.h"
37169689Skan#include "optabs.h"
38169689Skan#include "tree-chrec.h"
39169689Skan#include "tree-data-ref.h"
40169689Skan#include "tree-pass.h"
41169689Skan#include "tree-scalar-evolution.h"
42169689Skan#include "vec.h"
43169689Skan#include "lambda.h"
44169689Skan#include "vecprim.h"
45169689Skan
46169689Skan/* This loop nest code generation is based on non-singular matrix
47169689Skan   math.
48169689Skan
49169689Skan A little terminology and a general sketch of the algorithm.  See "A singular
50169689Skan loop transformation framework based on non-singular matrices" by Wei Li and
51169689Skan Keshav Pingali for formal proofs that the various statements below are
52169689Skan correct.
53169689Skan
54169689Skan A loop iteration space represents the points traversed by the loop.  A point in the
55169689Skan iteration space can be represented by a vector of size <loop depth>.  You can
56169689Skan therefore represent the iteration space as an integral combinations of a set
57169689Skan of basis vectors.
58169689Skan
59169689Skan A loop iteration space is dense if every integer point between the loop
60169689Skan bounds is a point in the iteration space.  Every loop with a step of 1
61169689Skan therefore has a dense iteration space.
62169689Skan
63169689Skan for i = 1 to 3, step 1 is a dense iteration space.
64169689Skan
65169689Skan A loop iteration space is sparse if it is not dense.  That is, the iteration
66169689Skan space skips integer points that are within the loop bounds.
67169689Skan
68169689Skan for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69169689Skan 2 is skipped.
70169689Skan
71169689Skan Dense source spaces are easy to transform, because they don't skip any
72169689Skan points to begin with.  Thus we can compute the exact bounds of the target
73169689Skan space using min/max and floor/ceil.
74169689Skan
75169689Skan For a dense source space, we take the transformation matrix, decompose it
76169689Skan into a lower triangular part (H) and a unimodular part (U).
77169689Skan We then compute the auxiliary space from the unimodular part (source loop
78169689Skan nest . U = auxiliary space) , which has two important properties:
79169689Skan  1. It traverses the iterations in the same lexicographic order as the source
80169689Skan  space.
81169689Skan  2. It is a dense space when the source is a dense space (even if the target
82169689Skan  space is going to be sparse).
83169689Skan
84169689Skan Given the auxiliary space, we use the lower triangular part to compute the
85169689Skan bounds in the target space by simple matrix multiplication.
86169689Skan The gaps in the target space (IE the new loop step sizes) will be the
87169689Skan diagonals of the H matrix.
88169689Skan
89169689Skan Sparse source spaces require another step, because you can't directly compute
90169689Skan the exact bounds of the auxiliary and target space from the sparse space.
91169689Skan Rather than try to come up with a separate algorithm to handle sparse source
92169689Skan spaces directly, we just find a legal transformation matrix that gives you
93169689Skan the sparse source space, from a dense space, and then transform the dense
94169689Skan space.
95169689Skan
96169689Skan For a regular sparse space, you can represent the source space as an integer
97169689Skan lattice, and the base space of that lattice will always be dense.  Thus, we
98169689Skan effectively use the lattice to figure out the transformation from the lattice
99169689Skan base space, to the sparse iteration space (IE what transform was applied to
100169689Skan the dense space to make it sparse).  We then compose this transform with the
101169689Skan transformation matrix specified by the user (since our matrix transformations
102169689Skan are closed under composition, this is okay).  We can then use the base space
103169689Skan (which is dense) plus the composed transformation matrix, to compute the rest
104169689Skan of the transform using the dense space algorithm above.
105169689Skan
106169689Skan In other words, our sparse source space (B) is decomposed into a dense base
107169689Skan space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108169689Skan We then compute the composition of L and the user transformation matrix (T),
109169689Skan so that T is now a transform from A to the result, instead of from B to the
110169689Skan result.
111169689Skan IE A.(LT) = result instead of B.T = result
112169689Skan Since A is now a dense source space, we can use the dense source space
113169689Skan algorithm above to compute the result of applying transform (LT) to A.
114169689Skan
115169689Skan Fourier-Motzkin elimination is used to compute the bounds of the base space
116169689Skan of the lattice.  */
117169689Skan
118169689Skanstatic bool perfect_nestify (struct loops *,
119169689Skan			     struct loop *, VEC(tree,heap) *,
120169689Skan			     VEC(tree,heap) *, VEC(int,heap) *,
121169689Skan			     VEC(tree,heap) *);
122169689Skan/* Lattice stuff that is internal to the code generation algorithm.  */
123169689Skan
124169689Skantypedef struct
125169689Skan{
126169689Skan  /* Lattice base matrix.  */
127169689Skan  lambda_matrix base;
128169689Skan  /* Lattice dimension.  */
129169689Skan  int dimension;
130169689Skan  /* Origin vector for the coefficients.  */
131169689Skan  lambda_vector origin;
132169689Skan  /* Origin matrix for the invariants.  */
133169689Skan  lambda_matrix origin_invariants;
134169689Skan  /* Number of invariants.  */
135169689Skan  int invariants;
136169689Skan} *lambda_lattice;
137169689Skan
138169689Skan#define LATTICE_BASE(T) ((T)->base)
139169689Skan#define LATTICE_DIMENSION(T) ((T)->dimension)
140169689Skan#define LATTICE_ORIGIN(T) ((T)->origin)
141169689Skan#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
142169689Skan#define LATTICE_INVARIANTS(T) ((T)->invariants)
143169689Skan
144169689Skanstatic bool lle_equal (lambda_linear_expression, lambda_linear_expression,
145169689Skan		       int, int);
146169689Skanstatic lambda_lattice lambda_lattice_new (int, int);
147169689Skanstatic lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
148169689Skan
149169689Skanstatic tree find_induction_var_from_exit_cond (struct loop *);
150169689Skanstatic bool can_convert_to_perfect_nest (struct loop *);
151169689Skan
152169689Skan/* Create a new lambda body vector.  */
153169689Skan
154169689Skanlambda_body_vector
155169689Skanlambda_body_vector_new (int size)
156169689Skan{
157169689Skan  lambda_body_vector ret;
158169689Skan
159169689Skan  ret = ggc_alloc (sizeof (*ret));
160169689Skan  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
161169689Skan  LBV_SIZE (ret) = size;
162169689Skan  LBV_DENOMINATOR (ret) = 1;
163169689Skan  return ret;
164169689Skan}
165169689Skan
166169689Skan/* Compute the new coefficients for the vector based on the
167169689Skan  *inverse* of the transformation matrix.  */
168169689Skan
169169689Skanlambda_body_vector
170169689Skanlambda_body_vector_compute_new (lambda_trans_matrix transform,
171169689Skan				lambda_body_vector vect)
172169689Skan{
173169689Skan  lambda_body_vector temp;
174169689Skan  int depth;
175169689Skan
176169689Skan  /* Make sure the matrix is square.  */
177169689Skan  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
178169689Skan
179169689Skan  depth = LTM_ROWSIZE (transform);
180169689Skan
181169689Skan  temp = lambda_body_vector_new (depth);
182169689Skan  LBV_DENOMINATOR (temp) =
183169689Skan    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
184169689Skan  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
185169689Skan			     LTM_MATRIX (transform), depth,
186169689Skan			     LBV_COEFFICIENTS (temp));
187169689Skan  LBV_SIZE (temp) = LBV_SIZE (vect);
188169689Skan  return temp;
189169689Skan}
190169689Skan
191169689Skan/* Print out a lambda body vector.  */
192169689Skan
193169689Skanvoid
194169689Skanprint_lambda_body_vector (FILE * outfile, lambda_body_vector body)
195169689Skan{
196169689Skan  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
197169689Skan}
198169689Skan
199169689Skan/* Return TRUE if two linear expressions are equal.  */
200169689Skan
201169689Skanstatic bool
202169689Skanlle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
203169689Skan	   int depth, int invariants)
204169689Skan{
205169689Skan  int i;
206169689Skan
207169689Skan  if (lle1 == NULL || lle2 == NULL)
208169689Skan    return false;
209169689Skan  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
210169689Skan    return false;
211169689Skan  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
212169689Skan    return false;
213169689Skan  for (i = 0; i < depth; i++)
214169689Skan    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
215169689Skan      return false;
216169689Skan  for (i = 0; i < invariants; i++)
217169689Skan    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
218169689Skan	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
219169689Skan      return false;
220169689Skan  return true;
221169689Skan}
222169689Skan
223169689Skan/* Create a new linear expression with dimension DIM, and total number
224169689Skan   of invariants INVARIANTS.  */
225169689Skan
226169689Skanlambda_linear_expression
227169689Skanlambda_linear_expression_new (int dim, int invariants)
228169689Skan{
229169689Skan  lambda_linear_expression ret;
230169689Skan
231169689Skan  ret = ggc_alloc_cleared (sizeof (*ret));
232169689Skan
233169689Skan  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
234169689Skan  LLE_CONSTANT (ret) = 0;
235169689Skan  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
236169689Skan  LLE_DENOMINATOR (ret) = 1;
237169689Skan  LLE_NEXT (ret) = NULL;
238169689Skan
239169689Skan  return ret;
240169689Skan}
241169689Skan
242169689Skan/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
243169689Skan   The starting letter used for variable names is START.  */
244169689Skan
245169689Skanstatic void
246169689Skanprint_linear_expression (FILE * outfile, lambda_vector expr, int size,
247169689Skan			 char start)
248169689Skan{
249169689Skan  int i;
250169689Skan  bool first = true;
251169689Skan  for (i = 0; i < size; i++)
252169689Skan    {
253169689Skan      if (expr[i] != 0)
254169689Skan	{
255169689Skan	  if (first)
256169689Skan	    {
257169689Skan	      if (expr[i] < 0)
258169689Skan		fprintf (outfile, "-");
259169689Skan	      first = false;
260169689Skan	    }
261169689Skan	  else if (expr[i] > 0)
262169689Skan	    fprintf (outfile, " + ");
263169689Skan	  else
264169689Skan	    fprintf (outfile, " - ");
265169689Skan	  if (abs (expr[i]) == 1)
266169689Skan	    fprintf (outfile, "%c", start + i);
267169689Skan	  else
268169689Skan	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
269169689Skan	}
270169689Skan    }
271169689Skan}
272169689Skan
273169689Skan/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
274169689Skan   depth/number of coefficients is given by DEPTH, the number of invariants is
275169689Skan   given by INVARIANTS, and the character to start variable names with is given
276169689Skan   by START.  */
277169689Skan
278169689Skanvoid
279169689Skanprint_lambda_linear_expression (FILE * outfile,
280169689Skan				lambda_linear_expression expr,
281169689Skan				int depth, int invariants, char start)
282169689Skan{
283169689Skan  fprintf (outfile, "\tLinear expression: ");
284169689Skan  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
285169689Skan  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
286169689Skan  fprintf (outfile, "  invariants: ");
287169689Skan  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
288169689Skan			   invariants, 'A');
289169689Skan  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
290169689Skan}
291169689Skan
292169689Skan/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
293169689Skan   coefficients is given by DEPTH, the number of invariants is
294169689Skan   given by INVARIANTS, and the character to start variable names with is given
295169689Skan   by START.  */
296169689Skan
297169689Skanvoid
298169689Skanprint_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
299169689Skan		   int invariants, char start)
300169689Skan{
301169689Skan  int step;
302169689Skan  lambda_linear_expression expr;
303169689Skan
304169689Skan  gcc_assert (loop);
305169689Skan
306169689Skan  expr = LL_LINEAR_OFFSET (loop);
307169689Skan  step = LL_STEP (loop);
308169689Skan  fprintf (outfile, "  step size = %d \n", step);
309169689Skan
310169689Skan  if (expr)
311169689Skan    {
312169689Skan      fprintf (outfile, "  linear offset: \n");
313169689Skan      print_lambda_linear_expression (outfile, expr, depth, invariants,
314169689Skan				      start);
315169689Skan    }
316169689Skan
317169689Skan  fprintf (outfile, "  lower bound: \n");
318169689Skan  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
319169689Skan    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
320169689Skan  fprintf (outfile, "  upper bound: \n");
321169689Skan  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
322169689Skan    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
323169689Skan}
324169689Skan
325169689Skan/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
326169689Skan   number of invariants.  */
327169689Skan
328169689Skanlambda_loopnest
329169689Skanlambda_loopnest_new (int depth, int invariants)
330169689Skan{
331169689Skan  lambda_loopnest ret;
332169689Skan  ret = ggc_alloc (sizeof (*ret));
333169689Skan
334169689Skan  LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
335169689Skan  LN_DEPTH (ret) = depth;
336169689Skan  LN_INVARIANTS (ret) = invariants;
337169689Skan
338169689Skan  return ret;
339169689Skan}
340169689Skan
341169689Skan/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
342169689Skan   character to use for loop names is given by START.  */
343169689Skan
344169689Skanvoid
345169689Skanprint_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
346169689Skan{
347169689Skan  int i;
348169689Skan  for (i = 0; i < LN_DEPTH (nest); i++)
349169689Skan    {
350169689Skan      fprintf (outfile, "Loop %c\n", start + i);
351169689Skan      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
352169689Skan			 LN_INVARIANTS (nest), 'i');
353169689Skan      fprintf (outfile, "\n");
354169689Skan    }
355169689Skan}
356169689Skan
357169689Skan/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
358169689Skan   of invariants.  */
359169689Skan
360169689Skanstatic lambda_lattice
361169689Skanlambda_lattice_new (int depth, int invariants)
362169689Skan{
363169689Skan  lambda_lattice ret;
364169689Skan  ret = ggc_alloc (sizeof (*ret));
365169689Skan  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
366169689Skan  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
367169689Skan  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
368169689Skan  LATTICE_DIMENSION (ret) = depth;
369169689Skan  LATTICE_INVARIANTS (ret) = invariants;
370169689Skan  return ret;
371169689Skan}
372169689Skan
373169689Skan/* Compute the lattice base for NEST.  The lattice base is essentially a
374169689Skan   non-singular transform from a dense base space to a sparse iteration space.
375169689Skan   We use it so that we don't have to specially handle the case of a sparse
376169689Skan   iteration space in other parts of the algorithm.  As a result, this routine
377169689Skan   only does something interesting (IE produce a matrix that isn't the
378169689Skan   identity matrix) if NEST is a sparse space.  */
379169689Skan
380169689Skanstatic lambda_lattice
381169689Skanlambda_lattice_compute_base (lambda_loopnest nest)
382169689Skan{
383169689Skan  lambda_lattice ret;
384169689Skan  int depth, invariants;
385169689Skan  lambda_matrix base;
386169689Skan
387169689Skan  int i, j, step;
388169689Skan  lambda_loop loop;
389169689Skan  lambda_linear_expression expression;
390169689Skan
391169689Skan  depth = LN_DEPTH (nest);
392169689Skan  invariants = LN_INVARIANTS (nest);
393169689Skan
394169689Skan  ret = lambda_lattice_new (depth, invariants);
395169689Skan  base = LATTICE_BASE (ret);
396169689Skan  for (i = 0; i < depth; i++)
397169689Skan    {
398169689Skan      loop = LN_LOOPS (nest)[i];
399169689Skan      gcc_assert (loop);
400169689Skan      step = LL_STEP (loop);
401169689Skan      /* If we have a step of 1, then the base is one, and the
402169689Skan         origin and invariant coefficients are 0.  */
403169689Skan      if (step == 1)
404169689Skan	{
405169689Skan	  for (j = 0; j < depth; j++)
406169689Skan	    base[i][j] = 0;
407169689Skan	  base[i][i] = 1;
408169689Skan	  LATTICE_ORIGIN (ret)[i] = 0;
409169689Skan	  for (j = 0; j < invariants; j++)
410169689Skan	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
411169689Skan	}
412169689Skan      else
413169689Skan	{
414169689Skan	  /* Otherwise, we need the lower bound expression (which must
415169689Skan	     be an affine function)  to determine the base.  */
416169689Skan	  expression = LL_LOWER_BOUND (loop);
417169689Skan	  gcc_assert (expression && !LLE_NEXT (expression)
418169689Skan		      && LLE_DENOMINATOR (expression) == 1);
419169689Skan
420169689Skan	  /* The lower triangular portion of the base is going to be the
421169689Skan	     coefficient times the step */
422169689Skan	  for (j = 0; j < i; j++)
423169689Skan	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
424169689Skan	      * LL_STEP (LN_LOOPS (nest)[j]);
425169689Skan	  base[i][i] = step;
426169689Skan	  for (j = i + 1; j < depth; j++)
427169689Skan	    base[i][j] = 0;
428169689Skan
429169689Skan	  /* Origin for this loop is the constant of the lower bound
430169689Skan	     expression.  */
431169689Skan	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
432169689Skan
433169689Skan	  /* Coefficient for the invariants are equal to the invariant
434169689Skan	     coefficients in the expression.  */
435169689Skan	  for (j = 0; j < invariants; j++)
436169689Skan	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
437169689Skan	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
438169689Skan	}
439169689Skan    }
440169689Skan  return ret;
441169689Skan}
442169689Skan
443169689Skan/* Compute the least common multiple of two numbers A and B .  */
444169689Skan
445169689Skanstatic int
446169689Skanlcm (int a, int b)
447169689Skan{
448169689Skan  return (abs (a) * abs (b) / gcd (a, b));
449169689Skan}
450169689Skan
451169689Skan/* Perform Fourier-Motzkin elimination to calculate the bounds of the
452169689Skan   auxiliary nest.
453169689Skan   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
454169689Skan   it is easy to calculate the answer and bounds.
455169689Skan   A sketch of how it works:
456169689Skan   Given a system of linear inequalities, ai * xj >= bk, you can always
457169689Skan   rewrite the constraints so they are all of the form
458169689Skan   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
459169689Skan   in b1 ... bk, and some a in a1...ai)
460169689Skan   You can then eliminate this x from the non-constant inequalities by
461169689Skan   rewriting these as a <= b, x >= constant, and delete the x variable.
462169689Skan   You can then repeat this for any remaining x variables, and then we have
463169689Skan   an easy to use variable <= constant (or no variables at all) form that we
464169689Skan   can construct our bounds from.
465169689Skan
466169689Skan   In our case, each time we eliminate, we construct part of the bound from
467169689Skan   the ith variable, then delete the ith variable.
468169689Skan
469169689Skan   Remember the constant are in our vector a, our coefficient matrix is A,
470169689Skan   and our invariant coefficient matrix is B.
471169689Skan
472169689Skan   SIZE is the size of the matrices being passed.
473169689Skan   DEPTH is the loop nest depth.
474169689Skan   INVARIANTS is the number of loop invariants.
475169689Skan   A, B, and a are the coefficient matrix, invariant coefficient, and a
476169689Skan   vector of constants, respectively.  */
477169689Skan
478169689Skanstatic lambda_loopnest
479169689Skancompute_nest_using_fourier_motzkin (int size,
480169689Skan				    int depth,
481169689Skan				    int invariants,
482169689Skan				    lambda_matrix A,
483169689Skan				    lambda_matrix B,
484169689Skan				    lambda_vector a)
485169689Skan{
486169689Skan
487169689Skan  int multiple, f1, f2;
488169689Skan  int i, j, k;
489169689Skan  lambda_linear_expression expression;
490169689Skan  lambda_loop loop;
491169689Skan  lambda_loopnest auxillary_nest;
492169689Skan  lambda_matrix swapmatrix, A1, B1;
493169689Skan  lambda_vector swapvector, a1;
494169689Skan  int newsize;
495169689Skan
496169689Skan  A1 = lambda_matrix_new (128, depth);
497169689Skan  B1 = lambda_matrix_new (128, invariants);
498169689Skan  a1 = lambda_vector_new (128);
499169689Skan
500169689Skan  auxillary_nest = lambda_loopnest_new (depth, invariants);
501169689Skan
502169689Skan  for (i = depth - 1; i >= 0; i--)
503169689Skan    {
504169689Skan      loop = lambda_loop_new ();
505169689Skan      LN_LOOPS (auxillary_nest)[i] = loop;
506169689Skan      LL_STEP (loop) = 1;
507169689Skan
508169689Skan      for (j = 0; j < size; j++)
509169689Skan	{
510169689Skan	  if (A[j][i] < 0)
511169689Skan	    {
512169689Skan	      /* Any linear expression in the matrix with a coefficient less
513169689Skan		 than 0 becomes part of the new lower bound.  */
514169689Skan	      expression = lambda_linear_expression_new (depth, invariants);
515169689Skan
516169689Skan	      for (k = 0; k < i; k++)
517169689Skan		LLE_COEFFICIENTS (expression)[k] = A[j][k];
518169689Skan
519169689Skan	      for (k = 0; k < invariants; k++)
520169689Skan		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
521169689Skan
522169689Skan	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
523169689Skan	      LLE_CONSTANT (expression) = -1 * a[j];
524169689Skan
525169689Skan	      /* Ignore if identical to the existing lower bound.  */
526169689Skan	      if (!lle_equal (LL_LOWER_BOUND (loop),
527169689Skan			      expression, depth, invariants))
528169689Skan		{
529169689Skan		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
530169689Skan		  LL_LOWER_BOUND (loop) = expression;
531169689Skan		}
532169689Skan
533169689Skan	    }
534169689Skan	  else if (A[j][i] > 0)
535169689Skan	    {
536169689Skan	      /* Any linear expression with a coefficient greater than 0
537169689Skan		 becomes part of the new upper bound.  */
538169689Skan	      expression = lambda_linear_expression_new (depth, invariants);
539169689Skan	      for (k = 0; k < i; k++)
540169689Skan		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
541169689Skan
542169689Skan	      for (k = 0; k < invariants; k++)
543169689Skan		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
544169689Skan
545169689Skan	      LLE_DENOMINATOR (expression) = A[j][i];
546169689Skan	      LLE_CONSTANT (expression) = a[j];
547169689Skan
548169689Skan	      /* Ignore if identical to the existing upper bound.  */
549169689Skan	      if (!lle_equal (LL_UPPER_BOUND (loop),
550169689Skan			      expression, depth, invariants))
551169689Skan		{
552169689Skan		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
553169689Skan		  LL_UPPER_BOUND (loop) = expression;
554169689Skan		}
555169689Skan
556169689Skan	    }
557169689Skan	}
558169689Skan
559169689Skan      /* This portion creates a new system of linear inequalities by deleting
560169689Skan	 the i'th variable, reducing the system by one variable.  */
561169689Skan      newsize = 0;
562169689Skan      for (j = 0; j < size; j++)
563169689Skan	{
564169689Skan	  /* If the coefficient for the i'th variable is 0, then we can just
565169689Skan	     eliminate the variable straightaway.  Otherwise, we have to
566169689Skan	     multiply through by the coefficients we are eliminating.  */
567169689Skan	  if (A[j][i] == 0)
568169689Skan	    {
569169689Skan	      lambda_vector_copy (A[j], A1[newsize], depth);
570169689Skan	      lambda_vector_copy (B[j], B1[newsize], invariants);
571169689Skan	      a1[newsize] = a[j];
572169689Skan	      newsize++;
573169689Skan	    }
574169689Skan	  else if (A[j][i] > 0)
575169689Skan	    {
576169689Skan	      for (k = 0; k < size; k++)
577169689Skan		{
578169689Skan		  if (A[k][i] < 0)
579169689Skan		    {
580169689Skan		      multiple = lcm (A[j][i], A[k][i]);
581169689Skan		      f1 = multiple / A[j][i];
582169689Skan		      f2 = -1 * multiple / A[k][i];
583169689Skan
584169689Skan		      lambda_vector_add_mc (A[j], f1, A[k], f2,
585169689Skan					    A1[newsize], depth);
586169689Skan		      lambda_vector_add_mc (B[j], f1, B[k], f2,
587169689Skan					    B1[newsize], invariants);
588169689Skan		      a1[newsize] = f1 * a[j] + f2 * a[k];
589169689Skan		      newsize++;
590169689Skan		    }
591169689Skan		}
592169689Skan	    }
593169689Skan	}
594169689Skan
595169689Skan      swapmatrix = A;
596169689Skan      A = A1;
597169689Skan      A1 = swapmatrix;
598169689Skan
599169689Skan      swapmatrix = B;
600169689Skan      B = B1;
601169689Skan      B1 = swapmatrix;
602169689Skan
603169689Skan      swapvector = a;
604169689Skan      a = a1;
605169689Skan      a1 = swapvector;
606169689Skan
607169689Skan      size = newsize;
608169689Skan    }
609169689Skan
610169689Skan  return auxillary_nest;
611169689Skan}
612169689Skan
613169689Skan/* Compute the loop bounds for the auxiliary space NEST.
614169689Skan   Input system used is Ax <= b.  TRANS is the unimodular transformation.
615169689Skan   Given the original nest, this function will
616169689Skan   1. Convert the nest into matrix form, which consists of a matrix for the
617169689Skan   coefficients, a matrix for the
618169689Skan   invariant coefficients, and a vector for the constants.
619169689Skan   2. Use the matrix form to calculate the lattice base for the nest (which is
620169689Skan   a dense space)
621169689Skan   3. Compose the dense space transform with the user specified transform, to
622169689Skan   get a transform we can easily calculate transformed bounds for.
623169689Skan   4. Multiply the composed transformation matrix times the matrix form of the
624169689Skan   loop.
625169689Skan   5. Transform the newly created matrix (from step 4) back into a loop nest
626169689Skan   using Fourier-Motzkin elimination to figure out the bounds.  */
627169689Skan
628169689Skanstatic lambda_loopnest
629169689Skanlambda_compute_auxillary_space (lambda_loopnest nest,
630169689Skan				lambda_trans_matrix trans)
631169689Skan{
632169689Skan  lambda_matrix A, B, A1, B1;
633169689Skan  lambda_vector a, a1;
634169689Skan  lambda_matrix invertedtrans;
635169689Skan  int depth, invariants, size;
636169689Skan  int i, j;
637169689Skan  lambda_loop loop;
638169689Skan  lambda_linear_expression expression;
639169689Skan  lambda_lattice lattice;
640169689Skan
641169689Skan  depth = LN_DEPTH (nest);
642169689Skan  invariants = LN_INVARIANTS (nest);
643169689Skan
644169689Skan  /* Unfortunately, we can't know the number of constraints we'll have
645169689Skan     ahead of time, but this should be enough even in ridiculous loop nest
646169689Skan     cases. We must not go over this limit.  */
647169689Skan  A = lambda_matrix_new (128, depth);
648169689Skan  B = lambda_matrix_new (128, invariants);
649169689Skan  a = lambda_vector_new (128);
650169689Skan
651169689Skan  A1 = lambda_matrix_new (128, depth);
652169689Skan  B1 = lambda_matrix_new (128, invariants);
653169689Skan  a1 = lambda_vector_new (128);
654169689Skan
655169689Skan  /* Store the bounds in the equation matrix A, constant vector a, and
656169689Skan     invariant matrix B, so that we have Ax <= a + B.
657169689Skan     This requires a little equation rearranging so that everything is on the
658169689Skan     correct side of the inequality.  */
659169689Skan  size = 0;
660169689Skan  for (i = 0; i < depth; i++)
661169689Skan    {
662169689Skan      loop = LN_LOOPS (nest)[i];
663169689Skan
664169689Skan      /* First we do the lower bound.  */
665169689Skan      if (LL_STEP (loop) > 0)
666169689Skan	expression = LL_LOWER_BOUND (loop);
667169689Skan      else
668169689Skan	expression = LL_UPPER_BOUND (loop);
669169689Skan
670169689Skan      for (; expression != NULL; expression = LLE_NEXT (expression))
671169689Skan	{
672169689Skan	  /* Fill in the coefficient.  */
673169689Skan	  for (j = 0; j < i; j++)
674169689Skan	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
675169689Skan
676169689Skan	  /* And the invariant coefficient.  */
677169689Skan	  for (j = 0; j < invariants; j++)
678169689Skan	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
679169689Skan
680169689Skan	  /* And the constant.  */
681169689Skan	  a[size] = LLE_CONSTANT (expression);
682169689Skan
683169689Skan	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
684169689Skan	     constants and single variables on   */
685169689Skan	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
686169689Skan	  a[size] *= -1;
687169689Skan	  for (j = 0; j < invariants; j++)
688169689Skan	    B[size][j] *= -1;
689169689Skan
690169689Skan	  size++;
691169689Skan	  /* Need to increase matrix sizes above.  */
692169689Skan	  gcc_assert (size <= 127);
693169689Skan
694169689Skan	}
695169689Skan
696169689Skan      /* Then do the exact same thing for the upper bounds.  */
697169689Skan      if (LL_STEP (loop) > 0)
698169689Skan	expression = LL_UPPER_BOUND (loop);
699169689Skan      else
700169689Skan	expression = LL_LOWER_BOUND (loop);
701169689Skan
702169689Skan      for (; expression != NULL; expression = LLE_NEXT (expression))
703169689Skan	{
704169689Skan	  /* Fill in the coefficient.  */
705169689Skan	  for (j = 0; j < i; j++)
706169689Skan	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
707169689Skan
708169689Skan	  /* And the invariant coefficient.  */
709169689Skan	  for (j = 0; j < invariants; j++)
710169689Skan	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
711169689Skan
712169689Skan	  /* And the constant.  */
713169689Skan	  a[size] = LLE_CONSTANT (expression);
714169689Skan
715169689Skan	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
716169689Skan	  for (j = 0; j < i; j++)
717169689Skan	    A[size][j] *= -1;
718169689Skan	  A[size][i] = LLE_DENOMINATOR (expression);
719169689Skan	  size++;
720169689Skan	  /* Need to increase matrix sizes above.  */
721169689Skan	  gcc_assert (size <= 127);
722169689Skan
723169689Skan	}
724169689Skan    }
725169689Skan
726169689Skan  /* Compute the lattice base x = base * y + origin, where y is the
727169689Skan     base space.  */
728169689Skan  lattice = lambda_lattice_compute_base (nest);
729169689Skan
730169689Skan  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
731169689Skan
732169689Skan  /* A1 = A * L */
733169689Skan  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
734169689Skan
735169689Skan  /* a1 = a - A * origin constant.  */
736169689Skan  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
737169689Skan  lambda_vector_add_mc (a, 1, a1, -1, a1, size);
738169689Skan
739169689Skan  /* B1 = B - A * origin invariant.  */
740169689Skan  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
741169689Skan		      invariants);
742169689Skan  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
743169689Skan
744169689Skan  /* Now compute the auxiliary space bounds by first inverting U, multiplying
745169689Skan     it by A1, then performing Fourier-Motzkin.  */
746169689Skan
747169689Skan  invertedtrans = lambda_matrix_new (depth, depth);
748169689Skan
749169689Skan  /* Compute the inverse of U.  */
750169689Skan  lambda_matrix_inverse (LTM_MATRIX (trans),
751169689Skan			 invertedtrans, depth);
752169689Skan
753169689Skan  /* A = A1 inv(U).  */
754169689Skan  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
755169689Skan
756169689Skan  return compute_nest_using_fourier_motzkin (size, depth, invariants,
757169689Skan					     A, B1, a1);
758169689Skan}
759169689Skan
760169689Skan/* Compute the loop bounds for the target space, using the bounds of
761169689Skan   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
762169689Skan   The target space loop bounds are computed by multiplying the triangular
763169689Skan   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
764169689Skan   the loop steps (positive or negative) is then used to swap the bounds if
765169689Skan   the loop counts downwards.
766169689Skan   Return the target loopnest.  */
767169689Skan
768169689Skanstatic lambda_loopnest
769169689Skanlambda_compute_target_space (lambda_loopnest auxillary_nest,
770169689Skan			     lambda_trans_matrix H, lambda_vector stepsigns)
771169689Skan{
772169689Skan  lambda_matrix inverse, H1;
773169689Skan  int determinant, i, j;
774169689Skan  int gcd1, gcd2;
775169689Skan  int factor;
776169689Skan
777169689Skan  lambda_loopnest target_nest;
778169689Skan  int depth, invariants;
779169689Skan  lambda_matrix target;
780169689Skan
781169689Skan  lambda_loop auxillary_loop, target_loop;
782169689Skan  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
783169689Skan
784169689Skan  depth = LN_DEPTH (auxillary_nest);
785169689Skan  invariants = LN_INVARIANTS (auxillary_nest);
786169689Skan
787169689Skan  inverse = lambda_matrix_new (depth, depth);
788169689Skan  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
789169689Skan
790169689Skan  /* H1 is H excluding its diagonal.  */
791169689Skan  H1 = lambda_matrix_new (depth, depth);
792169689Skan  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
793169689Skan
794169689Skan  for (i = 0; i < depth; i++)
795169689Skan    H1[i][i] = 0;
796169689Skan
797169689Skan  /* Computes the linear offsets of the loop bounds.  */
798169689Skan  target = lambda_matrix_new (depth, depth);
799169689Skan  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
800169689Skan
801169689Skan  target_nest = lambda_loopnest_new (depth, invariants);
802169689Skan
803169689Skan  for (i = 0; i < depth; i++)
804169689Skan    {
805169689Skan
806169689Skan      /* Get a new loop structure.  */
807169689Skan      target_loop = lambda_loop_new ();
808169689Skan      LN_LOOPS (target_nest)[i] = target_loop;
809169689Skan
810169689Skan      /* Computes the gcd of the coefficients of the linear part.  */
811169689Skan      gcd1 = lambda_vector_gcd (target[i], i);
812169689Skan
813169689Skan      /* Include the denominator in the GCD.  */
814169689Skan      gcd1 = gcd (gcd1, determinant);
815169689Skan
816169689Skan      /* Now divide through by the gcd.  */
817169689Skan      for (j = 0; j < i; j++)
818169689Skan	target[i][j] = target[i][j] / gcd1;
819169689Skan
820169689Skan      expression = lambda_linear_expression_new (depth, invariants);
821169689Skan      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
822169689Skan      LLE_DENOMINATOR (expression) = determinant / gcd1;
823169689Skan      LLE_CONSTANT (expression) = 0;
824169689Skan      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
825169689Skan			   invariants);
826169689Skan      LL_LINEAR_OFFSET (target_loop) = expression;
827169689Skan    }
828169689Skan
829169689Skan  /* For each loop, compute the new bounds from H.  */
830169689Skan  for (i = 0; i < depth; i++)
831169689Skan    {
832169689Skan      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
833169689Skan      target_loop = LN_LOOPS (target_nest)[i];
834169689Skan      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
835169689Skan      factor = LTM_MATRIX (H)[i][i];
836169689Skan
837169689Skan      /* First we do the lower bound.  */
838169689Skan      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
839169689Skan
840169689Skan      for (; auxillary_expr != NULL;
841169689Skan	   auxillary_expr = LLE_NEXT (auxillary_expr))
842169689Skan	{
843169689Skan	  target_expr = lambda_linear_expression_new (depth, invariants);
844169689Skan	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
845169689Skan				     depth, inverse, depth,
846169689Skan				     LLE_COEFFICIENTS (target_expr));
847169689Skan	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
848169689Skan				    LLE_COEFFICIENTS (target_expr), depth,
849169689Skan				    factor);
850169689Skan
851169689Skan	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
852169689Skan	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
853169689Skan			      LLE_INVARIANT_COEFFICIENTS (target_expr),
854169689Skan			      invariants);
855169689Skan	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
856169689Skan				    LLE_INVARIANT_COEFFICIENTS (target_expr),
857169689Skan				    invariants, factor);
858169689Skan	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
859169689Skan
860169689Skan	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
861169689Skan	    {
862169689Skan	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
863169689Skan		* determinant;
864169689Skan	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
865169689Skan					(target_expr),
866169689Skan					LLE_INVARIANT_COEFFICIENTS
867169689Skan					(target_expr), invariants,
868169689Skan					determinant);
869169689Skan	      LLE_DENOMINATOR (target_expr) =
870169689Skan		LLE_DENOMINATOR (target_expr) * determinant;
871169689Skan	    }
872169689Skan	  /* Find the gcd and divide by it here, rather than doing it
873169689Skan	     at the tree level.  */
874169689Skan	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
875169689Skan	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
876169689Skan				    invariants);
877169689Skan	  gcd1 = gcd (gcd1, gcd2);
878169689Skan	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
879169689Skan	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
880169689Skan	  for (j = 0; j < depth; j++)
881169689Skan	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
882169689Skan	  for (j = 0; j < invariants; j++)
883169689Skan	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
884169689Skan	  LLE_CONSTANT (target_expr) /= gcd1;
885169689Skan	  LLE_DENOMINATOR (target_expr) /= gcd1;
886169689Skan	  /* Ignore if identical to existing bound.  */
887169689Skan	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
888169689Skan			  invariants))
889169689Skan	    {
890169689Skan	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
891169689Skan	      LL_LOWER_BOUND (target_loop) = target_expr;
892169689Skan	    }
893169689Skan	}
894169689Skan      /* Now do the upper bound.  */
895169689Skan      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
896169689Skan
897169689Skan      for (; auxillary_expr != NULL;
898169689Skan	   auxillary_expr = LLE_NEXT (auxillary_expr))
899169689Skan	{
900169689Skan	  target_expr = lambda_linear_expression_new (depth, invariants);
901169689Skan	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
902169689Skan				     depth, inverse, depth,
903169689Skan				     LLE_COEFFICIENTS (target_expr));
904169689Skan	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
905169689Skan				    LLE_COEFFICIENTS (target_expr), depth,
906169689Skan				    factor);
907169689Skan	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
908169689Skan	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
909169689Skan			      LLE_INVARIANT_COEFFICIENTS (target_expr),
910169689Skan			      invariants);
911169689Skan	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
912169689Skan				    LLE_INVARIANT_COEFFICIENTS (target_expr),
913169689Skan				    invariants, factor);
914169689Skan	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
915169689Skan
916169689Skan	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
917169689Skan	    {
918169689Skan	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
919169689Skan		* determinant;
920169689Skan	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
921169689Skan					(target_expr),
922169689Skan					LLE_INVARIANT_COEFFICIENTS
923169689Skan					(target_expr), invariants,
924169689Skan					determinant);
925169689Skan	      LLE_DENOMINATOR (target_expr) =
926169689Skan		LLE_DENOMINATOR (target_expr) * determinant;
927169689Skan	    }
928169689Skan	  /* Find the gcd and divide by it here, instead of at the
929169689Skan	     tree level.  */
930169689Skan	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
931169689Skan	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
932169689Skan				    invariants);
933169689Skan	  gcd1 = gcd (gcd1, gcd2);
934169689Skan	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
935169689Skan	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
936169689Skan	  for (j = 0; j < depth; j++)
937169689Skan	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
938169689Skan	  for (j = 0; j < invariants; j++)
939169689Skan	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
940169689Skan	  LLE_CONSTANT (target_expr) /= gcd1;
941169689Skan	  LLE_DENOMINATOR (target_expr) /= gcd1;
942169689Skan	  /* Ignore if equal to existing bound.  */
943169689Skan	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
944169689Skan			  invariants))
945169689Skan	    {
946169689Skan	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
947169689Skan	      LL_UPPER_BOUND (target_loop) = target_expr;
948169689Skan	    }
949169689Skan	}
950169689Skan    }
951169689Skan  for (i = 0; i < depth; i++)
952169689Skan    {
953169689Skan      target_loop = LN_LOOPS (target_nest)[i];
954169689Skan      /* If necessary, exchange the upper and lower bounds and negate
955169689Skan         the step size.  */
956169689Skan      if (stepsigns[i] < 0)
957169689Skan	{
958169689Skan	  LL_STEP (target_loop) *= -1;
959169689Skan	  tmp_expr = LL_LOWER_BOUND (target_loop);
960169689Skan	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
961169689Skan	  LL_UPPER_BOUND (target_loop) = tmp_expr;
962169689Skan	}
963169689Skan    }
964169689Skan  return target_nest;
965169689Skan}
966169689Skan
967169689Skan/* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
968169689Skan   result.  */
969169689Skan
970169689Skanstatic lambda_vector
971169689Skanlambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
972169689Skan{
973169689Skan  lambda_matrix matrix, H;
974169689Skan  int size;
975169689Skan  lambda_vector newsteps;
976169689Skan  int i, j, factor, minimum_column;
977169689Skan  int temp;
978169689Skan
979169689Skan  matrix = LTM_MATRIX (trans);
980169689Skan  size = LTM_ROWSIZE (trans);
981169689Skan  H = lambda_matrix_new (size, size);
982169689Skan
983169689Skan  newsteps = lambda_vector_new (size);
984169689Skan  lambda_vector_copy (stepsigns, newsteps, size);
985169689Skan
986169689Skan  lambda_matrix_copy (matrix, H, size, size);
987169689Skan
988169689Skan  for (j = 0; j < size; j++)
989169689Skan    {
990169689Skan      lambda_vector row;
991169689Skan      row = H[j];
992169689Skan      for (i = j; i < size; i++)
993169689Skan	if (row[i] < 0)
994169689Skan	  lambda_matrix_col_negate (H, size, i);
995169689Skan      while (lambda_vector_first_nz (row, size, j + 1) < size)
996169689Skan	{
997169689Skan	  minimum_column = lambda_vector_min_nz (row, size, j);
998169689Skan	  lambda_matrix_col_exchange (H, size, j, minimum_column);
999169689Skan
1000169689Skan	  temp = newsteps[j];
1001169689Skan	  newsteps[j] = newsteps[minimum_column];
1002169689Skan	  newsteps[minimum_column] = temp;
1003169689Skan
1004169689Skan	  for (i = j + 1; i < size; i++)
1005169689Skan	    {
1006169689Skan	      factor = row[i] / row[j];
1007169689Skan	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
1008169689Skan	    }
1009169689Skan	}
1010169689Skan    }
1011169689Skan  return newsteps;
1012169689Skan}
1013169689Skan
1014169689Skan/* Transform NEST according to TRANS, and return the new loopnest.
1015169689Skan   This involves
1016169689Skan   1. Computing a lattice base for the transformation
1017169689Skan   2. Composing the dense base with the specified transformation (TRANS)
1018169689Skan   3. Decomposing the combined transformation into a lower triangular portion,
1019169689Skan   and a unimodular portion.
1020169689Skan   4. Computing the auxiliary nest using the unimodular portion.
1021169689Skan   5. Computing the target nest using the auxiliary nest and the lower
1022169689Skan   triangular portion.  */
1023169689Skan
1024169689Skanlambda_loopnest
1025169689Skanlambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1026169689Skan{
1027169689Skan  lambda_loopnest auxillary_nest, target_nest;
1028169689Skan
1029169689Skan  int depth, invariants;
1030169689Skan  int i, j;
1031169689Skan  lambda_lattice lattice;
1032169689Skan  lambda_trans_matrix trans1, H, U;
1033169689Skan  lambda_loop loop;
1034169689Skan  lambda_linear_expression expression;
1035169689Skan  lambda_vector origin;
1036169689Skan  lambda_matrix origin_invariants;
1037169689Skan  lambda_vector stepsigns;
1038169689Skan  int f;
1039169689Skan
1040169689Skan  depth = LN_DEPTH (nest);
1041169689Skan  invariants = LN_INVARIANTS (nest);
1042169689Skan
1043169689Skan  /* Keep track of the signs of the loop steps.  */
1044169689Skan  stepsigns = lambda_vector_new (depth);
1045169689Skan  for (i = 0; i < depth; i++)
1046169689Skan    {
1047169689Skan      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1048169689Skan	stepsigns[i] = 1;
1049169689Skan      else
1050169689Skan	stepsigns[i] = -1;
1051169689Skan    }
1052169689Skan
1053169689Skan  /* Compute the lattice base.  */
1054169689Skan  lattice = lambda_lattice_compute_base (nest);
1055169689Skan  trans1 = lambda_trans_matrix_new (depth, depth);
1056169689Skan
1057169689Skan  /* Multiply the transformation matrix by the lattice base.  */
1058169689Skan
1059169689Skan  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1060169689Skan		      LTM_MATRIX (trans1), depth, depth, depth);
1061169689Skan
1062169689Skan  /* Compute the Hermite normal form for the new transformation matrix.  */
1063169689Skan  H = lambda_trans_matrix_new (depth, depth);
1064169689Skan  U = lambda_trans_matrix_new (depth, depth);
1065169689Skan  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1066169689Skan			 LTM_MATRIX (U));
1067169689Skan
1068169689Skan  /* Compute the auxiliary loop nest's space from the unimodular
1069169689Skan     portion.  */
1070169689Skan  auxillary_nest = lambda_compute_auxillary_space (nest, U);
1071169689Skan
1072169689Skan  /* Compute the loop step signs from the old step signs and the
1073169689Skan     transformation matrix.  */
1074169689Skan  stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1075169689Skan
1076169689Skan  /* Compute the target loop nest space from the auxiliary nest and
1077169689Skan     the lower triangular matrix H.  */
1078169689Skan  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1079169689Skan  origin = lambda_vector_new (depth);
1080169689Skan  origin_invariants = lambda_matrix_new (depth, invariants);
1081169689Skan  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1082169689Skan			     LATTICE_ORIGIN (lattice), origin);
1083169689Skan  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1084169689Skan		      origin_invariants, depth, depth, invariants);
1085169689Skan
1086169689Skan  for (i = 0; i < depth; i++)
1087169689Skan    {
1088169689Skan      loop = LN_LOOPS (target_nest)[i];
1089169689Skan      expression = LL_LINEAR_OFFSET (loop);
1090169689Skan      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1091169689Skan	f = 1;
1092169689Skan      else
1093169689Skan	f = LLE_DENOMINATOR (expression);
1094169689Skan
1095169689Skan      LLE_CONSTANT (expression) += f * origin[i];
1096169689Skan
1097169689Skan      for (j = 0; j < invariants; j++)
1098169689Skan	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1099169689Skan	  f * origin_invariants[i][j];
1100169689Skan    }
1101169689Skan
1102169689Skan  return target_nest;
1103169689Skan
1104169689Skan}
1105169689Skan
1106169689Skan/* Convert a gcc tree expression EXPR to a lambda linear expression, and
1107169689Skan   return the new expression.  DEPTH is the depth of the loopnest.
1108169689Skan   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1109169689Skan   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1110169689Skan   is the amount we have to add/subtract from the expression because of the
1111169689Skan   type of comparison it is used in.  */
1112169689Skan
1113169689Skanstatic lambda_linear_expression
1114169689Skangcc_tree_to_linear_expression (int depth, tree expr,
1115169689Skan			       VEC(tree,heap) *outerinductionvars,
1116169689Skan			       VEC(tree,heap) *invariants, int extra)
1117169689Skan{
1118169689Skan  lambda_linear_expression lle = NULL;
1119169689Skan  switch (TREE_CODE (expr))
1120169689Skan    {
1121169689Skan    case INTEGER_CST:
1122169689Skan      {
1123169689Skan	lle = lambda_linear_expression_new (depth, 2 * depth);
1124169689Skan	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1125169689Skan	if (extra != 0)
1126169689Skan	  LLE_CONSTANT (lle) += extra;
1127169689Skan
1128169689Skan	LLE_DENOMINATOR (lle) = 1;
1129169689Skan      }
1130169689Skan      break;
1131169689Skan    case SSA_NAME:
1132169689Skan      {
1133169689Skan	tree iv, invar;
1134169689Skan	size_t i;
1135169689Skan	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1136169689Skan	  if (iv != NULL)
1137169689Skan	    {
1138169689Skan	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1139169689Skan		{
1140169689Skan		  lle = lambda_linear_expression_new (depth, 2 * depth);
1141169689Skan		  LLE_COEFFICIENTS (lle)[i] = 1;
1142169689Skan		  if (extra != 0)
1143169689Skan		    LLE_CONSTANT (lle) = extra;
1144169689Skan
1145169689Skan		  LLE_DENOMINATOR (lle) = 1;
1146169689Skan		}
1147169689Skan	    }
1148169689Skan	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1149169689Skan	  if (invar != NULL)
1150169689Skan	    {
1151169689Skan	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1152169689Skan		{
1153169689Skan		  lle = lambda_linear_expression_new (depth, 2 * depth);
1154169689Skan		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1155169689Skan		  if (extra != 0)
1156169689Skan		    LLE_CONSTANT (lle) = extra;
1157169689Skan		  LLE_DENOMINATOR (lle) = 1;
1158169689Skan		}
1159169689Skan	    }
1160169689Skan      }
1161169689Skan      break;
1162169689Skan    default:
1163169689Skan      return NULL;
1164169689Skan    }
1165169689Skan
1166169689Skan  return lle;
1167169689Skan}
1168169689Skan
1169169689Skan/* Return the depth of the loopnest NEST */
1170169689Skan
1171169689Skanstatic int
1172169689Skandepth_of_nest (struct loop *nest)
1173169689Skan{
1174169689Skan  size_t depth = 0;
1175169689Skan  while (nest)
1176169689Skan    {
1177169689Skan      depth++;
1178169689Skan      nest = nest->inner;
1179169689Skan    }
1180169689Skan  return depth;
1181169689Skan}
1182169689Skan
1183169689Skan
1184169689Skan/* Return true if OP is invariant in LOOP and all outer loops.  */
1185169689Skan
1186169689Skanstatic bool
1187169689Skaninvariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1188169689Skan{
1189169689Skan  if (is_gimple_min_invariant (op))
1190169689Skan    return true;
1191169689Skan  if (loop->depth == 0)
1192169689Skan    return true;
1193169689Skan  if (!expr_invariant_in_loop_p (loop, op))
1194169689Skan    return false;
1195169689Skan  if (loop->outer
1196169689Skan      && !invariant_in_loop_and_outer_loops (loop->outer, op))
1197169689Skan    return false;
1198169689Skan  return true;
1199169689Skan}
1200169689Skan
1201169689Skan/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1202169689Skan   or NULL if it could not be converted.
1203169689Skan   DEPTH is the depth of the loop.
1204169689Skan   INVARIANTS is a pointer to the array of loop invariants.
1205169689Skan   The induction variable for this loop should be stored in the parameter
1206169689Skan   OURINDUCTIONVAR.
1207169689Skan   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1208169689Skan
1209169689Skanstatic lambda_loop
1210169689Skangcc_loop_to_lambda_loop (struct loop *loop, int depth,
1211169689Skan			 VEC(tree,heap) ** invariants,
1212169689Skan			 tree * ourinductionvar,
1213169689Skan			 VEC(tree,heap) * outerinductionvars,
1214169689Skan			 VEC(tree,heap) ** lboundvars,
1215169689Skan			 VEC(tree,heap) ** uboundvars,
1216169689Skan			 VEC(int,heap) ** steps)
1217169689Skan{
1218169689Skan  tree phi;
1219169689Skan  tree exit_cond;
1220169689Skan  tree access_fn, inductionvar;
1221169689Skan  tree step;
1222169689Skan  lambda_loop lloop = NULL;
1223169689Skan  lambda_linear_expression lbound, ubound;
1224169689Skan  tree test;
1225169689Skan  int stepint;
1226169689Skan  int extra = 0;
1227169689Skan  tree lboundvar, uboundvar, uboundresult;
1228169689Skan
1229169689Skan  /* Find out induction var and exit condition.  */
1230169689Skan  inductionvar = find_induction_var_from_exit_cond (loop);
1231169689Skan  exit_cond = get_loop_exit_condition (loop);
1232169689Skan
1233169689Skan  if (inductionvar == NULL || exit_cond == NULL)
1234169689Skan    {
1235169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1236169689Skan	fprintf (dump_file,
1237169689Skan		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1238169689Skan      return NULL;
1239169689Skan    }
1240169689Skan
1241169689Skan  test = TREE_OPERAND (exit_cond, 0);
1242169689Skan
1243169689Skan  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1244169689Skan    {
1245169689Skan
1246169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1247169689Skan	fprintf (dump_file,
1248169689Skan		 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1249169689Skan
1250169689Skan      return NULL;
1251169689Skan    }
1252169689Skan
1253169689Skan  phi = SSA_NAME_DEF_STMT (inductionvar);
1254169689Skan  if (TREE_CODE (phi) != PHI_NODE)
1255169689Skan    {
1256169689Skan      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1257169689Skan      if (!phi)
1258169689Skan	{
1259169689Skan
1260169689Skan	  if (dump_file && (dump_flags & TDF_DETAILS))
1261169689Skan	    fprintf (dump_file,
1262169689Skan		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1263169689Skan
1264169689Skan	  return NULL;
1265169689Skan	}
1266169689Skan
1267169689Skan      phi = SSA_NAME_DEF_STMT (phi);
1268169689Skan      if (TREE_CODE (phi) != PHI_NODE)
1269169689Skan	{
1270169689Skan
1271169689Skan	  if (dump_file && (dump_flags & TDF_DETAILS))
1272169689Skan	    fprintf (dump_file,
1273169689Skan		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1274169689Skan	  return NULL;
1275169689Skan	}
1276169689Skan
1277169689Skan    }
1278169689Skan
1279169689Skan  /* The induction variable name/version we want to put in the array is the
1280169689Skan     result of the induction variable phi node.  */
1281169689Skan  *ourinductionvar = PHI_RESULT (phi);
1282169689Skan  access_fn = instantiate_parameters
1283169689Skan    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1284169689Skan  if (access_fn == chrec_dont_know)
1285169689Skan    {
1286169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1287169689Skan	fprintf (dump_file,
1288169689Skan		 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1289169689Skan
1290169689Skan      return NULL;
1291169689Skan    }
1292169689Skan
1293169689Skan  step = evolution_part_in_loop_num (access_fn, loop->num);
1294169689Skan  if (!step || step == chrec_dont_know)
1295169689Skan    {
1296169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1297169689Skan	fprintf (dump_file,
1298169689Skan		 "Unable to convert loop: Cannot determine step of loop.\n");
1299169689Skan
1300169689Skan      return NULL;
1301169689Skan    }
1302169689Skan  if (TREE_CODE (step) != INTEGER_CST)
1303169689Skan    {
1304169689Skan
1305169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1306169689Skan	fprintf (dump_file,
1307169689Skan		 "Unable to convert loop: Step of loop is not integer.\n");
1308169689Skan      return NULL;
1309169689Skan    }
1310169689Skan
1311169689Skan  stepint = TREE_INT_CST_LOW (step);
1312169689Skan
1313169689Skan  /* Only want phis for induction vars, which will have two
1314169689Skan     arguments.  */
1315169689Skan  if (PHI_NUM_ARGS (phi) != 2)
1316169689Skan    {
1317169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1318169689Skan	fprintf (dump_file,
1319169689Skan		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1320169689Skan      return NULL;
1321169689Skan    }
1322169689Skan
1323169689Skan  /* Another induction variable check. One argument's source should be
1324169689Skan     in the loop, one outside the loop.  */
1325169689Skan  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1326169689Skan      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1327169689Skan    {
1328169689Skan
1329169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1330169689Skan	fprintf (dump_file,
1331169689Skan		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1332169689Skan
1333169689Skan      return NULL;
1334169689Skan    }
1335169689Skan
1336169689Skan  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1337169689Skan    {
1338169689Skan      lboundvar = PHI_ARG_DEF (phi, 1);
1339169689Skan      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1340169689Skan					      outerinductionvars, *invariants,
1341169689Skan					      0);
1342169689Skan    }
1343169689Skan  else
1344169689Skan    {
1345169689Skan      lboundvar = PHI_ARG_DEF (phi, 0);
1346169689Skan      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1347169689Skan					      outerinductionvars, *invariants,
1348169689Skan					      0);
1349169689Skan    }
1350169689Skan
1351169689Skan  if (!lbound)
1352169689Skan    {
1353169689Skan
1354169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1355169689Skan	fprintf (dump_file,
1356169689Skan		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1357169689Skan
1358169689Skan      return NULL;
1359169689Skan    }
1360169689Skan  /* One part of the test may be a loop invariant tree.  */
1361169689Skan  VEC_reserve (tree, heap, *invariants, 1);
1362169689Skan  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1363169689Skan      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1364169689Skan    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1365169689Skan  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1366169689Skan	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1367169689Skan    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1368169689Skan
1369169689Skan  /* The non-induction variable part of the test is the upper bound variable.
1370169689Skan   */
1371169689Skan  if (TREE_OPERAND (test, 0) == inductionvar)
1372169689Skan    uboundvar = TREE_OPERAND (test, 1);
1373169689Skan  else
1374169689Skan    uboundvar = TREE_OPERAND (test, 0);
1375169689Skan
1376169689Skan
1377169689Skan  /* We only size the vectors assuming we have, at max, 2 times as many
1378169689Skan     invariants as we do loops (one for each bound).
1379169689Skan     This is just an arbitrary number, but it has to be matched against the
1380169689Skan     code below.  */
1381169689Skan  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1382169689Skan
1383169689Skan
1384169689Skan  /* We might have some leftover.  */
1385169689Skan  if (TREE_CODE (test) == LT_EXPR)
1386169689Skan    extra = -1 * stepint;
1387169689Skan  else if (TREE_CODE (test) == NE_EXPR)
1388169689Skan    extra = -1 * stepint;
1389169689Skan  else if (TREE_CODE (test) == GT_EXPR)
1390169689Skan    extra = -1 * stepint;
1391169689Skan  else if (TREE_CODE (test) == EQ_EXPR)
1392169689Skan    extra = 1 * stepint;
1393169689Skan
1394169689Skan  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1395169689Skan					  outerinductionvars,
1396169689Skan					  *invariants, extra);
1397169689Skan  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1398169689Skan			 build_int_cst (TREE_TYPE (uboundvar), extra));
1399169689Skan  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1400169689Skan  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1401169689Skan  VEC_safe_push (int, heap, *steps, stepint);
1402169689Skan  if (!ubound)
1403169689Skan    {
1404169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
1405169689Skan	fprintf (dump_file,
1406169689Skan		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1407169689Skan      return NULL;
1408169689Skan    }
1409169689Skan
1410169689Skan  lloop = lambda_loop_new ();
1411169689Skan  LL_STEP (lloop) = stepint;
1412169689Skan  LL_LOWER_BOUND (lloop) = lbound;
1413169689Skan  LL_UPPER_BOUND (lloop) = ubound;
1414169689Skan  return lloop;
1415169689Skan}
1416169689Skan
1417169689Skan/* Given a LOOP, find the induction variable it is testing against in the exit
1418169689Skan   condition.  Return the induction variable if found, NULL otherwise.  */
1419169689Skan
1420169689Skanstatic tree
1421169689Skanfind_induction_var_from_exit_cond (struct loop *loop)
1422169689Skan{
1423169689Skan  tree expr = get_loop_exit_condition (loop);
1424169689Skan  tree ivarop;
1425169689Skan  tree test;
1426169689Skan  if (expr == NULL_TREE)
1427169689Skan    return NULL_TREE;
1428169689Skan  if (TREE_CODE (expr) != COND_EXPR)
1429169689Skan    return NULL_TREE;
1430169689Skan  test = TREE_OPERAND (expr, 0);
1431169689Skan  if (!COMPARISON_CLASS_P (test))
1432169689Skan    return NULL_TREE;
1433169689Skan
1434169689Skan  /* Find the side that is invariant in this loop. The ivar must be the other
1435169689Skan     side.  */
1436169689Skan
1437169689Skan  if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1438169689Skan      ivarop = TREE_OPERAND (test, 1);
1439169689Skan  else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1440169689Skan      ivarop = TREE_OPERAND (test, 0);
1441169689Skan  else
1442169689Skan    return NULL_TREE;
1443169689Skan
1444169689Skan  if (TREE_CODE (ivarop) != SSA_NAME)
1445169689Skan    return NULL_TREE;
1446169689Skan  return ivarop;
1447169689Skan}
1448169689Skan
1449169689SkanDEF_VEC_P(lambda_loop);
1450169689SkanDEF_VEC_ALLOC_P(lambda_loop,heap);
1451169689Skan
1452169689Skan/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1453169689Skan   Return the new loop nest.
1454169689Skan   INDUCTIONVARS is a pointer to an array of induction variables for the
1455169689Skan   loopnest that will be filled in during this process.
1456169689Skan   INVARIANTS is a pointer to an array of invariants that will be filled in
1457169689Skan   during this process.  */
1458169689Skan
1459169689Skanlambda_loopnest
1460169689Skangcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1461169689Skan				 struct loop *loop_nest,
1462169689Skan				 VEC(tree,heap) **inductionvars,
1463169689Skan				 VEC(tree,heap) **invariants)
1464169689Skan{
1465169689Skan  lambda_loopnest ret = NULL;
1466169689Skan  struct loop *temp = loop_nest;
1467169689Skan  int depth = depth_of_nest (loop_nest);
1468169689Skan  size_t i;
1469169689Skan  VEC(lambda_loop,heap) *loops = NULL;
1470169689Skan  VEC(tree,heap) *uboundvars = NULL;
1471169689Skan  VEC(tree,heap) *lboundvars  = NULL;
1472169689Skan  VEC(int,heap) *steps = NULL;
1473169689Skan  lambda_loop newloop;
1474169689Skan  tree inductionvar = NULL;
1475169689Skan  bool perfect_nest = perfect_nest_p (loop_nest);
1476169689Skan
1477169689Skan  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1478169689Skan    goto fail;
1479169689Skan
1480169689Skan  while (temp)
1481169689Skan    {
1482169689Skan      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1483169689Skan					 &inductionvar, *inductionvars,
1484169689Skan					 &lboundvars, &uboundvars,
1485169689Skan					 &steps);
1486169689Skan      if (!newloop)
1487169689Skan	goto fail;
1488169689Skan
1489169689Skan      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1490169689Skan      VEC_safe_push (lambda_loop, heap, loops, newloop);
1491169689Skan      temp = temp->inner;
1492169689Skan    }
1493169689Skan
1494169689Skan  if (!perfect_nest)
1495169689Skan    {
1496169689Skan      if (!perfect_nestify (currloops, loop_nest,
1497169689Skan			    lboundvars, uboundvars, steps, *inductionvars))
1498169689Skan	{
1499169689Skan	  if (dump_file)
1500169689Skan	    fprintf (dump_file,
1501169689Skan		     "Not a perfect loop nest and couldn't convert to one.\n");
1502169689Skan	  goto fail;
1503169689Skan	}
1504169689Skan      else if (dump_file)
1505169689Skan	fprintf (dump_file,
1506169689Skan		 "Successfully converted loop nest to perfect loop nest.\n");
1507169689Skan    }
1508169689Skan
1509169689Skan  ret = lambda_loopnest_new (depth, 2 * depth);
1510169689Skan
1511169689Skan  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1512169689Skan    LN_LOOPS (ret)[i] = newloop;
1513169689Skan
1514169689Skan fail:
1515169689Skan  VEC_free (lambda_loop, heap, loops);
1516169689Skan  VEC_free (tree, heap, uboundvars);
1517169689Skan  VEC_free (tree, heap, lboundvars);
1518169689Skan  VEC_free (int, heap, steps);
1519169689Skan
1520169689Skan  return ret;
1521169689Skan}
1522169689Skan
1523169689Skan/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1524169689Skan   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1525169689Skan   inserted for us are stored.  INDUCTION_VARS is the array of induction
1526169689Skan   variables for the loop this LBV is from.  TYPE is the tree type to use for
1527169689Skan   the variables and trees involved.  */
1528169689Skan
1529169689Skanstatic tree
1530169689Skanlbv_to_gcc_expression (lambda_body_vector lbv,
1531169689Skan		       tree type, VEC(tree,heap) *induction_vars,
1532169689Skan		       tree *stmts_to_insert)
1533169689Skan{
1534169689Skan  tree stmts, stmt, resvar, name;
1535169689Skan  tree iv;
1536169689Skan  size_t i;
1537169689Skan  tree_stmt_iterator tsi;
1538169689Skan
1539169689Skan  /* Create a statement list and a linear expression temporary.  */
1540169689Skan  stmts = alloc_stmt_list ();
1541169689Skan  resvar = create_tmp_var (type, "lbvtmp");
1542169689Skan  add_referenced_var (resvar);
1543169689Skan
1544169689Skan  /* Start at 0.  */
1545169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1546169689Skan  name = make_ssa_name (resvar, stmt);
1547169689Skan  TREE_OPERAND (stmt, 0) = name;
1548169689Skan  tsi = tsi_last (stmts);
1549169689Skan  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1550169689Skan
1551169689Skan  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1552169689Skan    {
1553169689Skan      if (LBV_COEFFICIENTS (lbv)[i] != 0)
1554169689Skan	{
1555169689Skan	  tree newname;
1556169689Skan	  tree coeffmult;
1557169689Skan
1558169689Skan	  /* newname = coefficient * induction_variable */
1559169689Skan	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1560169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1561169689Skan			 fold_build2 (MULT_EXPR, type, iv, coeffmult));
1562169689Skan
1563169689Skan	  newname = make_ssa_name (resvar, stmt);
1564169689Skan	  TREE_OPERAND (stmt, 0) = newname;
1565169689Skan	  fold_stmt (&stmt);
1566169689Skan	  tsi = tsi_last (stmts);
1567169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1568169689Skan
1569169689Skan	  /* name = name + newname */
1570169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1571169689Skan			 build2 (PLUS_EXPR, type, name, newname));
1572169689Skan	  name = make_ssa_name (resvar, stmt);
1573169689Skan	  TREE_OPERAND (stmt, 0) = name;
1574169689Skan	  fold_stmt (&stmt);
1575169689Skan	  tsi = tsi_last (stmts);
1576169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1577169689Skan
1578169689Skan	}
1579169689Skan    }
1580169689Skan
1581169689Skan  /* Handle any denominator that occurs.  */
1582169689Skan  if (LBV_DENOMINATOR (lbv) != 1)
1583169689Skan    {
1584169689Skan      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1585169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1586169689Skan		     build2 (CEIL_DIV_EXPR, type, name, denominator));
1587169689Skan      name = make_ssa_name (resvar, stmt);
1588169689Skan      TREE_OPERAND (stmt, 0) = name;
1589169689Skan      fold_stmt (&stmt);
1590169689Skan      tsi = tsi_last (stmts);
1591169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1592169689Skan    }
1593169689Skan  *stmts_to_insert = stmts;
1594169689Skan  return name;
1595169689Skan}
1596169689Skan
1597169689Skan/* Convert a linear expression from coefficient and constant form to a
1598169689Skan   gcc tree.
1599169689Skan   Return the tree that represents the final value of the expression.
1600169689Skan   LLE is the linear expression to convert.
1601169689Skan   OFFSET is the linear offset to apply to the expression.
1602169689Skan   TYPE is the tree type to use for the variables and math.
1603169689Skan   INDUCTION_VARS is a vector of induction variables for the loops.
1604169689Skan   INVARIANTS is a vector of the loop nest invariants.
1605169689Skan   WRAP specifies what tree code to wrap the results in, if there is more than
1606169689Skan   one (it is either MAX_EXPR, or MIN_EXPR).
1607169689Skan   STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1608169689Skan   statements that need to be inserted for the linear expression.  */
1609169689Skan
1610169689Skanstatic tree
1611169689Skanlle_to_gcc_expression (lambda_linear_expression lle,
1612169689Skan		       lambda_linear_expression offset,
1613169689Skan		       tree type,
1614169689Skan		       VEC(tree,heap) *induction_vars,
1615169689Skan		       VEC(tree,heap) *invariants,
1616169689Skan		       enum tree_code wrap, tree *stmts_to_insert)
1617169689Skan{
1618169689Skan  tree stmts, stmt, resvar, name;
1619169689Skan  size_t i;
1620169689Skan  tree_stmt_iterator tsi;
1621169689Skan  tree iv, invar;
1622169689Skan  VEC(tree,heap) *results = NULL;
1623169689Skan
1624169689Skan  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1625169689Skan  name = NULL_TREE;
1626169689Skan  /* Create a statement list and a linear expression temporary.  */
1627169689Skan  stmts = alloc_stmt_list ();
1628169689Skan  resvar = create_tmp_var (type, "lletmp");
1629169689Skan  add_referenced_var (resvar);
1630169689Skan
1631169689Skan  /* Build up the linear expressions, and put the variable representing the
1632169689Skan     result in the results array.  */
1633169689Skan  for (; lle != NULL; lle = LLE_NEXT (lle))
1634169689Skan    {
1635169689Skan      /* Start at name = 0.  */
1636169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1637169689Skan      name = make_ssa_name (resvar, stmt);
1638169689Skan      TREE_OPERAND (stmt, 0) = name;
1639169689Skan      fold_stmt (&stmt);
1640169689Skan      tsi = tsi_last (stmts);
1641169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1642169689Skan
1643169689Skan      /* First do the induction variables.
1644169689Skan         at the end, name = name + all the induction variables added
1645169689Skan         together.  */
1646169689Skan      for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1647169689Skan	{
1648169689Skan	  if (LLE_COEFFICIENTS (lle)[i] != 0)
1649169689Skan	    {
1650169689Skan	      tree newname;
1651169689Skan	      tree mult;
1652169689Skan	      tree coeff;
1653169689Skan
1654169689Skan	      /* mult = induction variable * coefficient.  */
1655169689Skan	      if (LLE_COEFFICIENTS (lle)[i] == 1)
1656169689Skan		{
1657169689Skan		  mult = VEC_index (tree, induction_vars, i);
1658169689Skan		}
1659169689Skan	      else
1660169689Skan		{
1661169689Skan		  coeff = build_int_cst (type,
1662169689Skan					 LLE_COEFFICIENTS (lle)[i]);
1663169689Skan		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1664169689Skan		}
1665169689Skan
1666169689Skan	      /* newname = mult */
1667169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1668169689Skan	      newname = make_ssa_name (resvar, stmt);
1669169689Skan	      TREE_OPERAND (stmt, 0) = newname;
1670169689Skan	      fold_stmt (&stmt);
1671169689Skan	      tsi = tsi_last (stmts);
1672169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1673169689Skan
1674169689Skan	      /* name = name + newname */
1675169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1676169689Skan			     build2 (PLUS_EXPR, type, name, newname));
1677169689Skan	      name = make_ssa_name (resvar, stmt);
1678169689Skan	      TREE_OPERAND (stmt, 0) = name;
1679169689Skan	      fold_stmt (&stmt);
1680169689Skan	      tsi = tsi_last (stmts);
1681169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1682169689Skan	    }
1683169689Skan	}
1684169689Skan
1685169689Skan      /* Handle our invariants.
1686169689Skan         At the end, we have name = name + result of adding all multiplied
1687169689Skan         invariants.  */
1688169689Skan      for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1689169689Skan	{
1690169689Skan	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1691169689Skan	    {
1692169689Skan	      tree newname;
1693169689Skan	      tree mult;
1694169689Skan	      tree coeff;
1695169689Skan	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1696169689Skan	      /* mult = invariant * coefficient  */
1697169689Skan	      if (invcoeff == 1)
1698169689Skan		{
1699169689Skan		  mult = invar;
1700169689Skan		}
1701169689Skan	      else
1702169689Skan		{
1703169689Skan		  coeff = build_int_cst (type, invcoeff);
1704169689Skan		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1705169689Skan		}
1706169689Skan
1707169689Skan	      /* newname = mult */
1708169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
1709169689Skan	      newname = make_ssa_name (resvar, stmt);
1710169689Skan	      TREE_OPERAND (stmt, 0) = newname;
1711169689Skan	      fold_stmt (&stmt);
1712169689Skan	      tsi = tsi_last (stmts);
1713169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1714169689Skan
1715169689Skan	      /* name = name + newname */
1716169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1717169689Skan			     build2 (PLUS_EXPR, type, name, newname));
1718169689Skan	      name = make_ssa_name (resvar, stmt);
1719169689Skan	      TREE_OPERAND (stmt, 0) = name;
1720169689Skan	      fold_stmt (&stmt);
1721169689Skan	      tsi = tsi_last (stmts);
1722169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723169689Skan	    }
1724169689Skan	}
1725169689Skan
1726169689Skan      /* Now handle the constant.
1727169689Skan         name = name + constant.  */
1728169689Skan      if (LLE_CONSTANT (lle) != 0)
1729169689Skan	{
1730169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1731169689Skan			 build2 (PLUS_EXPR, type, name,
1732169689Skan			         build_int_cst (type, LLE_CONSTANT (lle))));
1733169689Skan	  name = make_ssa_name (resvar, stmt);
1734169689Skan	  TREE_OPERAND (stmt, 0) = name;
1735169689Skan	  fold_stmt (&stmt);
1736169689Skan	  tsi = tsi_last (stmts);
1737169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1738169689Skan	}
1739169689Skan
1740169689Skan      /* Now handle the offset.
1741169689Skan         name = name + linear offset.  */
1742169689Skan      if (LLE_CONSTANT (offset) != 0)
1743169689Skan	{
1744169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1745169689Skan			 build2 (PLUS_EXPR, type, name,
1746169689Skan			         build_int_cst (type, LLE_CONSTANT (offset))));
1747169689Skan	  name = make_ssa_name (resvar, stmt);
1748169689Skan	  TREE_OPERAND (stmt, 0) = name;
1749169689Skan	  fold_stmt (&stmt);
1750169689Skan	  tsi = tsi_last (stmts);
1751169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1752169689Skan	}
1753169689Skan
1754169689Skan      /* Handle any denominator that occurs.  */
1755169689Skan      if (LLE_DENOMINATOR (lle) != 1)
1756169689Skan	{
1757169689Skan	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1758169689Skan	  stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1759169689Skan			 type, name, stmt);
1760169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
1761169689Skan
1762169689Skan	  /* name = {ceil, floor}(name/denominator) */
1763169689Skan	  name = make_ssa_name (resvar, stmt);
1764169689Skan	  TREE_OPERAND (stmt, 0) = name;
1765169689Skan	  tsi = tsi_last (stmts);
1766169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1767169689Skan	}
1768169689Skan      VEC_safe_push (tree, heap, results, name);
1769169689Skan    }
1770169689Skan
1771169689Skan  /* Again, out of laziness, we don't handle this case yet.  It's not
1772169689Skan     hard, it just hasn't occurred.  */
1773169689Skan  gcc_assert (VEC_length (tree, results) <= 2);
1774169689Skan
1775169689Skan  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1776169689Skan  if (VEC_length (tree, results) > 1)
1777169689Skan    {
1778169689Skan      tree op1 = VEC_index (tree, results, 0);
1779169689Skan      tree op2 = VEC_index (tree, results, 1);
1780169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
1781169689Skan		     build2 (wrap, type, op1, op2));
1782169689Skan      name = make_ssa_name (resvar, stmt);
1783169689Skan      TREE_OPERAND (stmt, 0) = name;
1784169689Skan      tsi = tsi_last (stmts);
1785169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1786169689Skan    }
1787169689Skan
1788169689Skan  VEC_free (tree, heap, results);
1789169689Skan
1790169689Skan  *stmts_to_insert = stmts;
1791169689Skan  return name;
1792169689Skan}
1793169689Skan
1794169689Skan/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1795169689Skan   it, back into gcc code.  This changes the
1796169689Skan   loops, their induction variables, and their bodies, so that they
1797169689Skan   match the transformed loopnest.
1798169689Skan   OLD_LOOPNEST is the loopnest before we've replaced it with the new
1799169689Skan   loopnest.
1800169689Skan   OLD_IVS is a vector of induction variables from the old loopnest.
1801169689Skan   INVARIANTS is a vector of loop invariants from the old loopnest.
1802169689Skan   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1803169689Skan   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1804169689Skan   NEW_LOOPNEST.  */
1805169689Skan
1806169689Skanvoid
1807169689Skanlambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1808169689Skan				 VEC(tree,heap) *old_ivs,
1809169689Skan				 VEC(tree,heap) *invariants,
1810169689Skan				 lambda_loopnest new_loopnest,
1811169689Skan				 lambda_trans_matrix transform)
1812169689Skan{
1813169689Skan  struct loop *temp;
1814169689Skan  size_t i = 0;
1815169689Skan  size_t depth = 0;
1816169689Skan  VEC(tree,heap) *new_ivs = NULL;
1817169689Skan  tree oldiv;
1818169689Skan
1819169689Skan  block_stmt_iterator bsi;
1820169689Skan
1821169689Skan  if (dump_file)
1822169689Skan    {
1823169689Skan      transform = lambda_trans_matrix_inverse (transform);
1824169689Skan      fprintf (dump_file, "Inverse of transformation matrix:\n");
1825169689Skan      print_lambda_trans_matrix (dump_file, transform);
1826169689Skan    }
1827169689Skan  depth = depth_of_nest (old_loopnest);
1828169689Skan  temp = old_loopnest;
1829169689Skan
1830169689Skan  while (temp)
1831169689Skan    {
1832169689Skan      lambda_loop newloop;
1833169689Skan      basic_block bb;
1834169689Skan      edge exit;
1835169689Skan      tree ivvar, ivvarinced, exitcond, stmts;
1836169689Skan      enum tree_code testtype;
1837169689Skan      tree newupperbound, newlowerbound;
1838169689Skan      lambda_linear_expression offset;
1839169689Skan      tree type;
1840169689Skan      bool insert_after;
1841169689Skan      tree inc_stmt;
1842169689Skan
1843169689Skan      oldiv = VEC_index (tree, old_ivs, i);
1844169689Skan      type = TREE_TYPE (oldiv);
1845169689Skan
1846169689Skan      /* First, build the new induction variable temporary  */
1847169689Skan
1848169689Skan      ivvar = create_tmp_var (type, "lnivtmp");
1849169689Skan      add_referenced_var (ivvar);
1850169689Skan
1851169689Skan      VEC_safe_push (tree, heap, new_ivs, ivvar);
1852169689Skan
1853169689Skan      newloop = LN_LOOPS (new_loopnest)[i];
1854169689Skan
1855169689Skan      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1856169689Skan         cases for now.  */
1857169689Skan      offset = LL_LINEAR_OFFSET (newloop);
1858169689Skan
1859169689Skan      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1860169689Skan		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1861169689Skan
1862169689Skan      /* Now build the  new lower bounds, and insert the statements
1863169689Skan         necessary to generate it on the loop preheader.  */
1864169689Skan      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1865169689Skan					     LL_LINEAR_OFFSET (newloop),
1866169689Skan					     type,
1867169689Skan					     new_ivs,
1868169689Skan					     invariants, MAX_EXPR, &stmts);
1869169689Skan      bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1870169689Skan      bsi_commit_edge_inserts ();
1871169689Skan      /* Build the new upper bound and insert its statements in the
1872169689Skan         basic block of the exit condition */
1873169689Skan      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1874169689Skan					     LL_LINEAR_OFFSET (newloop),
1875169689Skan					     type,
1876169689Skan					     new_ivs,
1877169689Skan					     invariants, MIN_EXPR, &stmts);
1878169689Skan      exit = temp->single_exit;
1879169689Skan      exitcond = get_loop_exit_condition (temp);
1880169689Skan      bb = bb_for_stmt (exitcond);
1881169689Skan      bsi = bsi_start (bb);
1882169689Skan      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1883169689Skan
1884169689Skan      /* Create the new iv.  */
1885169689Skan
1886169689Skan      standard_iv_increment_position (temp, &bsi, &insert_after);
1887169689Skan      create_iv (newlowerbound,
1888169689Skan		 build_int_cst (type, LL_STEP (newloop)),
1889169689Skan		 ivvar, temp, &bsi, insert_after, &ivvar,
1890169689Skan		 NULL);
1891169689Skan
1892169689Skan      /* Unfortunately, the incremented ivvar that create_iv inserted may not
1893169689Skan	 dominate the block containing the exit condition.
1894169689Skan	 So we simply create our own incremented iv to use in the new exit
1895169689Skan	 test,  and let redundancy elimination sort it out.  */
1896169689Skan      inc_stmt = build2 (PLUS_EXPR, type,
1897169689Skan			 ivvar, build_int_cst (type, LL_STEP (newloop)));
1898169689Skan      inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1899169689Skan			 inc_stmt);
1900169689Skan      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1901169689Skan      TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1902169689Skan      bsi = bsi_for_stmt (exitcond);
1903169689Skan      bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1904169689Skan
1905169689Skan      /* Replace the exit condition with the new upper bound
1906169689Skan         comparison.  */
1907169689Skan
1908169689Skan      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1909169689Skan
1910169689Skan      /* We want to build a conditional where true means exit the loop, and
1911169689Skan	 false means continue the loop.
1912169689Skan	 So swap the testtype if this isn't the way things are.*/
1913169689Skan
1914169689Skan      if (exit->flags & EDGE_FALSE_VALUE)
1915169689Skan	testtype = swap_tree_comparison (testtype);
1916169689Skan
1917169689Skan      COND_EXPR_COND (exitcond) = build2 (testtype,
1918169689Skan					  boolean_type_node,
1919169689Skan					  newupperbound, ivvarinced);
1920169689Skan      update_stmt (exitcond);
1921169689Skan      VEC_replace (tree, new_ivs, i, ivvar);
1922169689Skan
1923169689Skan      i++;
1924169689Skan      temp = temp->inner;
1925169689Skan    }
1926169689Skan
1927169689Skan  /* Rewrite uses of the old ivs so that they are now specified in terms of
1928169689Skan     the new ivs.  */
1929169689Skan
1930169689Skan  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1931169689Skan    {
1932169689Skan      imm_use_iterator imm_iter;
1933169689Skan      use_operand_p use_p;
1934169689Skan      tree oldiv_def;
1935169689Skan      tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1936169689Skan      tree stmt;
1937169689Skan
1938169689Skan      if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1939169689Skan        oldiv_def = PHI_RESULT (oldiv_stmt);
1940169689Skan      else
1941169689Skan	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1942169689Skan      gcc_assert (oldiv_def != NULL_TREE);
1943169689Skan
1944169689Skan      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1945169689Skan        {
1946169689Skan	  tree newiv, stmts;
1947169689Skan	  lambda_body_vector lbv, newlbv;
1948169689Skan
1949169689Skan	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1950169689Skan
1951169689Skan	  /* Compute the new expression for the induction
1952169689Skan	     variable.  */
1953169689Skan	  depth = VEC_length (tree, new_ivs);
1954169689Skan	  lbv = lambda_body_vector_new (depth);
1955169689Skan	  LBV_COEFFICIENTS (lbv)[i] = 1;
1956169689Skan
1957169689Skan	  newlbv = lambda_body_vector_compute_new (transform, lbv);
1958169689Skan
1959169689Skan	  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1960169689Skan					 new_ivs, &stmts);
1961169689Skan	  bsi = bsi_for_stmt (stmt);
1962169689Skan	  /* Insert the statements to build that
1963169689Skan	     expression.  */
1964169689Skan	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1965169689Skan
1966169689Skan	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1967169689Skan	    propagate_value (use_p, newiv);
1968169689Skan	  update_stmt (stmt);
1969169689Skan	}
1970169689Skan    }
1971169689Skan  VEC_free (tree, heap, new_ivs);
1972169689Skan}
1973169689Skan
1974169689Skan/* Return TRUE if this is not interesting statement from the perspective of
1975169689Skan   determining if we have a perfect loop nest.  */
1976169689Skan
1977169689Skanstatic bool
1978169689Skannot_interesting_stmt (tree stmt)
1979169689Skan{
1980169689Skan  /* Note that COND_EXPR's aren't interesting because if they were exiting the
1981169689Skan     loop, we would have already failed the number of exits tests.  */
1982169689Skan  if (TREE_CODE (stmt) == LABEL_EXPR
1983169689Skan      || TREE_CODE (stmt) == GOTO_EXPR
1984169689Skan      || TREE_CODE (stmt) == COND_EXPR)
1985169689Skan    return true;
1986169689Skan  return false;
1987169689Skan}
1988169689Skan
1989169689Skan/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1990169689Skan
1991169689Skanstatic bool
1992169689Skanphi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1993169689Skan{
1994169689Skan  int i;
1995169689Skan  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1996169689Skan    if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1997169689Skan      if (PHI_ARG_DEF (phi, i) == def)
1998169689Skan	return true;
1999169689Skan  return false;
2000169689Skan}
2001169689Skan
2002169689Skan/* Return TRUE if STMT is a use of PHI_RESULT.  */
2003169689Skan
2004169689Skanstatic bool
2005169689Skanstmt_uses_phi_result (tree stmt, tree phi_result)
2006169689Skan{
2007169689Skan  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2008169689Skan
2009169689Skan  /* This is conservatively true, because we only want SIMPLE bumpers
2010169689Skan     of the form x +- constant for our pass.  */
2011169689Skan  return (use == phi_result);
2012169689Skan}
2013169689Skan
2014169689Skan/* STMT is a bumper stmt for LOOP if the version it defines is used in the
2015169689Skan   in-loop-edge in a phi node, and the operand it uses is the result of that
2016169689Skan   phi node.
2017169689Skan   I.E. i_29 = i_3 + 1
2018169689Skan        i_3 = PHI (0, i_29);  */
2019169689Skan
2020169689Skanstatic bool
2021169689Skanstmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2022169689Skan{
2023169689Skan  tree use;
2024169689Skan  tree def;
2025169689Skan  imm_use_iterator iter;
2026169689Skan  use_operand_p use_p;
2027169689Skan
2028169689Skan  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2029169689Skan  if (!def)
2030169689Skan    return false;
2031169689Skan
2032169689Skan  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2033169689Skan    {
2034169689Skan      use = USE_STMT (use_p);
2035169689Skan      if (TREE_CODE (use) == PHI_NODE)
2036169689Skan	{
2037169689Skan	  if (phi_loop_edge_uses_def (loop, use, def))
2038169689Skan	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2039169689Skan	      return true;
2040169689Skan	}
2041169689Skan    }
2042169689Skan  return false;
2043169689Skan}
2044169689Skan
2045169689Skan
2046169689Skan/* Return true if LOOP is a perfect loop nest.
2047169689Skan   Perfect loop nests are those loop nests where all code occurs in the
2048169689Skan   innermost loop body.
2049169689Skan   If S is a program statement, then
2050169689Skan
2051169689Skan   i.e.
2052169689Skan   DO I = 1, 20
2053169689Skan       S1
2054169689Skan       DO J = 1, 20
2055169689Skan       ...
2056169689Skan       END DO
2057169689Skan   END DO
2058169689Skan   is not a perfect loop nest because of S1.
2059169689Skan
2060169689Skan   DO I = 1, 20
2061169689Skan      DO J = 1, 20
2062169689Skan        S1
2063169689Skan	...
2064169689Skan      END DO
2065169689Skan   END DO
2066169689Skan   is a perfect loop nest.
2067169689Skan
2068169689Skan   Since we don't have high level loops anymore, we basically have to walk our
2069169689Skan   statements and ignore those that are there because the loop needs them (IE
2070169689Skan   the induction variable increment, and jump back to the top of the loop).  */
2071169689Skan
2072169689Skanbool
2073169689Skanperfect_nest_p (struct loop *loop)
2074169689Skan{
2075169689Skan  basic_block *bbs;
2076169689Skan  size_t i;
2077169689Skan  tree exit_cond;
2078169689Skan
2079169689Skan  if (!loop->inner)
2080169689Skan    return true;
2081169689Skan  bbs = get_loop_body (loop);
2082169689Skan  exit_cond = get_loop_exit_condition (loop);
2083169689Skan  for (i = 0; i < loop->num_nodes; i++)
2084169689Skan    {
2085169689Skan      if (bbs[i]->loop_father == loop)
2086169689Skan	{
2087169689Skan	  block_stmt_iterator bsi;
2088169689Skan	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2089169689Skan	    {
2090169689Skan	      tree stmt = bsi_stmt (bsi);
2091169689Skan	      if (stmt == exit_cond
2092169689Skan		  || not_interesting_stmt (stmt)
2093169689Skan		  || stmt_is_bumper_for_loop (loop, stmt))
2094169689Skan		continue;
2095169689Skan	      free (bbs);
2096169689Skan	      return false;
2097169689Skan	    }
2098169689Skan	}
2099169689Skan    }
2100169689Skan  free (bbs);
2101169689Skan  /* See if the inner loops are perfectly nested as well.  */
2102169689Skan  if (loop->inner)
2103169689Skan    return perfect_nest_p (loop->inner);
2104169689Skan  return true;
2105169689Skan}
2106169689Skan
2107169689Skan/* Replace the USES of X in STMT, or uses with the same step as X with Y.
2108169689Skan   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2109169689Skan   avoid creating duplicate temporaries and FIRSTBSI is statement
2110169689Skan   iterator where new temporaries should be inserted at the beginning
2111169689Skan   of body basic block.  */
2112169689Skan
2113169689Skanstatic void
2114169689Skanreplace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2115169689Skan				int xstep, tree y, tree yinit,
2116169689Skan				htab_t replacements,
2117169689Skan				block_stmt_iterator *firstbsi)
2118169689Skan{
2119169689Skan  ssa_op_iter iter;
2120169689Skan  use_operand_p use_p;
2121169689Skan
2122169689Skan  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2123169689Skan    {
2124169689Skan      tree use = USE_FROM_PTR (use_p);
2125169689Skan      tree step = NULL_TREE;
2126169689Skan      tree scev, init, val, var, setstmt;
2127169689Skan      struct tree_map *h, in;
2128169689Skan      void **loc;
2129169689Skan
2130169689Skan      /* Replace uses of X with Y right away.  */
2131169689Skan      if (use == x)
2132169689Skan	{
2133169689Skan	  SET_USE (use_p, y);
2134169689Skan	  continue;
2135169689Skan	}
2136169689Skan
2137169689Skan      scev = instantiate_parameters (loop,
2138169689Skan				     analyze_scalar_evolution (loop, use));
2139169689Skan
2140169689Skan      if (scev == NULL || scev == chrec_dont_know)
2141169689Skan	continue;
2142169689Skan
2143169689Skan      step = evolution_part_in_loop_num (scev, loop->num);
2144169689Skan      if (step == NULL
2145169689Skan	  || step == chrec_dont_know
2146169689Skan	  || TREE_CODE (step) != INTEGER_CST
2147169689Skan	  || int_cst_value (step) != xstep)
2148169689Skan	continue;
2149169689Skan
2150169689Skan      /* Use REPLACEMENTS hash table to cache already created
2151169689Skan	 temporaries.  */
2152169689Skan      in.hash = htab_hash_pointer (use);
2153169689Skan      in.from = use;
2154169689Skan      h = htab_find_with_hash (replacements, &in, in.hash);
2155169689Skan      if (h != NULL)
2156169689Skan	{
2157169689Skan	  SET_USE (use_p, h->to);
2158169689Skan	  continue;
2159169689Skan	}
2160169689Skan
2161169689Skan      /* USE which has the same step as X should be replaced
2162169689Skan	 with a temporary set to Y + YINIT - INIT.  */
2163169689Skan      init = initial_condition_in_loop_num (scev, loop->num);
2164169689Skan      gcc_assert (init != NULL && init != chrec_dont_know);
2165169689Skan      if (TREE_TYPE (use) == TREE_TYPE (y))
2166169689Skan	{
2167169689Skan	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2168169689Skan	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2169169689Skan	  if (val == y)
2170169689Skan 	    {
2171169689Skan	      /* If X has the same type as USE, the same step
2172169689Skan		 and same initial value, it can be replaced by Y.  */
2173169689Skan	      SET_USE (use_p, y);
2174169689Skan	      continue;
2175169689Skan	    }
2176169689Skan	}
2177169689Skan      else
2178169689Skan	{
2179169689Skan	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2180169689Skan	  val = fold_convert (TREE_TYPE (use), val);
2181169689Skan	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2182169689Skan	}
2183169689Skan
2184169689Skan      /* Create a temporary variable and insert it at the beginning
2185169689Skan	 of the loop body basic block, right after the PHI node
2186169689Skan	 which sets Y.  */
2187169689Skan      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2188169689Skan      add_referenced_var (var);
2189169689Skan      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2190169689Skan      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2191169689Skan      var = make_ssa_name (var, setstmt);
2192169689Skan      TREE_OPERAND (setstmt, 0) = var;
2193169689Skan      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2194169689Skan      update_stmt (setstmt);
2195169689Skan      SET_USE (use_p, var);
2196169689Skan      h = ggc_alloc (sizeof (struct tree_map));
2197169689Skan      h->hash = in.hash;
2198169689Skan      h->from = use;
2199169689Skan      h->to = var;
2200169689Skan      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2201169689Skan      gcc_assert ((*(struct tree_map **)loc) == NULL);
2202169689Skan      *(struct tree_map **) loc = h;
2203169689Skan    }
2204169689Skan}
2205169689Skan
2206169689Skan/* Return true if STMT is an exit PHI for LOOP */
2207169689Skan
2208169689Skanstatic bool
2209169689Skanexit_phi_for_loop_p (struct loop *loop, tree stmt)
2210169689Skan{
2211169689Skan
2212169689Skan  if (TREE_CODE (stmt) != PHI_NODE
2213169689Skan      || PHI_NUM_ARGS (stmt) != 1
2214169689Skan      || bb_for_stmt (stmt) != loop->single_exit->dest)
2215169689Skan    return false;
2216169689Skan
2217169689Skan  return true;
2218169689Skan}
2219169689Skan
2220169689Skan/* Return true if STMT can be put back into the loop INNER, by
2221169689Skan   copying it to the beginning of that loop and changing the uses.  */
2222169689Skan
2223169689Skanstatic bool
2224169689Skancan_put_in_inner_loop (struct loop *inner, tree stmt)
2225169689Skan{
2226169689Skan  imm_use_iterator imm_iter;
2227169689Skan  use_operand_p use_p;
2228169689Skan
2229169689Skan  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2230169689Skan  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2231169689Skan      || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2232169689Skan    return false;
2233169689Skan
2234169689Skan  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2235169689Skan    {
2236169689Skan      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2237169689Skan	{
2238169689Skan	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2239169689Skan
2240169689Skan	  if (!flow_bb_inside_loop_p (inner, immbb))
2241169689Skan	    return false;
2242169689Skan	}
2243169689Skan    }
2244169689Skan  return true;
2245169689Skan}
2246169689Skan
2247169689Skan/* Return true if STMT can be put *after* the inner loop of LOOP.  */
2248169689Skanstatic bool
2249169689Skancan_put_after_inner_loop (struct loop *loop, tree stmt)
2250169689Skan{
2251169689Skan  imm_use_iterator imm_iter;
2252169689Skan  use_operand_p use_p;
2253169689Skan
2254169689Skan  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2255169689Skan    return false;
2256169689Skan
2257169689Skan  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2258169689Skan    {
2259169689Skan      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2260169689Skan	{
2261169689Skan	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2262169689Skan
2263169689Skan	  if (!dominated_by_p (CDI_DOMINATORS,
2264169689Skan			       immbb,
2265169689Skan			       loop->inner->header)
2266169689Skan	      && !can_put_in_inner_loop (loop->inner, stmt))
2267169689Skan	    return false;
2268169689Skan	}
2269169689Skan    }
2270169689Skan  return true;
2271169689Skan}
2272169689Skan
2273169689Skan
2274169689Skan
2275169689Skan/* Return TRUE if LOOP is an imperfect nest that we can convert to a
2276169689Skan   perfect one.  At the moment, we only handle imperfect nests of
2277169689Skan   depth 2, where all of the statements occur after the inner loop.  */
2278169689Skan
2279169689Skanstatic bool
2280169689Skancan_convert_to_perfect_nest (struct loop *loop)
2281169689Skan{
2282169689Skan  basic_block *bbs;
2283169689Skan  tree exit_condition, phi;
2284169689Skan  size_t i;
2285169689Skan  block_stmt_iterator bsi;
2286169689Skan  basic_block exitdest;
2287169689Skan
2288169689Skan  /* Can't handle triply nested+ loops yet.  */
2289169689Skan  if (!loop->inner || loop->inner->inner)
2290169689Skan    return false;
2291169689Skan
2292169689Skan  bbs = get_loop_body (loop);
2293169689Skan  exit_condition = get_loop_exit_condition (loop);
2294169689Skan  for (i = 0; i < loop->num_nodes; i++)
2295169689Skan    {
2296169689Skan      if (bbs[i]->loop_father == loop)
2297169689Skan	{
2298169689Skan	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2299169689Skan	    {
2300169689Skan	      tree stmt = bsi_stmt (bsi);
2301169689Skan
2302169689Skan	      if (stmt == exit_condition
2303169689Skan		  || not_interesting_stmt (stmt)
2304169689Skan		  || stmt_is_bumper_for_loop (loop, stmt))
2305169689Skan		continue;
2306169689Skan
2307169689Skan	      /* If this is a scalar operation that can be put back
2308169689Skan	         into the inner loop, or after the inner loop, through
2309169689Skan		 copying, then do so. This works on the theory that
2310169689Skan		 any amount of scalar code we have to reduplicate
2311169689Skan		 into or after the loops is less expensive that the
2312169689Skan		 win we get from rearranging the memory walk
2313169689Skan		 the loop is doing so that it has better
2314169689Skan		 cache behavior.  */
2315169689Skan	      if (TREE_CODE (stmt) == MODIFY_EXPR)
2316169689Skan		{
2317169689Skan		  use_operand_p use_a, use_b;
2318169689Skan		  imm_use_iterator imm_iter;
2319169689Skan		  ssa_op_iter op_iter, op_iter1;
2320169689Skan		  tree op0 = TREE_OPERAND (stmt, 0);
2321169689Skan		  tree scev = instantiate_parameters
2322169689Skan		    (loop, analyze_scalar_evolution (loop, op0));
2323169689Skan
2324169689Skan		  /* If the IV is simple, it can be duplicated.  */
2325169689Skan		  if (!automatically_generated_chrec_p (scev))
2326169689Skan		    {
2327169689Skan		      tree step = evolution_part_in_loop_num (scev, loop->num);
2328169689Skan		      if (step && step != chrec_dont_know
2329169689Skan			  && TREE_CODE (step) == INTEGER_CST)
2330169689Skan			continue;
2331169689Skan		    }
2332169689Skan
2333169689Skan		  /* The statement should not define a variable used
2334169689Skan		     in the inner loop.  */
2335169689Skan		  if (TREE_CODE (op0) == SSA_NAME)
2336169689Skan		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2337169689Skan		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
2338169689Skan			  == loop->inner)
2339169689Skan			goto fail;
2340169689Skan
2341169689Skan		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2342169689Skan		    {
2343169689Skan		      tree node, op = USE_FROM_PTR (use_a);
2344169689Skan
2345169689Skan		      /* The variables should not be used in both loops.  */
2346169689Skan		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2347169689Skan		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
2348169689Skan			  == loop->inner)
2349169689Skan			goto fail;
2350169689Skan
2351169689Skan		      /* The statement should not use the value of a
2352169689Skan			 scalar that was modified in the loop.  */
2353169689Skan		      node = SSA_NAME_DEF_STMT (op);
2354169689Skan		      if (TREE_CODE (node) == PHI_NODE)
2355169689Skan			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2356169689Skan			  {
2357169689Skan			    tree arg = USE_FROM_PTR (use_b);
2358169689Skan
2359169689Skan			    if (TREE_CODE (arg) == SSA_NAME)
2360169689Skan			      {
2361169689Skan				tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2362169689Skan
2363169689Skan				if (bb_for_stmt (arg_stmt)->loop_father
2364169689Skan				    == loop->inner)
2365169689Skan				  goto fail;
2366169689Skan			      }
2367169689Skan			  }
2368169689Skan		    }
2369169689Skan
2370169689Skan		  if (can_put_in_inner_loop (loop->inner, stmt)
2371169689Skan		      || can_put_after_inner_loop (loop, stmt))
2372169689Skan		    continue;
2373169689Skan		}
2374169689Skan
2375169689Skan	      /* Otherwise, if the bb of a statement we care about isn't
2376169689Skan		 dominated by the header of the inner loop, then we can't
2377169689Skan		 handle this case right now.  This test ensures that the
2378169689Skan		 statement comes completely *after* the inner loop.  */
2379169689Skan	      if (!dominated_by_p (CDI_DOMINATORS,
2380169689Skan				   bb_for_stmt (stmt),
2381169689Skan				   loop->inner->header))
2382169689Skan		goto fail;
2383169689Skan	    }
2384169689Skan	}
2385169689Skan    }
2386169689Skan
2387169689Skan  /* We also need to make sure the loop exit only has simple copy phis in it,
2388169689Skan     otherwise we don't know how to transform it into a perfect nest right
2389169689Skan     now.  */
2390169689Skan  exitdest = loop->single_exit->dest;
2391169689Skan
2392169689Skan  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2393169689Skan    if (PHI_NUM_ARGS (phi) != 1)
2394169689Skan      goto fail;
2395169689Skan
2396169689Skan  free (bbs);
2397169689Skan  return true;
2398169689Skan
2399169689Skan fail:
2400169689Skan  free (bbs);
2401169689Skan  return false;
2402169689Skan}
2403169689Skan
2404169689Skan/* Transform the loop nest into a perfect nest, if possible.
2405169689Skan   LOOPS is the current struct loops *
2406169689Skan   LOOP is the loop nest to transform into a perfect nest
2407169689Skan   LBOUNDS are the lower bounds for the loops to transform
2408169689Skan   UBOUNDS are the upper bounds for the loops to transform
2409169689Skan   STEPS is the STEPS for the loops to transform.
2410169689Skan   LOOPIVS is the induction variables for the loops to transform.
2411169689Skan
2412169689Skan   Basically, for the case of
2413169689Skan
2414169689Skan   FOR (i = 0; i < 50; i++)
2415169689Skan    {
2416169689Skan     FOR (j =0; j < 50; j++)
2417169689Skan     {
2418169689Skan        <whatever>
2419169689Skan     }
2420169689Skan     <some code>
2421169689Skan    }
2422169689Skan
2423169689Skan   This function will transform it into a perfect loop nest by splitting the
2424169689Skan   outer loop into two loops, like so:
2425169689Skan
2426169689Skan   FOR (i = 0; i < 50; i++)
2427169689Skan   {
2428169689Skan     FOR (j = 0; j < 50; j++)
2429169689Skan     {
2430169689Skan         <whatever>
2431169689Skan     }
2432169689Skan   }
2433169689Skan
2434169689Skan   FOR (i = 0; i < 50; i ++)
2435169689Skan   {
2436169689Skan    <some code>
2437169689Skan   }
2438169689Skan
2439169689Skan   Return FALSE if we can't make this loop into a perfect nest.  */
2440169689Skan
2441169689Skanstatic bool
2442169689Skanperfect_nestify (struct loops *loops,
2443169689Skan		 struct loop *loop,
2444169689Skan		 VEC(tree,heap) *lbounds,
2445169689Skan		 VEC(tree,heap) *ubounds,
2446169689Skan		 VEC(int,heap) *steps,
2447169689Skan		 VEC(tree,heap) *loopivs)
2448169689Skan{
2449169689Skan  basic_block *bbs;
2450169689Skan  tree exit_condition;
2451169689Skan  tree then_label, else_label, cond_stmt;
2452169689Skan  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2453169689Skan  int i;
2454169689Skan  block_stmt_iterator bsi, firstbsi;
2455169689Skan  bool insert_after;
2456169689Skan  edge e;
2457169689Skan  struct loop *newloop;
2458169689Skan  tree phi;
2459169689Skan  tree uboundvar;
2460169689Skan  tree stmt;
2461169689Skan  tree oldivvar, ivvar, ivvarinced;
2462169689Skan  VEC(tree,heap) *phis = NULL;
2463169689Skan  htab_t replacements = NULL;
2464169689Skan
2465169689Skan  /* Create the new loop.  */
2466169689Skan  olddest = loop->single_exit->dest;
2467169689Skan  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2468169689Skan  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2469169689Skan
2470169689Skan  /* Push the exit phi nodes that we are moving.  */
2471169689Skan  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2472169689Skan    {
2473169689Skan      VEC_reserve (tree, heap, phis, 2);
2474169689Skan      VEC_quick_push (tree, phis, PHI_RESULT (phi));
2475169689Skan      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2476169689Skan    }
2477169689Skan  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2478169689Skan
2479169689Skan  /* Remove the exit phis from the old basic block.  Make sure to set
2480169689Skan     PHI_RESULT to null so it doesn't get released.  */
2481169689Skan  while (phi_nodes (olddest) != NULL)
2482169689Skan    {
2483169689Skan      SET_PHI_RESULT (phi_nodes (olddest), NULL);
2484169689Skan      remove_phi_node (phi_nodes (olddest), NULL);
2485169689Skan    }
2486169689Skan
2487169689Skan  /* and add them back to the new basic block.  */
2488169689Skan  while (VEC_length (tree, phis) != 0)
2489169689Skan    {
2490169689Skan      tree def;
2491169689Skan      tree phiname;
2492169689Skan      def = VEC_pop (tree, phis);
2493169689Skan      phiname = VEC_pop (tree, phis);
2494169689Skan      phi = create_phi_node (phiname, preheaderbb);
2495169689Skan      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2496169689Skan    }
2497169689Skan  flush_pending_stmts (e);
2498169689Skan  VEC_free (tree, heap, phis);
2499169689Skan
2500169689Skan  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2501169689Skan  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2502169689Skan  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2503169689Skan  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2504169689Skan  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2505169689Skan  cond_stmt = build3 (COND_EXPR, void_type_node,
2506169689Skan		      build2 (NE_EXPR, boolean_type_node,
2507169689Skan			      integer_one_node,
2508169689Skan			      integer_zero_node),
2509169689Skan		      then_label, else_label);
2510169689Skan  bsi = bsi_start (bodybb);
2511169689Skan  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2512169689Skan  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2513169689Skan  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2514169689Skan  make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2515169689Skan
2516169689Skan  /* Update the loop structures.  */
2517169689Skan  newloop = duplicate_loop (loops, loop, olddest->loop_father);
2518169689Skan  newloop->header = headerbb;
2519169689Skan  newloop->latch = latchbb;
2520169689Skan  newloop->single_exit = e;
2521169689Skan  add_bb_to_loop (latchbb, newloop);
2522169689Skan  add_bb_to_loop (bodybb, newloop);
2523169689Skan  add_bb_to_loop (headerbb, newloop);
2524169689Skan  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2525169689Skan  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2526169689Skan  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2527169689Skan			   loop->single_exit->src);
2528169689Skan  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2529169689Skan  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2530169689Skan  /* Create the new iv.  */
2531169689Skan  oldivvar = VEC_index (tree, loopivs, 0);
2532169689Skan  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2533169689Skan  add_referenced_var (ivvar);
2534169689Skan  standard_iv_increment_position (newloop, &bsi, &insert_after);
2535169689Skan  create_iv (VEC_index (tree, lbounds, 0),
2536169689Skan	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2537169689Skan	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2538169689Skan
2539169689Skan  /* Create the new upper bound.  This may be not just a variable, so we copy
2540169689Skan     it to one just in case.  */
2541169689Skan
2542169689Skan  exit_condition = get_loop_exit_condition (newloop);
2543169689Skan  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2544169689Skan  add_referenced_var (uboundvar);
2545169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
2546169689Skan		 VEC_index (tree, ubounds, 0));
2547169689Skan  uboundvar = make_ssa_name (uboundvar, stmt);
2548169689Skan  TREE_OPERAND (stmt, 0) = uboundvar;
2549169689Skan
2550169689Skan  if (insert_after)
2551169689Skan    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2552169689Skan  else
2553169689Skan    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2554169689Skan  update_stmt (stmt);
2555169689Skan  COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
2556169689Skan					    boolean_type_node,
2557169689Skan					    uboundvar,
2558169689Skan					    ivvarinced);
2559169689Skan  update_stmt (exit_condition);
2560169689Skan  replacements = htab_create_ggc (20, tree_map_hash,
2561169689Skan				  tree_map_eq, NULL);
2562169689Skan  bbs = get_loop_body_in_dom_order (loop);
2563169689Skan  /* Now move the statements, and replace the induction variable in the moved
2564169689Skan     statements with the correct loop induction variable.  */
2565169689Skan  oldivvar = VEC_index (tree, loopivs, 0);
2566169689Skan  firstbsi = bsi_start (bodybb);
2567169689Skan  for (i = loop->num_nodes - 1; i >= 0 ; i--)
2568169689Skan    {
2569169689Skan      block_stmt_iterator tobsi = bsi_last (bodybb);
2570169689Skan      if (bbs[i]->loop_father == loop)
2571169689Skan	{
2572169689Skan	  /* If this is true, we are *before* the inner loop.
2573169689Skan	     If this isn't true, we are *after* it.
2574169689Skan
2575169689Skan	     The only time can_convert_to_perfect_nest returns true when we
2576169689Skan	     have statements before the inner loop is if they can be moved
2577169689Skan	     into the inner loop.
2578169689Skan
2579169689Skan	     The only time can_convert_to_perfect_nest returns true when we
2580169689Skan	     have statements after the inner loop is if they can be moved into
2581169689Skan	     the new split loop.  */
2582169689Skan
2583169689Skan	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2584169689Skan	    {
2585169689Skan	      block_stmt_iterator header_bsi
2586169689Skan		= bsi_after_labels (loop->inner->header);
2587169689Skan
2588169689Skan	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2589169689Skan		{
2590169689Skan		  tree stmt = bsi_stmt (bsi);
2591169689Skan
2592169689Skan		  if (stmt == exit_condition
2593169689Skan		      || not_interesting_stmt (stmt)
2594169689Skan		      || stmt_is_bumper_for_loop (loop, stmt))
2595169689Skan		    {
2596169689Skan		      bsi_next (&bsi);
2597169689Skan		      continue;
2598169689Skan		    }
2599169689Skan
2600169689Skan		  bsi_move_before (&bsi, &header_bsi);
2601169689Skan		}
2602169689Skan	    }
2603169689Skan	  else
2604169689Skan	    {
2605169689Skan	      /* Note that the bsi only needs to be explicitly incremented
2606169689Skan		 when we don't move something, since it is automatically
2607169689Skan		 incremented when we do.  */
2608169689Skan	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2609169689Skan		{
2610169689Skan		  ssa_op_iter i;
2611169689Skan		  tree n, stmt = bsi_stmt (bsi);
2612169689Skan
2613169689Skan		  if (stmt == exit_condition
2614169689Skan		      || not_interesting_stmt (stmt)
2615169689Skan		      || stmt_is_bumper_for_loop (loop, stmt))
2616169689Skan		    {
2617169689Skan		      bsi_next (&bsi);
2618169689Skan		      continue;
2619169689Skan		    }
2620169689Skan
2621169689Skan		  replace_uses_equiv_to_x_with_y
2622169689Skan		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2623169689Skan		     VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2624169689Skan
2625169689Skan		  bsi_move_before (&bsi, &tobsi);
2626169689Skan
2627169689Skan		  /* If the statement has any virtual operands, they may
2628169689Skan		     need to be rewired because the original loop may
2629169689Skan		     still reference them.  */
2630169689Skan		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2631169689Skan		    mark_sym_for_renaming (SSA_NAME_VAR (n));
2632169689Skan		}
2633169689Skan	    }
2634169689Skan
2635169689Skan	}
2636169689Skan    }
2637169689Skan
2638169689Skan  free (bbs);
2639169689Skan  htab_delete (replacements);
2640169689Skan  return perfect_nest_p (loop);
2641169689Skan}
2642169689Skan
2643169689Skan/* Return true if TRANS is a legal transformation matrix that respects
2644169689Skan   the dependence vectors in DISTS and DIRS.  The conservative answer
2645169689Skan   is false.
2646169689Skan
2647169689Skan   "Wolfe proves that a unimodular transformation represented by the
2648169689Skan   matrix T is legal when applied to a loop nest with a set of
2649169689Skan   lexicographically non-negative distance vectors RDG if and only if
2650169689Skan   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2651169689Skan   i.e.: if and only if it transforms the lexicographically positive
2652169689Skan   distance vectors to lexicographically positive vectors.  Note that
2653169689Skan   a unimodular matrix must transform the zero vector (and only it) to
2654169689Skan   the zero vector." S.Muchnick.  */
2655169689Skan
2656169689Skanbool
2657169689Skanlambda_transform_legal_p (lambda_trans_matrix trans,
2658169689Skan			  int nb_loops,
2659169689Skan			  VEC (ddr_p, heap) *dependence_relations)
2660169689Skan{
2661169689Skan  unsigned int i, j;
2662169689Skan  lambda_vector distres;
2663169689Skan  struct data_dependence_relation *ddr;
2664169689Skan
2665169689Skan  gcc_assert (LTM_COLSIZE (trans) == nb_loops
2666169689Skan	      && LTM_ROWSIZE (trans) == nb_loops);
2667169689Skan
2668169689Skan  /* When there is an unknown relation in the dependence_relations, we
2669169689Skan     know that it is no worth looking at this loop nest: give up.  */
2670169689Skan  ddr = VEC_index (ddr_p, dependence_relations, 0);
2671169689Skan  if (ddr == NULL)
2672169689Skan    return true;
2673169689Skan  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2674169689Skan    return false;
2675169689Skan
2676169689Skan  distres = lambda_vector_new (nb_loops);
2677169689Skan
2678169689Skan  /* For each distance vector in the dependence graph.  */
2679169689Skan  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2680169689Skan    {
2681169689Skan      /* Don't care about relations for which we know that there is no
2682169689Skan	 dependence, nor about read-read (aka. output-dependences):
2683169689Skan	 these data accesses can happen in any order.  */
2684169689Skan      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2685169689Skan	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2686169689Skan	continue;
2687169689Skan
2688169689Skan      /* Conservatively answer: "this transformation is not valid".  */
2689169689Skan      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2690169689Skan	return false;
2691169689Skan
2692169689Skan      /* If the dependence could not be captured by a distance vector,
2693169689Skan	 conservatively answer that the transform is not valid.  */
2694169689Skan      if (DDR_NUM_DIST_VECTS (ddr) == 0)
2695169689Skan	return false;
2696169689Skan
2697169689Skan      /* Compute trans.dist_vect */
2698169689Skan      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2699169689Skan	{
2700169689Skan	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2701169689Skan				     DDR_DIST_VECT (ddr, j), distres);
2702169689Skan
2703169689Skan	  if (!lambda_vector_lexico_pos (distres, nb_loops))
2704169689Skan	    return false;
2705169689Skan	}
2706169689Skan    }
2707169689Skan  return true;
2708169689Skan}
2709