1/*  Loop transformation code generation
2    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dberlin@dberlin.org>
4
5    This file is part of GCC.
6
7    GCC is free software; you can redistribute it and/or modify it under
8    the terms of the GNU General Public License as published by the Free
9    Software Foundation; either version 2, or (at your option) any later
10    version.
11
12    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or
14    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15    for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING.  If not, write to the Free
19    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
28#include "target.h"
29#include "rtl.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "timevar.h"
35#include "cfgloop.h"
36#include "expr.h"
37#include "optabs.h"
38#include "tree-chrec.h"
39#include "tree-data-ref.h"
40#include "tree-pass.h"
41#include "tree-scalar-evolution.h"
42#include "vec.h"
43#include "lambda.h"
44#include "vecprim.h"
45
46/* This loop nest code generation is based on non-singular matrix
47   math.
48
49 A little terminology and a general sketch of the algorithm.  See "A singular
50 loop transformation framework based on non-singular matrices" by Wei Li and
51 Keshav Pingali for formal proofs that the various statements below are
52 correct.
53
54 A loop iteration space represents the points traversed by the loop.  A point in the
55 iteration space can be represented by a vector of size <loop depth>.  You can
therefore represent the iteration space as integral combinations of a set
57 of basis vectors.
58
59 A loop iteration space is dense if every integer point between the loop
60 bounds is a point in the iteration space.  Every loop with a step of 1
61 therefore has a dense iteration space.
62
63 for i = 1 to 3, step 1 is a dense iteration space.
64
65 A loop iteration space is sparse if it is not dense.  That is, the iteration
66 space skips integer points that are within the loop bounds.
67
68 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
69 2 is skipped.
70
71 Dense source spaces are easy to transform, because they don't skip any
72 points to begin with.  Thus we can compute the exact bounds of the target
73 space using min/max and floor/ceil.
74
75 For a dense source space, we take the transformation matrix, decompose it
76 into a lower triangular part (H) and a unimodular part (U).
77 We then compute the auxiliary space from the unimodular part (source loop
 nest . U = auxiliary space), which has two important properties:
79  1. It traverses the iterations in the same lexicographic order as the source
80  space.
81  2. It is a dense space when the source is a dense space (even if the target
82  space is going to be sparse).
83
84 Given the auxiliary space, we use the lower triangular part to compute the
85 bounds in the target space by simple matrix multiplication.
86 The gaps in the target space (IE the new loop step sizes) will be the
87 diagonals of the H matrix.
88
89 Sparse source spaces require another step, because you can't directly compute
90 the exact bounds of the auxiliary and target space from the sparse space.
91 Rather than try to come up with a separate algorithm to handle sparse source
92 spaces directly, we just find a legal transformation matrix that gives you
93 the sparse source space, from a dense space, and then transform the dense
94 space.
95
96 For a regular sparse space, you can represent the source space as an integer
97 lattice, and the base space of that lattice will always be dense.  Thus, we
98 effectively use the lattice to figure out the transformation from the lattice
99 base space, to the sparse iteration space (IE what transform was applied to
100 the dense space to make it sparse).  We then compose this transform with the
101 transformation matrix specified by the user (since our matrix transformations
102 are closed under composition, this is okay).  We can then use the base space
103 (which is dense) plus the composed transformation matrix, to compute the rest
104 of the transform using the dense space algorithm above.
105
106 In other words, our sparse source space (B) is decomposed into a dense base
107 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
108 We then compute the composition of L and the user transformation matrix (T),
109 so that T is now a transform from A to the result, instead of from B to the
110 result.
111 IE A.(LT) = result instead of B.T = result
112 Since A is now a dense source space, we can use the dense source space
113 algorithm above to compute the result of applying transform (LT) to A.
114
115 Fourier-Motzkin elimination is used to compute the bounds of the base space
116 of the lattice.  */
117
/* Turn LOOP, with the given induction variables, bounds, and steps,
   into a perfect loop nest if possible.  Defined later in this file.  */
static bool perfect_nestify (struct loops *,
			     struct loop *, VEC(tree,heap) *,
			     VEC(tree,heap) *, VEC(int,heap) *,
			     VEC(tree,heap) *);
/* Lattice stuff that is internal to the code generation algorithm.  */

/* An integer lattice describing a (possibly sparse) iteration space as
   x = BASE * y + ORIGIN, where y ranges over a dense base space.  */
typedef struct
{
  /* Lattice base matrix.  */
  lambda_matrix base;
  /* Lattice dimension.  */
  int dimension;
  /* Origin vector for the coefficients.  */
  lambda_vector origin;
  /* Origin matrix for the invariants.  */
  lambda_matrix origin_invariants;
  /* Number of invariants.  */
  int invariants;
} *lambda_lattice;

/* Accessors for the fields of a lambda_lattice T.  */
#define LATTICE_BASE(T) ((T)->base)
#define LATTICE_DIMENSION(T) ((T)->dimension)
#define LATTICE_ORIGIN(T) ((T)->origin)
#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
#define LATTICE_INVARIANTS(T) ((T)->invariants)

/* Forward declarations for the static helpers defined below.  */
static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
		       int, int);
static lambda_lattice lambda_lattice_new (int, int);
static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);

static tree find_induction_var_from_exit_cond (struct loop *);
static bool can_convert_to_perfect_nest (struct loop *);
151
152/* Create a new lambda body vector.  */
153
154lambda_body_vector
155lambda_body_vector_new (int size)
156{
157  lambda_body_vector ret;
158
159  ret = ggc_alloc (sizeof (*ret));
160  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
161  LBV_SIZE (ret) = size;
162  LBV_DENOMINATOR (ret) = 1;
163  return ret;
164}
165
166/* Compute the new coefficients for the vector based on the
167  *inverse* of the transformation matrix.  */
168
169lambda_body_vector
170lambda_body_vector_compute_new (lambda_trans_matrix transform,
171				lambda_body_vector vect)
172{
173  lambda_body_vector temp;
174  int depth;
175
176  /* Make sure the matrix is square.  */
177  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
178
179  depth = LTM_ROWSIZE (transform);
180
181  temp = lambda_body_vector_new (depth);
182  LBV_DENOMINATOR (temp) =
183    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
184  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
185			     LTM_MATRIX (transform), depth,
186			     LBV_COEFFICIENTS (temp));
187  LBV_SIZE (temp) = LBV_SIZE (vect);
188  return temp;
189}
190
/* Print the coefficients of the lambda body vector BODY to OUTFILE.  */

void
print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
{
  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
}
198
199/* Return TRUE if two linear expressions are equal.  */
200
201static bool
202lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
203	   int depth, int invariants)
204{
205  int i;
206
207  if (lle1 == NULL || lle2 == NULL)
208    return false;
209  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
210    return false;
211  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
212    return false;
213  for (i = 0; i < depth; i++)
214    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
215      return false;
216  for (i = 0; i < invariants; i++)
217    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
218	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
219      return false;
220  return true;
221}
222
223/* Create a new linear expression with dimension DIM, and total number
224   of invariants INVARIANTS.  */
225
226lambda_linear_expression
227lambda_linear_expression_new (int dim, int invariants)
228{
229  lambda_linear_expression ret;
230
231  ret = ggc_alloc_cleared (sizeof (*ret));
232
233  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
234  LLE_CONSTANT (ret) = 0;
235  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
236  LLE_DENOMINATOR (ret) = 1;
237  LLE_NEXT (ret) = NULL;
238
239  return ret;
240}
241
242/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
243   The starting letter used for variable names is START.  */
244
245static void
246print_linear_expression (FILE * outfile, lambda_vector expr, int size,
247			 char start)
248{
249  int i;
250  bool first = true;
251  for (i = 0; i < size; i++)
252    {
253      if (expr[i] != 0)
254	{
255	  if (first)
256	    {
257	      if (expr[i] < 0)
258		fprintf (outfile, "-");
259	      first = false;
260	    }
261	  else if (expr[i] > 0)
262	    fprintf (outfile, " + ");
263	  else
264	    fprintf (outfile, " - ");
265	  if (abs (expr[i]) == 1)
266	    fprintf (outfile, "%c", start + i);
267	  else
268	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
269	}
270    }
271}
272
/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
   depth/number of coefficients is given by DEPTH, the number of invariants is
   given by INVARIANTS, and the character to start variable names with is given
   by START.  Invariants are always named starting at 'A'.  */

void
print_lambda_linear_expression (FILE * outfile,
				lambda_linear_expression expr,
				int depth, int invariants, char start)
{
  fprintf (outfile, "\tLinear expression: ");
  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
  fprintf (outfile, "  invariants: ");
  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
			   invariants, 'A');
  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
}
291
292/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
293   coefficients is given by DEPTH, the number of invariants is
294   given by INVARIANTS, and the character to start variable names with is given
295   by START.  */
296
297void
298print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
299		   int invariants, char start)
300{
301  int step;
302  lambda_linear_expression expr;
303
304  gcc_assert (loop);
305
306  expr = LL_LINEAR_OFFSET (loop);
307  step = LL_STEP (loop);
308  fprintf (outfile, "  step size = %d \n", step);
309
310  if (expr)
311    {
312      fprintf (outfile, "  linear offset: \n");
313      print_lambda_linear_expression (outfile, expr, depth, invariants,
314				      start);
315    }
316
317  fprintf (outfile, "  lower bound: \n");
318  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
319    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
320  fprintf (outfile, "  upper bound: \n");
321  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
322    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
323}
324
325/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
326   number of invariants.  */
327
328lambda_loopnest
329lambda_loopnest_new (int depth, int invariants)
330{
331  lambda_loopnest ret;
332  ret = ggc_alloc (sizeof (*ret));
333
334  LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
335  LN_DEPTH (ret) = depth;
336  LN_INVARIANTS (ret) = invariants;
337
338  return ret;
339}
340
341/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
342   character to use for loop names is given by START.  */
343
344void
345print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
346{
347  int i;
348  for (i = 0; i < LN_DEPTH (nest); i++)
349    {
350      fprintf (outfile, "Loop %c\n", start + i);
351      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
352			 LN_INVARIANTS (nest), 'i');
353      fprintf (outfile, "\n");
354    }
355}
356
357/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
358   of invariants.  */
359
360static lambda_lattice
361lambda_lattice_new (int depth, int invariants)
362{
363  lambda_lattice ret;
364  ret = ggc_alloc (sizeof (*ret));
365  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
366  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
367  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
368  LATTICE_DIMENSION (ret) = depth;
369  LATTICE_INVARIANTS (ret) = invariants;
370  return ret;
371}
372
/* Compute the lattice base for NEST.  The lattice base is essentially a
   non-singular transform from a dense base space to a sparse iteration space.
   We use it so that we don't have to specially handle the case of a sparse
   iteration space in other parts of the algorithm.  As a result, this routine
   only does something interesting (IE produce a matrix that isn't the
   identity matrix) if NEST is a sparse space.  */

static lambda_lattice
lambda_lattice_compute_base (lambda_loopnest nest)
{
  lambda_lattice ret;
  int depth, invariants;
  lambda_matrix base;

  int i, j, step;
  lambda_loop loop;
  lambda_linear_expression expression;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  ret = lambda_lattice_new (depth, invariants);
  base = LATTICE_BASE (ret);

  /* Build the base matrix one row per loop, outermost first.  */
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];
      gcc_assert (loop);
      step = LL_STEP (loop);
      /* If we have a step of 1, then the base is one, and the
         origin and invariant coefficients are 0.  IE this row is just
         the corresponding row of the identity matrix, since a
         unit-step loop is already dense.  */
      if (step == 1)
	{
	  for (j = 0; j < depth; j++)
	    base[i][j] = 0;
	  base[i][i] = 1;
	  LATTICE_ORIGIN (ret)[i] = 0;
	  for (j = 0; j < invariants; j++)
	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
	}
      else
	{
	  /* Otherwise, we need the lower bound expression (which must
	     be an affine function)  to determine the base.  A single
	     bound expression with denominator 1 is required here.  */
	  expression = LL_LOWER_BOUND (loop);
	  gcc_assert (expression && !LLE_NEXT (expression)
		      && LLE_DENOMINATOR (expression) == 1);

	  /* The lower triangular portion of the base is going to be the
	     coefficient times the step */
	  for (j = 0; j < i; j++)
	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
	      * LL_STEP (LN_LOOPS (nest)[j]);
	  /* The diagonal entry is this loop's own step; entries above
	     the diagonal stay zero, keeping the base lower triangular.  */
	  base[i][i] = step;
	  for (j = i + 1; j < depth; j++)
	    base[i][j] = 0;

	  /* Origin for this loop is the constant of the lower bound
	     expression.  */
	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);

	  /* Coefficient for the invariants are equal to the invariant
	     coefficients in the expression.  */
	  for (j = 0; j < invariants; j++)
	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
	}
    }
  return ret;
}
442
/* Compute the least common multiple of two numbers A and B.
   Returns 0 when either argument is 0 (avoiding the division by zero
   that gcd (0, 0) == 0 would otherwise cause).  Dividing by the gcd
   before multiplying keeps the intermediate value as small as
   possible, reducing the chance of signed overflow compared to
   computing abs (a) * abs (b) first.  */

static int
lcm (int a, int b)
{
  int g;

  if (a == 0 || b == 0)
    return 0;

  g = gcd (a, b);
  return abs (a) / abs (g) * abs (b);
}
450
/* Perform Fourier-Motzkin elimination to calculate the bounds of the
   auxiliary nest.
   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
   it is easy to calculate the answer and bounds.
   A sketch of how it works:
   Given a system of linear inequalities, ai * xj >= bk, you can always
   rewrite the constraints so they are all of the form
   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
   in b1 ... bk, and some a in a1...ai)
   You can then eliminate this x from the non-constant inequalities by
   rewriting these as a <= b, x >= constant, and delete the x variable.
   You can then repeat this for any remaining x variables, and then we have
   an easy to use variable <= constant (or no variables at all) form that we
   can construct our bounds from.

   In our case, each time we eliminate, we construct part of the bound from
   the ith variable, then delete the ith variable.

   Remember the constant are in our vector a, our coefficient matrix is A,
   and our invariant coefficient matrix is B.

   SIZE is the size of the matrices being passed.
   DEPTH is the loop nest depth.
   INVARIANTS is the number of loop invariants.
   A, B, and a are the coefficient matrix, invariant coefficient, and a
   vector of constants, respectively.  */

static lambda_loopnest
compute_nest_using_fourier_motzkin (int size,
				    int depth,
				    int invariants,
				    lambda_matrix A,
				    lambda_matrix B,
				    lambda_vector a)
{

  int multiple, f1, f2;
  int i, j, k;
  lambda_linear_expression expression;
  lambda_loop loop;
  lambda_loopnest auxillary_nest;
  lambda_matrix swapmatrix, A1, B1;
  lambda_vector swapvector, a1;
  int newsize;

  /* Scratch system for the reduced inequalities after each elimination
     step.  NOTE(review): the fixed capacity of 128 rows matches the
     caller's allocation, but the elimination step below can in
     principle generate more rows than SIZE; confirm callers keep the
     systems small enough.  */
  A1 = lambda_matrix_new (128, depth);
  B1 = lambda_matrix_new (128, invariants);
  a1 = lambda_vector_new (128);

  auxillary_nest = lambda_loopnest_new (depth, invariants);

  /* Eliminate variables innermost-first; each iteration produces the
     bounds for loop I and then removes variable I from the system.  */
  for (i = depth - 1; i >= 0; i--)
    {
      loop = lambda_loop_new ();
      LN_LOOPS (auxillary_nest)[i] = loop;
      LL_STEP (loop) = 1;

      for (j = 0; j < size; j++)
	{
	  if (A[j][i] < 0)
	    {
	      /* Any linear expression in the matrix with a coefficient less
		 than 0 becomes part of the new lower bound.  */
	      expression = lambda_linear_expression_new (depth, invariants);

	      /* Only coefficients of outer variables (k < i) survive;
		 inner variables were already eliminated.  */
	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];

	      /* Negate so the denominator (and the bound) come out
		 positive, since A[j][i] < 0 here.  */
	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
	      LLE_CONSTANT (expression) = -1 * a[j];

	      /* Ignore if identical to the existing lower bound.  */
	      if (!lle_equal (LL_LOWER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
		  LL_LOWER_BOUND (loop) = expression;
		}

	    }
	  else if (A[j][i] > 0)
	    {
	      /* Any linear expression with a coefficient greater than 0
		 becomes part of the new upper bound.  */
	      expression = lambda_linear_expression_new (depth, invariants);
	      for (k = 0; k < i; k++)
		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];

	      for (k = 0; k < invariants; k++)
		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];

	      LLE_DENOMINATOR (expression) = A[j][i];
	      LLE_CONSTANT (expression) = a[j];

	      /* Ignore if identical to the existing upper bound.  */
	      if (!lle_equal (LL_UPPER_BOUND (loop),
			      expression, depth, invariants))
		{
		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
		  LL_UPPER_BOUND (loop) = expression;
		}

	    }
	}

      /* This portion creates a new system of linear inequalities by deleting
	 the i'th variable, reducing the system by one variable.  */
      newsize = 0;
      for (j = 0; j < size; j++)
	{
	  /* If the coefficient for the i'th variable is 0, then we can just
	     eliminate the variable straightaway.  Otherwise, we have to
	     multiply through by the coefficients we are eliminating.  */
	  if (A[j][i] == 0)
	    {
	      lambda_vector_copy (A[j], A1[newsize], depth);
	      lambda_vector_copy (B[j], B1[newsize], invariants);
	      a1[newsize] = a[j];
	      newsize++;
	    }
	  else if (A[j][i] > 0)
	    {
	      /* Pair each positive-coefficient row with every
		 negative-coefficient row; scaling both to the lcm and
		 adding cancels variable I exactly.  */
	      for (k = 0; k < size; k++)
		{
		  if (A[k][i] < 0)
		    {
		      multiple = lcm (A[j][i], A[k][i]);
		      f1 = multiple / A[j][i];
		      f2 = -1 * multiple / A[k][i];

		      lambda_vector_add_mc (A[j], f1, A[k], f2,
					    A1[newsize], depth);
		      lambda_vector_add_mc (B[j], f1, B[k], f2,
					    B1[newsize], invariants);
		      a1[newsize] = f1 * a[j] + f2 * a[k];
		      newsize++;
		    }
		}
	    }
	}

      /* Swap the scratch system in as the current system for the next
	 elimination round; the old storage becomes the new scratch.  */
      swapmatrix = A;
      A = A1;
      A1 = swapmatrix;

      swapmatrix = B;
      B = B1;
      B1 = swapmatrix;

      swapvector = a;
      a = a1;
      a1 = swapvector;

      size = newsize;
    }

  return auxillary_nest;
}
612
/* Compute the loop bounds for the auxiliary space NEST.
   Input system used is Ax <= b.  TRANS is the unimodular transformation.
   Given the original nest, this function will
   1. Convert the nest into matrix form, which consists of a matrix for the
   coefficients, a matrix for the
   invariant coefficients, and a vector for the constants.
   2. Use the matrix form to calculate the lattice base for the nest (which is
   a dense space)
   3. Compose the dense space transform with the user specified transform, to
   get a transform we can easily calculate transformed bounds for.
   4. Multiply the composed transformation matrix times the matrix form of the
   loop.
   5. Transform the newly created matrix (from step 4) back into a loop nest
   using Fourier-Motzkin elimination to figure out the bounds.  */

static lambda_loopnest
lambda_compute_auxillary_space (lambda_loopnest nest,
				lambda_trans_matrix trans)
{
  lambda_matrix A, B, A1, B1;
  lambda_vector a, a1;
  lambda_matrix invertedtrans;
  int depth, invariants, size;
  int i, j;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_lattice lattice;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Unfortunately, we can't know the number of constraints we'll have
     ahead of time, but this should be enough even in ridiculous loop nest
     cases. We must not go over this limit.  */
  A = lambda_matrix_new (128, depth);
  B = lambda_matrix_new (128, invariants);
  a = lambda_vector_new (128);

  A1 = lambda_matrix_new (128, depth);
  B1 = lambda_matrix_new (128, invariants);
  a1 = lambda_vector_new (128);

  /* Store the bounds in the equation matrix A, constant vector a, and
     invariant matrix B, so that we have Ax <= a + B.
     This requires a little equation rearranging so that everything is on the
     correct side of the inequality.  */
  size = 0;
  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (nest)[i];

      /* First we do the lower bound.  For a negative step the roles of
	 the bounds are reversed, so start from the upper bound then.  */
      if (LL_STEP (loop) > 0)
	expression = LL_LOWER_BOUND (loop);
      else
	expression = LL_UPPER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE move
	     the bounded variable (scaled by the denominator) to the
	     left side and negate the constant and invariant terms.  */
	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
	  a[size] *= -1;
	  for (j = 0; j < invariants; j++)
	    B[size][j] *= -1;

	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);

	}

      /* Then do the exact same thing for the upper bounds.  */
      if (LL_STEP (loop) > 0)
	expression = LL_UPPER_BOUND (loop);
      else
	expression = LL_LOWER_BOUND (loop);

      for (; expression != NULL; expression = LLE_NEXT (expression))
	{
	  /* Fill in the coefficient.  */
	  for (j = 0; j < i; j++)
	    A[size][j] = LLE_COEFFICIENTS (expression)[j];

	  /* And the invariant coefficient.  */
	  for (j = 0; j < invariants; j++)
	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];

	  /* And the constant.  */
	  a[size] = LLE_CONSTANT (expression);

	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
	  for (j = 0; j < i; j++)
	    A[size][j] *= -1;
	  A[size][i] = LLE_DENOMINATOR (expression);
	  size++;
	  /* Need to increase matrix sizes above.  */
	  gcc_assert (size <= 127);

	}
    }

  /* Compute the lattice base x = base * y + origin, where y is the
     base space.  */
  lattice = lambda_lattice_compute_base (nest);

  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */

  /* A1 = A * L */
  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);

  /* a1 = a - A * origin constant.  */
  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
  lambda_vector_add_mc (a, 1, a1, -1, a1, size);

  /* B1 = B - A * origin invariant.  */
  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
		      invariants);
  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);

  /* Now compute the auxiliary space bounds by first inverting U, multiplying
     it by A1, then performing Fourier-Motzkin.  */

  invertedtrans = lambda_matrix_new (depth, depth);

  /* Compute the inverse of U.  */
  lambda_matrix_inverse (LTM_MATRIX (trans),
			 invertedtrans, depth);

  /* A = A1 inv(U).  */
  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);

  return compute_nest_using_fourier_motzkin (size, depth, invariants,
					     A, B1, a1);
}
759
760/* Compute the loop bounds for the target space, using the bounds of
761   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
762   The target space loop bounds are computed by multiplying the triangular
763   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
764   the loop steps (positive or negative) is then used to swap the bounds if
765   the loop counts downwards.
766   Return the target loopnest.  */
767
768static lambda_loopnest
769lambda_compute_target_space (lambda_loopnest auxillary_nest,
770			     lambda_trans_matrix H, lambda_vector stepsigns)
771{
772  lambda_matrix inverse, H1;
773  int determinant, i, j;
774  int gcd1, gcd2;
775  int factor;
776
777  lambda_loopnest target_nest;
778  int depth, invariants;
779  lambda_matrix target;
780
781  lambda_loop auxillary_loop, target_loop;
782  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
783
784  depth = LN_DEPTH (auxillary_nest);
785  invariants = LN_INVARIANTS (auxillary_nest);
786
787  inverse = lambda_matrix_new (depth, depth);
788  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
789
790  /* H1 is H excluding its diagonal.  */
791  H1 = lambda_matrix_new (depth, depth);
792  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
793
794  for (i = 0; i < depth; i++)
795    H1[i][i] = 0;
796
797  /* Computes the linear offsets of the loop bounds.  */
798  target = lambda_matrix_new (depth, depth);
799  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
800
801  target_nest = lambda_loopnest_new (depth, invariants);
802
803  for (i = 0; i < depth; i++)
804    {
805
806      /* Get a new loop structure.  */
807      target_loop = lambda_loop_new ();
808      LN_LOOPS (target_nest)[i] = target_loop;
809
810      /* Computes the gcd of the coefficients of the linear part.  */
811      gcd1 = lambda_vector_gcd (target[i], i);
812
813      /* Include the denominator in the GCD.  */
814      gcd1 = gcd (gcd1, determinant);
815
816      /* Now divide through by the gcd.  */
817      for (j = 0; j < i; j++)
818	target[i][j] = target[i][j] / gcd1;
819
820      expression = lambda_linear_expression_new (depth, invariants);
821      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
822      LLE_DENOMINATOR (expression) = determinant / gcd1;
823      LLE_CONSTANT (expression) = 0;
824      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
825			   invariants);
826      LL_LINEAR_OFFSET (target_loop) = expression;
827    }
828
829  /* For each loop, compute the new bounds from H.  */
830  for (i = 0; i < depth; i++)
831    {
832      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
833      target_loop = LN_LOOPS (target_nest)[i];
834      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
835      factor = LTM_MATRIX (H)[i][i];
836
837      /* First we do the lower bound.  */
838      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
839
840      for (; auxillary_expr != NULL;
841	   auxillary_expr = LLE_NEXT (auxillary_expr))
842	{
843	  target_expr = lambda_linear_expression_new (depth, invariants);
844	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
845				     depth, inverse, depth,
846				     LLE_COEFFICIENTS (target_expr));
847	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
848				    LLE_COEFFICIENTS (target_expr), depth,
849				    factor);
850
851	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
852	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
853			      LLE_INVARIANT_COEFFICIENTS (target_expr),
854			      invariants);
855	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
856				    LLE_INVARIANT_COEFFICIENTS (target_expr),
857				    invariants, factor);
858	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
859
860	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
861	    {
862	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
863		* determinant;
864	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
865					(target_expr),
866					LLE_INVARIANT_COEFFICIENTS
867					(target_expr), invariants,
868					determinant);
869	      LLE_DENOMINATOR (target_expr) =
870		LLE_DENOMINATOR (target_expr) * determinant;
871	    }
872	  /* Find the gcd and divide by it here, rather than doing it
873	     at the tree level.  */
874	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
875	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
876				    invariants);
877	  gcd1 = gcd (gcd1, gcd2);
878	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
879	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
880	  for (j = 0; j < depth; j++)
881	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
882	  for (j = 0; j < invariants; j++)
883	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
884	  LLE_CONSTANT (target_expr) /= gcd1;
885	  LLE_DENOMINATOR (target_expr) /= gcd1;
886	  /* Ignore if identical to existing bound.  */
887	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
888			  invariants))
889	    {
890	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
891	      LL_LOWER_BOUND (target_loop) = target_expr;
892	    }
893	}
894      /* Now do the upper bound.  */
895      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
896
897      for (; auxillary_expr != NULL;
898	   auxillary_expr = LLE_NEXT (auxillary_expr))
899	{
900	  target_expr = lambda_linear_expression_new (depth, invariants);
901	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
902				     depth, inverse, depth,
903				     LLE_COEFFICIENTS (target_expr));
904	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
905				    LLE_COEFFICIENTS (target_expr), depth,
906				    factor);
907	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
908	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
909			      LLE_INVARIANT_COEFFICIENTS (target_expr),
910			      invariants);
911	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
912				    LLE_INVARIANT_COEFFICIENTS (target_expr),
913				    invariants, factor);
914	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
915
916	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
917	    {
918	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
919		* determinant;
920	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
921					(target_expr),
922					LLE_INVARIANT_COEFFICIENTS
923					(target_expr), invariants,
924					determinant);
925	      LLE_DENOMINATOR (target_expr) =
926		LLE_DENOMINATOR (target_expr) * determinant;
927	    }
928	  /* Find the gcd and divide by it here, instead of at the
929	     tree level.  */
930	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
931	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
932				    invariants);
933	  gcd1 = gcd (gcd1, gcd2);
934	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
935	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
936	  for (j = 0; j < depth; j++)
937	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
938	  for (j = 0; j < invariants; j++)
939	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
940	  LLE_CONSTANT (target_expr) /= gcd1;
941	  LLE_DENOMINATOR (target_expr) /= gcd1;
942	  /* Ignore if equal to existing bound.  */
943	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
944			  invariants))
945	    {
946	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
947	      LL_UPPER_BOUND (target_loop) = target_expr;
948	    }
949	}
950    }
951  for (i = 0; i < depth; i++)
952    {
953      target_loop = LN_LOOPS (target_nest)[i];
954      /* If necessary, exchange the upper and lower bounds and negate
955         the step size.  */
956      if (stepsigns[i] < 0)
957	{
958	  LL_STEP (target_loop) *= -1;
959	  tmp_expr = LL_LOWER_BOUND (target_loop);
960	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
961	  LL_UPPER_BOUND (target_loop) = tmp_expr;
962	}
963    }
964  return target_nest;
965}
966
/* Compute the signs of the loop steps that result from applying the
   transformation matrix TRANS to a loop nest whose current step signs
   are STEPSIGNS.  Return a freshly allocated vector holding the new
   signs; TRANS and STEPSIGNS themselves are not modified (the matrix
   is reduced on a copy).  */

static lambda_vector
lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
{
  lambda_matrix matrix, H;
  int size;
  lambda_vector newsteps;
  int i, j, factor, minimum_column;
  int temp;

  matrix = LTM_MATRIX (trans);
  size = LTM_ROWSIZE (trans);
  /* Work on a copy so the caller's transform is left untouched.  */
  H = lambda_matrix_new (size, size);

  newsteps = lambda_vector_new (size);
  lambda_vector_copy (stepsigns, newsteps, size);

  lambda_matrix_copy (matrix, H, size, size);

  /* Reduce H towards lower triangular form by column operations,
     mirroring every column exchange in NEWSTEPS so each sign stays
     associated with the right loop.  */
  for (j = 0; j < size; j++)
    {
      lambda_vector row;
      row = H[j];
      /* Make the entries on and after the diagonal non-negative by
	 negating offending columns.  */
      for (i = j; i < size; i++)
	if (row[i] < 0)
	  lambda_matrix_col_negate (H, size, i);
      /* Eliminate all entries to the right of the diagonal in row J.  */
      while (lambda_vector_first_nz (row, size, j + 1) < size)
	{
	  /* Bring the minimal nonzero entry onto the diagonal ...  */
	  minimum_column = lambda_vector_min_nz (row, size, j);
	  lambda_matrix_col_exchange (H, size, j, minimum_column);

	  /* ... and swap the corresponding step signs to match.  */
	  temp = newsteps[j];
	  newsteps[j] = newsteps[minimum_column];
	  newsteps[minimum_column] = temp;

	  /* Subtract the appropriate multiple of column J from each
	     later column.  */
	  for (i = j + 1; i < size; i++)
	    {
	      factor = row[i] / row[j];
	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
	    }
	}
    }
  return newsteps;
}
1013
1014/* Transform NEST according to TRANS, and return the new loopnest.
1015   This involves
1016   1. Computing a lattice base for the transformation
1017   2. Composing the dense base with the specified transformation (TRANS)
1018   3. Decomposing the combined transformation into a lower triangular portion,
1019   and a unimodular portion.
1020   4. Computing the auxiliary nest using the unimodular portion.
1021   5. Computing the target nest using the auxiliary nest and the lower
1022   triangular portion.  */
1023
lambda_loopnest
lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
{
  lambda_loopnest auxillary_nest, target_nest;

  int depth, invariants;
  int i, j;
  lambda_lattice lattice;
  lambda_trans_matrix trans1, H, U;
  lambda_loop loop;
  lambda_linear_expression expression;
  lambda_vector origin;
  lambda_matrix origin_invariants;
  lambda_vector stepsigns;
  int f;

  depth = LN_DEPTH (nest);
  invariants = LN_INVARIANTS (nest);

  /* Keep track of the signs of the loop steps.  */
  stepsigns = lambda_vector_new (depth);
  for (i = 0; i < depth; i++)
    {
      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
	stepsigns[i] = 1;
      else
	stepsigns[i] = -1;
    }

  /* Compute the lattice base.  */
  lattice = lambda_lattice_compute_base (nest);
  trans1 = lambda_trans_matrix_new (depth, depth);

  /* Multiply the transformation matrix by the lattice base.  */

  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
		      LTM_MATRIX (trans1), depth, depth, depth);

  /* Compute the Hermite normal form for the new transformation matrix,
     splitting it into a lower triangular part H and a unimodular
     part U.  */
  H = lambda_trans_matrix_new (depth, depth);
  U = lambda_trans_matrix_new (depth, depth);
  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
			 LTM_MATRIX (U));

  /* Compute the auxiliary loop nest's space from the unimodular
     portion.  */
  auxillary_nest = lambda_compute_auxillary_space (nest, U);

  /* Compute the loop step signs from the old step signs and the
     transformation matrix.  */
  stepsigns = lambda_compute_step_signs (trans1, stepsigns);

  /* Compute the target loop nest space from the auxiliary nest and
     the lower triangular matrix H.  */
  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);

  /* Finally, shift the target space by the transformed lattice origin:
     fold TRANS * origin (and its invariant coefficients) into each
     loop's linear offset.  */
  origin = lambda_vector_new (depth);
  origin_invariants = lambda_matrix_new (depth, invariants);
  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
			     LATTICE_ORIGIN (lattice), origin);
  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
		      origin_invariants, depth, depth, invariants);

  for (i = 0; i < depth; i++)
    {
      loop = LN_LOOPS (target_nest)[i];
      expression = LL_LINEAR_OFFSET (loop);
      /* A zero coefficient vector means the offset is a plain constant,
	 so no denominator scaling is required.  */
      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
	f = 1;
      else
	f = LLE_DENOMINATOR (expression);

      LLE_CONSTANT (expression) += f * origin[i];

      for (j = 0; j < invariants; j++)
	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
	  f * origin_invariants[i][j];
    }

  return target_nest;

}
1105
1106/* Convert a gcc tree expression EXPR to a lambda linear expression, and
1107   return the new expression.  DEPTH is the depth of the loopnest.
1108   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1109   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1110   is the amount we have to add/subtract from the expression because of the
1111   type of comparison it is used in.  */
1112
1113static lambda_linear_expression
1114gcc_tree_to_linear_expression (int depth, tree expr,
1115			       VEC(tree,heap) *outerinductionvars,
1116			       VEC(tree,heap) *invariants, int extra)
1117{
1118  lambda_linear_expression lle = NULL;
1119  switch (TREE_CODE (expr))
1120    {
1121    case INTEGER_CST:
1122      {
1123	lle = lambda_linear_expression_new (depth, 2 * depth);
1124	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1125	if (extra != 0)
1126	  LLE_CONSTANT (lle) += extra;
1127
1128	LLE_DENOMINATOR (lle) = 1;
1129      }
1130      break;
1131    case SSA_NAME:
1132      {
1133	tree iv, invar;
1134	size_t i;
1135	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1136	  if (iv != NULL)
1137	    {
1138	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1139		{
1140		  lle = lambda_linear_expression_new (depth, 2 * depth);
1141		  LLE_COEFFICIENTS (lle)[i] = 1;
1142		  if (extra != 0)
1143		    LLE_CONSTANT (lle) = extra;
1144
1145		  LLE_DENOMINATOR (lle) = 1;
1146		}
1147	    }
1148	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1149	  if (invar != NULL)
1150	    {
1151	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1152		{
1153		  lle = lambda_linear_expression_new (depth, 2 * depth);
1154		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1155		  if (extra != 0)
1156		    LLE_CONSTANT (lle) = extra;
1157		  LLE_DENOMINATOR (lle) = 1;
1158		}
1159	    }
1160      }
1161      break;
1162    default:
1163      return NULL;
1164    }
1165
1166  return lle;
1167}
1168
1169/* Return the depth of the loopnest NEST */
1170
1171static int
1172depth_of_nest (struct loop *nest)
1173{
1174  size_t depth = 0;
1175  while (nest)
1176    {
1177      depth++;
1178      nest = nest->inner;
1179    }
1180  return depth;
1181}
1182
1183
1184/* Return true if OP is invariant in LOOP and all outer loops.  */
1185
1186static bool
1187invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1188{
1189  if (is_gimple_min_invariant (op))
1190    return true;
1191  if (loop->depth == 0)
1192    return true;
1193  if (!expr_invariant_in_loop_p (loop, op))
1194    return false;
1195  if (loop->outer
1196      && !invariant_in_loop_and_outer_loops (loop->outer, op))
1197    return false;
1198  return true;
1199}
1200
1201/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1202   or NULL if it could not be converted.
1203   DEPTH is the depth of the loop.
1204   INVARIANTS is a pointer to the array of loop invariants.
1205   The induction variable for this loop should be stored in the parameter
1206   OURINDUCTIONVAR.
1207   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1208
static lambda_loop
gcc_loop_to_lambda_loop (struct loop *loop, int depth,
			 VEC(tree,heap) ** invariants,
			 tree * ourinductionvar,
			 VEC(tree,heap) * outerinductionvars,
			 VEC(tree,heap) ** lboundvars,
			 VEC(tree,heap) ** uboundvars,
			 VEC(int,heap) ** steps)
{
  tree phi;
  tree exit_cond;
  tree access_fn, inductionvar;
  tree step;
  lambda_loop lloop = NULL;
  lambda_linear_expression lbound, ubound;
  tree test;
  int stepint;
  int extra = 0;
  tree lboundvar, uboundvar, uboundresult;

  /* Find out induction var and exit condition.  */
  inductionvar = find_induction_var_from_exit_cond (loop);
  exit_cond = get_loop_exit_condition (loop);

  if (inductionvar == NULL || exit_cond == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
      return NULL;
    }

  /* EXIT_COND is known to be a COND_EXPR here (verified by
     find_induction_var_from_exit_cond); operand 0 is the controlling
     comparison.  */
  test = TREE_OPERAND (exit_cond, 0);

  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot find PHI node for induction variable\n");

      return NULL;
    }

  phi = SSA_NAME_DEF_STMT (inductionvar);
  if (TREE_CODE (phi) != PHI_NODE)
    {
      /* The tested name may be a copy of the real induction variable;
	 chase the single SSA use operand of its defining statement
	 back to the PHI node.  */
      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
      if (!phi)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");

	  return NULL;
	}

      phi = SSA_NAME_DEF_STMT (phi);
      if (TREE_CODE (phi) != PHI_NODE)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
	  return NULL;
	}

    }

  /* The induction variable name/version we want to put in the array is the
     result of the induction variable phi node.  */
  *ourinductionvar = PHI_RESULT (phi);
  access_fn = instantiate_parameters
    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
  if (access_fn == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Access function for induction variable phi is unknown\n");

      return NULL;
    }

  /* The loop must have a known, constant integer step to be
     representable as a lambda loop.  */
  step = evolution_part_in_loop_num (access_fn, loop->num);
  if (!step || step == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot determine step of loop.\n");

      return NULL;
    }
  if (TREE_CODE (step) != INTEGER_CST)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Step of loop is not integer.\n");
      return NULL;
    }

  /* NOTE(review): TREE_INT_CST_LOW yields only the low host word and
     this truncates to int -- assumes the step fits; confirm for
     extreme step values.  */
  stepint = TREE_INT_CST_LOW (step);

  /* Only want phis for induction vars, which will have two
     arguments.  */
  if (PHI_NUM_ARGS (phi) != 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
      return NULL;
    }

  /* Another induction variable check. One argument's source should be
     in the loop, one outside the loop.  */
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");

      return NULL;
    }

  /* The PHI argument coming from outside the loop is the initial
     value, i.e. the lower bound.  */
  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
    {
      lboundvar = PHI_ARG_DEF (phi, 1);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0);
    }
  else
    {
      lboundvar = PHI_ARG_DEF (phi, 0);
      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
					      outerinductionvars, *invariants,
					      0);
    }

  if (!lbound)
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");

      return NULL;
    }
  /* One part of the test may be a loop invariant tree.  */
  VEC_reserve (tree, heap, *invariants, 1);
  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));

  /* The non-induction variable part of the test is the upper bound variable.
   */
  if (TREE_OPERAND (test, 0) == inductionvar)
    uboundvar = TREE_OPERAND (test, 1);
  else
    uboundvar = TREE_OPERAND (test, 0);


  /* We only size the vectors assuming we have, at max, 2 times as many
     invariants as we do loops (one for each bound).
     This is just an arbitrary number, but it has to be matched against the
     code below.  */
  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));


  /* We might have some leftover.  EXTRA normalizes the bound so every
     comparison behaves like the inclusive <= / >= forms: e.g. for
     "i < N" with positive step the last iteration is N - step.  */
  if (TREE_CODE (test) == LT_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == NE_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == GT_EXPR)
    extra = -1 * stepint;
  else if (TREE_CODE (test) == EQ_EXPR)
    extra = 1 * stepint;

  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
					  outerinductionvars,
					  *invariants, extra);
  /* Record the adjusted upper bound, the lower bound and the step for
     later use (e.g. by perfect_nestify).  */
  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
			 build_int_cst (TREE_TYPE (uboundvar), extra));
  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
  VEC_safe_push (int, heap, *steps, stepint);
  if (!ubound)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
      return NULL;
    }

  lloop = lambda_loop_new ();
  LL_STEP (lloop) = stepint;
  LL_LOWER_BOUND (lloop) = lbound;
  LL_UPPER_BOUND (lloop) = ubound;
  return lloop;
}
1416
1417/* Given a LOOP, find the induction variable it is testing against in the exit
1418   condition.  Return the induction variable if found, NULL otherwise.  */
1419
1420static tree
1421find_induction_var_from_exit_cond (struct loop *loop)
1422{
1423  tree expr = get_loop_exit_condition (loop);
1424  tree ivarop;
1425  tree test;
1426  if (expr == NULL_TREE)
1427    return NULL_TREE;
1428  if (TREE_CODE (expr) != COND_EXPR)
1429    return NULL_TREE;
1430  test = TREE_OPERAND (expr, 0);
1431  if (!COMPARISON_CLASS_P (test))
1432    return NULL_TREE;
1433
1434  /* Find the side that is invariant in this loop. The ivar must be the other
1435     side.  */
1436
1437  if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1438      ivarop = TREE_OPERAND (test, 1);
1439  else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1440      ivarop = TREE_OPERAND (test, 0);
1441  else
1442    return NULL_TREE;
1443
1444  if (TREE_CODE (ivarop) != SSA_NAME)
1445    return NULL_TREE;
1446  return ivarop;
1447}
1448
/* Heap-allocated vector of lambda loops, used while converting a gcc
   loop nest into a lambda loop nest.  */
DEF_VEC_P(lambda_loop);
DEF_VEC_ALLOC_P(lambda_loop,heap);
1451
1452/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1453   Return the new loop nest.
1454   INDUCTIONVARS is a pointer to an array of induction variables for the
1455   loopnest that will be filled in during this process.
1456   INVARIANTS is a pointer to an array of invariants that will be filled in
1457   during this process.  */
1458
lambda_loopnest
gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
				 struct loop *loop_nest,
				 VEC(tree,heap) **inductionvars,
				 VEC(tree,heap) **invariants)
{
  lambda_loopnest ret = NULL;
  struct loop *temp = loop_nest;
  int depth = depth_of_nest (loop_nest);
  size_t i;
  VEC(lambda_loop,heap) *loops = NULL;
  VEC(tree,heap) *uboundvars = NULL;
  VEC(tree,heap) *lboundvars  = NULL;
  VEC(int,heap) *steps = NULL;
  lambda_loop newloop;
  tree inductionvar = NULL;
  bool perfect_nest = perfect_nest_p (loop_nest);

  /* Bail out early if the nest is imperfect and cannot be fixed.  */
  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
    goto fail;

  /* Convert each loop, outermost first, collecting the bound variables
     and steps that perfect_nestify may need below.  */
  while (temp)
    {
      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
					 &inductionvar, *inductionvars,
					 &lboundvars, &uboundvars,
					 &steps);
      if (!newloop)
	goto fail;

      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
      VEC_safe_push (lambda_loop, heap, loops, newloop);
      temp = temp->inner;
    }

  /* If the nest was not already perfect, try to massage it into one
     now that the bounds and steps are known.  */
  if (!perfect_nest)
    {
      if (!perfect_nestify (currloops, loop_nest,
			    lboundvars, uboundvars, steps, *inductionvars))
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "Not a perfect loop nest and couldn't convert to one.\n");
	  goto fail;
	}
      else if (dump_file)
	fprintf (dump_file,
		 "Successfully converted loop nest to perfect loop nest.\n");
    }

  ret = lambda_loopnest_new (depth, 2 * depth);

  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
    LN_LOOPS (ret)[i] = newloop;

  /* RET stays NULL on the failure paths; the temporary vectors are
     freed either way.  */
 fail:
  VEC_free (lambda_loop, heap, loops);
  VEC_free (tree, heap, uboundvars);
  VEC_free (tree, heap, lboundvars);
  VEC_free (int, heap, steps);

  return ret;
}
1522
1523/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1524   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1525   inserted for us are stored.  INDUCTION_VARS is the array of induction
1526   variables for the loop this LBV is from.  TYPE is the tree type to use for
1527   the variables and trees involved.  */
1528
static tree
lbv_to_gcc_expression (lambda_body_vector lbv,
		       tree type, VEC(tree,heap) *induction_vars,
		       tree *stmts_to_insert)
{
  tree stmts, stmt, resvar, name;
  tree iv;
  size_t i;
  tree_stmt_iterator tsi;

  /* Create a statement list and a linear expression temporary.  */
  stmts = alloc_stmt_list ();
  resvar = create_tmp_var (type, "lbvtmp");
  add_referenced_var (resvar);

  /* Start at 0.  */
  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
  name = make_ssa_name (resvar, stmt);
  TREE_OPERAND (stmt, 0) = name;
  tsi = tsi_last (stmts);
  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

  /* Accumulate coefficient * induction_variable for every nonzero
     coefficient, threading the running sum through NAME.  */
  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
    {
      if (LBV_COEFFICIENTS (lbv)[i] != 0)
	{
	  tree newname;
	  tree coeffmult;

	  /* newname = coefficient * induction_variable */
	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 fold_build2 (MULT_EXPR, type, iv, coeffmult));

	  newname = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = newname;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	  /* name = name + newname */
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 build2 (PLUS_EXPR, type, name, newname));
	  name = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = name;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	}
    }

  /* Handle any denominator that occurs.  */
  if (LBV_DENOMINATOR (lbv) != 1)
    {
      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
		     build2 (CEIL_DIV_EXPR, type, name, denominator));
      name = make_ssa_name (resvar, stmt);
      TREE_OPERAND (stmt, 0) = name;
      fold_stmt (&stmt);
      tsi = tsi_last (stmts);
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
    }
  /* Hand the generated statements back to the caller for insertion;
     NAME is the SSA name holding the final value.  */
  *stmts_to_insert = stmts;
  return name;
}
1596
1597/* Convert a linear expression from coefficient and constant form to a
1598   gcc tree.
1599   Return the tree that represents the final value of the expression.
1600   LLE is the linear expression to convert.
1601   OFFSET is the linear offset to apply to the expression.
1602   TYPE is the tree type to use for the variables and math.
1603   INDUCTION_VARS is a vector of induction variables for the loops.
1604   INVARIANTS is a vector of the loop nest invariants.
1605   WRAP specifies what tree code to wrap the results in, if there is more than
1606   one (it is either MAX_EXPR, or MIN_EXPR).
1607   STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1608   statements that need to be inserted for the linear expression.  */
1609
static tree
lle_to_gcc_expression (lambda_linear_expression lle,
		       lambda_linear_expression offset,
		       tree type,
		       VEC(tree,heap) *induction_vars,
		       VEC(tree,heap) *invariants,
		       enum tree_code wrap, tree *stmts_to_insert)
{
  tree stmts, stmt, resvar, name;
  size_t i;
  tree_stmt_iterator tsi;
  tree iv, invar;
  VEC(tree,heap) *results = NULL;

  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
  name = NULL_TREE;
  /* Create a statement list and a linear expression temporary.  */
  stmts = alloc_stmt_list ();
  resvar = create_tmp_var (type, "lletmp");
  add_referenced_var (resvar);

  /* Build up the linear expressions, and put the variable representing the
     result in the results array.  Each expression on the LLE chain
     yields one result; multiple results are combined with WRAP at the
     end.  */
  for (; lle != NULL; lle = LLE_NEXT (lle))
    {
      /* Start at name = 0.  */
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
      name = make_ssa_name (resvar, stmt);
      TREE_OPERAND (stmt, 0) = name;
      fold_stmt (&stmt);
      tsi = tsi_last (stmts);
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

      /* First do the induction variables.
         at the end, name = name + all the induction variables added
         together.  */
      for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
	{
	  if (LLE_COEFFICIENTS (lle)[i] != 0)
	    {
	      tree newname;
	      tree mult;
	      tree coeff;

	      /* mult = induction variable * coefficient.  */
	      if (LLE_COEFFICIENTS (lle)[i] == 1)
		{
		  /* Coefficient one needs no multiply.  */
		  mult = VEC_index (tree, induction_vars, i);
		}
	      else
		{
		  coeff = build_int_cst (type,
					 LLE_COEFFICIENTS (lle)[i]);
		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
		}

	      /* newname = mult */
	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
	      newname = make_ssa_name (resvar, stmt);
	      TREE_OPERAND (stmt, 0) = newname;
	      fold_stmt (&stmt);
	      tsi = tsi_last (stmts);
	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	      /* name = name + newname */
	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			     build2 (PLUS_EXPR, type, name, newname));
	      name = make_ssa_name (resvar, stmt);
	      TREE_OPERAND (stmt, 0) = name;
	      fold_stmt (&stmt);
	      tsi = tsi_last (stmts);
	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
	    }
	}

      /* Handle our invariants.
         At the end, we have name = name + result of adding all multiplied
         invariants.  */
      for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
	{
	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
	    {
	      tree newname;
	      tree mult;
	      tree coeff;
	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
	      /* mult = invariant * coefficient  */
	      if (invcoeff == 1)
		{
		  mult = invar;
		}
	      else
		{
		  coeff = build_int_cst (type, invcoeff);
		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
		}

	      /* newname = mult */
	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
	      newname = make_ssa_name (resvar, stmt);
	      TREE_OPERAND (stmt, 0) = newname;
	      fold_stmt (&stmt);
	      tsi = tsi_last (stmts);
	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);

	      /* name = name + newname */
	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			     build2 (PLUS_EXPR, type, name, newname));
	      name = make_ssa_name (resvar, stmt);
	      TREE_OPERAND (stmt, 0) = name;
	      fold_stmt (&stmt);
	      tsi = tsi_last (stmts);
	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
	    }
	}

      /* Now handle the constant.
         name = name + constant.  */
      if (LLE_CONSTANT (lle) != 0)
	{
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 build2 (PLUS_EXPR, type, name,
			         build_int_cst (type, LLE_CONSTANT (lle))));
	  name = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = name;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
	}

      /* Now handle the offset.
         name = name + linear offset.  Only the offset's constant term
         is used here; callers assert its coefficients are zero.  */
      if (LLE_CONSTANT (offset) != 0)
	{
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
			 build2 (PLUS_EXPR, type, name,
			         build_int_cst (type, LLE_CONSTANT (offset))));
	  name = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = name;
	  fold_stmt (&stmt);
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
	}

      /* Handle any denominator that occurs.  CEIL when wrapping in
	 MAX_EXPR, FLOOR when wrapping in MIN_EXPR, so rounding never
	 moves the bound past an admissible iteration.  */
      if (LLE_DENOMINATOR (lle) != 1)
	{
	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
	  stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
			 type, name, stmt);
	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);

	  /* name = {ceil, floor}(name/denominator) */
	  name = make_ssa_name (resvar, stmt);
	  TREE_OPERAND (stmt, 0) = name;
	  tsi = tsi_last (stmts);
	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
	}
      VEC_safe_push (tree, heap, results, name);
    }

  /* Again, out of laziness, we don't handle this case yet.  It's not
     hard, it just hasn't occurred.  */
  gcc_assert (VEC_length (tree, results) <= 2);

  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
  if (VEC_length (tree, results) > 1)
    {
      tree op1 = VEC_index (tree, results, 0);
      tree op2 = VEC_index (tree, results, 1);
      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
		     build2 (wrap, type, op1, op2));
      name = make_ssa_name (resvar, stmt);
      TREE_OPERAND (stmt, 0) = name;
      tsi = tsi_last (stmts);
      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
    }

  VEC_free (tree, heap, results);

  /* Hand the generated statements back to the caller; NAME is the SSA
     name holding the final value.  */
  *stmts_to_insert = stmts;
  return name;
}
1793
1794/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1795   it, back into gcc code.  This changes the
1796   loops, their induction variables, and their bodies, so that they
1797   match the transformed loopnest.
1798   OLD_LOOPNEST is the loopnest before we've replaced it with the new
1799   loopnest.
1800   OLD_IVS is a vector of induction variables from the old loopnest.
1801   INVARIANTS is a vector of loop invariants from the old loopnest.
1802   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1803   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1804   NEW_LOOPNEST.  */
1805
1806void
1807lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1808				 VEC(tree,heap) *old_ivs,
1809				 VEC(tree,heap) *invariants,
1810				 lambda_loopnest new_loopnest,
1811				 lambda_trans_matrix transform)
1812{
1813  struct loop *temp;
1814  size_t i = 0;
1815  size_t depth = 0;
1816  VEC(tree,heap) *new_ivs = NULL;
1817  tree oldiv;
1818
1819  block_stmt_iterator bsi;
1820
1821  if (dump_file)
1822    {
1823      transform = lambda_trans_matrix_inverse (transform);
1824      fprintf (dump_file, "Inverse of transformation matrix:\n");
1825      print_lambda_trans_matrix (dump_file, transform);
1826    }
1827  depth = depth_of_nest (old_loopnest);
1828  temp = old_loopnest;
1829
1830  while (temp)
1831    {
1832      lambda_loop newloop;
1833      basic_block bb;
1834      edge exit;
1835      tree ivvar, ivvarinced, exitcond, stmts;
1836      enum tree_code testtype;
1837      tree newupperbound, newlowerbound;
1838      lambda_linear_expression offset;
1839      tree type;
1840      bool insert_after;
1841      tree inc_stmt;
1842
1843      oldiv = VEC_index (tree, old_ivs, i);
1844      type = TREE_TYPE (oldiv);
1845
1846      /* First, build the new induction variable temporary  */
1847
1848      ivvar = create_tmp_var (type, "lnivtmp");
1849      add_referenced_var (ivvar);
1850
1851      VEC_safe_push (tree, heap, new_ivs, ivvar);
1852
1853      newloop = LN_LOOPS (new_loopnest)[i];
1854
1855      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1856         cases for now.  */
1857      offset = LL_LINEAR_OFFSET (newloop);
1858
1859      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1860		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1861
1862      /* Now build the  new lower bounds, and insert the statements
1863         necessary to generate it on the loop preheader.  */
1864      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1865					     LL_LINEAR_OFFSET (newloop),
1866					     type,
1867					     new_ivs,
1868					     invariants, MAX_EXPR, &stmts);
1869      bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1870      bsi_commit_edge_inserts ();
1871      /* Build the new upper bound and insert its statements in the
1872         basic block of the exit condition */
1873      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1874					     LL_LINEAR_OFFSET (newloop),
1875					     type,
1876					     new_ivs,
1877					     invariants, MIN_EXPR, &stmts);
1878      exit = temp->single_exit;
1879      exitcond = get_loop_exit_condition (temp);
1880      bb = bb_for_stmt (exitcond);
1881      bsi = bsi_start (bb);
1882      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1883
1884      /* Create the new iv.  */
1885
1886      standard_iv_increment_position (temp, &bsi, &insert_after);
1887      create_iv (newlowerbound,
1888		 build_int_cst (type, LL_STEP (newloop)),
1889		 ivvar, temp, &bsi, insert_after, &ivvar,
1890		 NULL);
1891
1892      /* Unfortunately, the incremented ivvar that create_iv inserted may not
1893	 dominate the block containing the exit condition.
1894	 So we simply create our own incremented iv to use in the new exit
1895	 test,  and let redundancy elimination sort it out.  */
1896      inc_stmt = build2 (PLUS_EXPR, type,
1897			 ivvar, build_int_cst (type, LL_STEP (newloop)));
1898      inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1899			 inc_stmt);
1900      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1901      TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1902      bsi = bsi_for_stmt (exitcond);
1903      bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1904
1905      /* Replace the exit condition with the new upper bound
1906         comparison.  */
1907
1908      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1909
1910      /* We want to build a conditional where true means exit the loop, and
1911	 false means continue the loop.
1912	 So swap the testtype if this isn't the way things are.*/
1913
1914      if (exit->flags & EDGE_FALSE_VALUE)
1915	testtype = swap_tree_comparison (testtype);
1916
1917      COND_EXPR_COND (exitcond) = build2 (testtype,
1918					  boolean_type_node,
1919					  newupperbound, ivvarinced);
1920      update_stmt (exitcond);
1921      VEC_replace (tree, new_ivs, i, ivvar);
1922
1923      i++;
1924      temp = temp->inner;
1925    }
1926
1927  /* Rewrite uses of the old ivs so that they are now specified in terms of
1928     the new ivs.  */
1929
1930  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1931    {
1932      imm_use_iterator imm_iter;
1933      use_operand_p use_p;
1934      tree oldiv_def;
1935      tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1936      tree stmt;
1937
1938      if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1939        oldiv_def = PHI_RESULT (oldiv_stmt);
1940      else
1941	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1942      gcc_assert (oldiv_def != NULL_TREE);
1943
1944      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
1945        {
1946	  tree newiv, stmts;
1947	  lambda_body_vector lbv, newlbv;
1948
1949	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1950
1951	  /* Compute the new expression for the induction
1952	     variable.  */
1953	  depth = VEC_length (tree, new_ivs);
1954	  lbv = lambda_body_vector_new (depth);
1955	  LBV_COEFFICIENTS (lbv)[i] = 1;
1956
1957	  newlbv = lambda_body_vector_compute_new (transform, lbv);
1958
1959	  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
1960					 new_ivs, &stmts);
1961	  bsi = bsi_for_stmt (stmt);
1962	  /* Insert the statements to build that
1963	     expression.  */
1964	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
1965
1966	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1967	    propagate_value (use_p, newiv);
1968	  update_stmt (stmt);
1969	}
1970    }
1971  VEC_free (tree, heap, new_ivs);
1972}
1973
1974/* Return TRUE if this is not interesting statement from the perspective of
1975   determining if we have a perfect loop nest.  */
1976
1977static bool
1978not_interesting_stmt (tree stmt)
1979{
1980  /* Note that COND_EXPR's aren't interesting because if they were exiting the
1981     loop, we would have already failed the number of exits tests.  */
1982  if (TREE_CODE (stmt) == LABEL_EXPR
1983      || TREE_CODE (stmt) == GOTO_EXPR
1984      || TREE_CODE (stmt) == COND_EXPR)
1985    return true;
1986  return false;
1987}
1988
1989/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
1990
1991static bool
1992phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
1993{
1994  int i;
1995  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
1996    if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
1997      if (PHI_ARG_DEF (phi, i) == def)
1998	return true;
1999  return false;
2000}
2001
2002/* Return TRUE if STMT is a use of PHI_RESULT.  */
2003
2004static bool
2005stmt_uses_phi_result (tree stmt, tree phi_result)
2006{
2007  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2008
2009  /* This is conservatively true, because we only want SIMPLE bumpers
2010     of the form x +- constant for our pass.  */
2011  return (use == phi_result);
2012}
2013
2014/* STMT is a bumper stmt for LOOP if the version it defines is used in the
2015   in-loop-edge in a phi node, and the operand it uses is the result of that
2016   phi node.
2017   I.E. i_29 = i_3 + 1
2018        i_3 = PHI (0, i_29);  */
2019
2020static bool
2021stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2022{
2023  tree use;
2024  tree def;
2025  imm_use_iterator iter;
2026  use_operand_p use_p;
2027
2028  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2029  if (!def)
2030    return false;
2031
2032  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2033    {
2034      use = USE_STMT (use_p);
2035      if (TREE_CODE (use) == PHI_NODE)
2036	{
2037	  if (phi_loop_edge_uses_def (loop, use, def))
2038	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2039	      return true;
2040	}
2041    }
2042  return false;
2043}
2044
2045
2046/* Return true if LOOP is a perfect loop nest.
2047   Perfect loop nests are those loop nests where all code occurs in the
2048   innermost loop body.
2049   If S is a program statement, then
2050
2051   i.e.
2052   DO I = 1, 20
2053       S1
2054       DO J = 1, 20
2055       ...
2056       END DO
2057   END DO
2058   is not a perfect loop nest because of S1.
2059
2060   DO I = 1, 20
2061      DO J = 1, 20
2062        S1
2063	...
2064      END DO
2065   END DO
2066   is a perfect loop nest.
2067
2068   Since we don't have high level loops anymore, we basically have to walk our
2069   statements and ignore those that are there because the loop needs them (IE
2070   the induction variable increment, and jump back to the top of the loop).  */
2071
2072bool
2073perfect_nest_p (struct loop *loop)
2074{
2075  basic_block *bbs;
2076  size_t i;
2077  tree exit_cond;
2078
2079  if (!loop->inner)
2080    return true;
2081  bbs = get_loop_body (loop);
2082  exit_cond = get_loop_exit_condition (loop);
2083  for (i = 0; i < loop->num_nodes; i++)
2084    {
2085      if (bbs[i]->loop_father == loop)
2086	{
2087	  block_stmt_iterator bsi;
2088	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2089	    {
2090	      tree stmt = bsi_stmt (bsi);
2091	      if (stmt == exit_cond
2092		  || not_interesting_stmt (stmt)
2093		  || stmt_is_bumper_for_loop (loop, stmt))
2094		continue;
2095	      free (bbs);
2096	      return false;
2097	    }
2098	}
2099    }
2100  free (bbs);
2101  /* See if the inner loops are perfectly nested as well.  */
2102  if (loop->inner)
2103    return perfect_nest_p (loop->inner);
2104  return true;
2105}
2106
2107/* Replace the USES of X in STMT, or uses with the same step as X with Y.
2108   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2109   avoid creating duplicate temporaries and FIRSTBSI is statement
2110   iterator where new temporaries should be inserted at the beginning
2111   of body basic block.  */
2112
static void
replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
				int xstep, tree y, tree yinit,
				htab_t replacements,
				block_stmt_iterator *firstbsi)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  /* Walk every scalar use in STMT; each is either X itself, a
     same-step equivalent of X, or left untouched.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);
      tree step = NULL_TREE;
      tree scev, init, val, var, setstmt;
      struct tree_map *h, in;
      void **loc;

      /* Replace uses of X with Y right away.  */
      if (use == x)
	{
	  SET_USE (use_p, y);
	  continue;
	}

      /* Analyze USE's evolution in LOOP; skip uses the scalar
	 evolution machinery cannot describe.  */
      scev = instantiate_parameters (loop,
				     analyze_scalar_evolution (loop, use));

      if (scev == NULL || scev == chrec_dont_know)
	continue;

      /* Only uses whose evolution has a constant step equal to XSTEP
	 are equivalent to X up to an additive constant; skip all
	 others.  */
      step = evolution_part_in_loop_num (scev, loop->num);
      if (step == NULL
	  || step == chrec_dont_know
	  || TREE_CODE (step) != INTEGER_CST
	  || int_cst_value (step) != xstep)
	continue;

      /* Use REPLACEMENTS hash table to cache already created
	 temporaries.  */
      in.hash = htab_hash_pointer (use);
      in.from = use;
      h = htab_find_with_hash (replacements, &in, in.hash);
      if (h != NULL)
	{
	  SET_USE (use_p, h->to);
	  continue;
	}

      /* USE which has the same step as X should be replaced
	 with a temporary set to Y + YINIT - INIT.  */
      init = initial_condition_in_loop_num (scev, loop->num);
      gcc_assert (init != NULL && init != chrec_dont_know);
      if (TREE_TYPE (use) == TREE_TYPE (y))
	{
	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
	  if (val == y)
 	    {
	      /* If X has the same type as USE, the same step
		 and same initial value, it can be replaced by Y.  */
	      SET_USE (use_p, y);
	      continue;
	    }
	}
      else
	{
	  /* Types differ: compute Y - YINIT in Y's type, convert, then
	     add INIT back in USE's type.  */
	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
	  val = fold_convert (TREE_TYPE (use), val);
	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
	}

      /* Create a temporary variable and insert it at the beginning
	 of the loop body basic block, right after the PHI node
	 which sets Y.  */
      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
      add_referenced_var (var);
      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
      var = make_ssa_name (var, setstmt);
      TREE_OPERAND (setstmt, 0) = var;
      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
      update_stmt (setstmt);
      SET_USE (use_p, var);

      /* Record the new temporary in REPLACEMENTS so later uses of the
	 same SSA name reuse it instead of creating a duplicate.  */
      h = ggc_alloc (sizeof (struct tree_map));
      h->hash = in.hash;
      h->from = use;
      h->to = var;
      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
      gcc_assert ((*(struct tree_map **)loc) == NULL);
      *(struct tree_map **) loc = h;
    }
}
2205
2206/* Return true if STMT is an exit PHI for LOOP */
2207
2208static bool
2209exit_phi_for_loop_p (struct loop *loop, tree stmt)
2210{
2211
2212  if (TREE_CODE (stmt) != PHI_NODE
2213      || PHI_NUM_ARGS (stmt) != 1
2214      || bb_for_stmt (stmt) != loop->single_exit->dest)
2215    return false;
2216
2217  return true;
2218}
2219
2220/* Return true if STMT can be put back into the loop INNER, by
2221   copying it to the beginning of that loop and changing the uses.  */
2222
2223static bool
2224can_put_in_inner_loop (struct loop *inner, tree stmt)
2225{
2226  imm_use_iterator imm_iter;
2227  use_operand_p use_p;
2228
2229  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2230  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2231      || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2232    return false;
2233
2234  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2235    {
2236      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2237	{
2238	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2239
2240	  if (!flow_bb_inside_loop_p (inner, immbb))
2241	    return false;
2242	}
2243    }
2244  return true;
2245}
2246
2247/* Return true if STMT can be put *after* the inner loop of LOOP.  */
2248static bool
2249can_put_after_inner_loop (struct loop *loop, tree stmt)
2250{
2251  imm_use_iterator imm_iter;
2252  use_operand_p use_p;
2253
2254  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2255    return false;
2256
2257  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2258    {
2259      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2260	{
2261	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2262
2263	  if (!dominated_by_p (CDI_DOMINATORS,
2264			       immbb,
2265			       loop->inner->header)
2266	      && !can_put_in_inner_loop (loop->inner, stmt))
2267	    return false;
2268	}
2269    }
2270  return true;
2271}
2272
2273
2274
2275/* Return TRUE if LOOP is an imperfect nest that we can convert to a
2276   perfect one.  At the moment, we only handle imperfect nests of
2277   depth 2, where all of the statements occur after the inner loop.  */
2278
static bool
can_convert_to_perfect_nest (struct loop *loop)
{
  basic_block *bbs;
  tree exit_condition, phi;
  size_t i;
  block_stmt_iterator bsi;
  basic_block exitdest;

  /* Can't handle triply nested+ loops yet.  */
  if (!loop->inner || loop->inner->inner)
    return false;

  bbs = get_loop_body (loop);
  exit_condition = get_loop_exit_condition (loop);
  /* Examine every statement that belongs to the outer loop proper
     (not to the inner loop).  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      if (bbs[i]->loop_father == loop)
	{
	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
	    {
	      tree stmt = bsi_stmt (bsi);

	      /* Loop bookkeeping (exit test, labels/gotos, iv bump)
		 never blocks the conversion.  */
	      if (stmt == exit_condition
		  || not_interesting_stmt (stmt)
		  || stmt_is_bumper_for_loop (loop, stmt))
		continue;

	      /* If this is a scalar operation that can be put back
	         into the inner loop, or after the inner loop, through
		 copying, then do so. This works on the theory that
		 any amount of scalar code we have to reduplicate
		 into or after the loops is less expensive that the
		 win we get from rearranging the memory walk
		 the loop is doing so that it has better
		 cache behavior.  */
	      if (TREE_CODE (stmt) == MODIFY_EXPR)
		{
		  use_operand_p use_a, use_b;
		  imm_use_iterator imm_iter;
		  ssa_op_iter op_iter, op_iter1;
		  tree op0 = TREE_OPERAND (stmt, 0);
		  tree scev = instantiate_parameters
		    (loop, analyze_scalar_evolution (loop, op0));

		  /* If the IV is simple, it can be duplicated.  */
		  if (!automatically_generated_chrec_p (scev))
		    {
		      tree step = evolution_part_in_loop_num (scev, loop->num);
		      if (step && step != chrec_dont_know
			  && TREE_CODE (step) == INTEGER_CST)
			continue;
		    }

		  /* The statement should not define a variable used
		     in the inner loop.  */
		  if (TREE_CODE (op0) == SSA_NAME)
		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
			  == loop->inner)
			goto fail;

		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
		    {
		      tree node, op = USE_FROM_PTR (use_a);

		      /* The variables should not be used in both loops.  */
		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
			  == loop->inner)
			goto fail;

		      /* The statement should not use the value of a
			 scalar that was modified in the loop.  */
		      node = SSA_NAME_DEF_STMT (op);
		      if (TREE_CODE (node) == PHI_NODE)
			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
			  {
			    tree arg = USE_FROM_PTR (use_b);

			    if (TREE_CODE (arg) == SSA_NAME)
			      {
				tree arg_stmt = SSA_NAME_DEF_STMT (arg);

				/* A PHI argument defined inside the
				   inner loop means the scalar is
				   modified there.  */
				if (bb_for_stmt (arg_stmt)->loop_father
				    == loop->inner)
				  goto fail;
			      }
			  }
		    }

		  /* The statement is movable: either into the inner
		     loop or into the split-off copy after it.  */
		  if (can_put_in_inner_loop (loop->inner, stmt)
		      || can_put_after_inner_loop (loop, stmt))
		    continue;
		}

	      /* Otherwise, if the bb of a statement we care about isn't
		 dominated by the header of the inner loop, then we can't
		 handle this case right now.  This test ensures that the
		 statement comes completely *after* the inner loop.  */
	      if (!dominated_by_p (CDI_DOMINATORS,
				   bb_for_stmt (stmt),
				   loop->inner->header))
		goto fail;
	    }
	}
    }

  /* We also need to make sure the loop exit only has simple copy phis in it,
     otherwise we don't know how to transform it into a perfect nest right
     now.  */
  exitdest = loop->single_exit->dest;

  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
    if (PHI_NUM_ARGS (phi) != 1)
      goto fail;

  free (bbs);
  return true;

 fail:
  free (bbs);
  return false;
}
2403
2404/* Transform the loop nest into a perfect nest, if possible.
2405   LOOPS is the current struct loops *
2406   LOOP is the loop nest to transform into a perfect nest
2407   LBOUNDS are the lower bounds for the loops to transform
2408   UBOUNDS are the upper bounds for the loops to transform
2409   STEPS is the STEPS for the loops to transform.
2410   LOOPIVS is the induction variables for the loops to transform.
2411
2412   Basically, for the case of
2413
2414   FOR (i = 0; i < 50; i++)
2415    {
2416     FOR (j =0; j < 50; j++)
2417     {
2418        <whatever>
2419     }
2420     <some code>
2421    }
2422
2423   This function will transform it into a perfect loop nest by splitting the
2424   outer loop into two loops, like so:
2425
2426   FOR (i = 0; i < 50; i++)
2427   {
2428     FOR (j = 0; j < 50; j++)
2429     {
2430         <whatever>
2431     }
2432   }
2433
2434   FOR (i = 0; i < 50; i ++)
2435   {
2436    <some code>
2437   }
2438
2439   Return FALSE if we can't make this loop into a perfect nest.  */
2440
static bool
perfect_nestify (struct loops *loops,
		 struct loop *loop,
		 VEC(tree,heap) *lbounds,
		 VEC(tree,heap) *ubounds,
		 VEC(int,heap) *steps,
		 VEC(tree,heap) *loopivs)
{
  basic_block *bbs;
  tree exit_condition;
  tree then_label, else_label, cond_stmt;
  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
  int i;
  block_stmt_iterator bsi, firstbsi;
  bool insert_after;
  edge e;
  struct loop *newloop;
  tree phi;
  tree uboundvar;
  tree stmt;
  tree oldivvar, ivvar, ivvarinced;
  VEC(tree,heap) *phis = NULL;
  htab_t replacements = NULL;

  /* Create the new loop.  Split the loop's exit edge so the new loop
     gets its own preheader block, then create an empty header for
     it.  */
  olddest = loop->single_exit->dest;
  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);

  /* Push the exit phi nodes that we are moving.  Each PHI is recorded
     as a (result, argument) pair on the PHIS stack.  */
  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
    {
      VEC_reserve (tree, heap, phis, 2);
      VEC_quick_push (tree, phis, PHI_RESULT (phi));
      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
    }
  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);

  /* Remove the exit phis from the old basic block.  Make sure to set
     PHI_RESULT to null so it doesn't get released.  */
  while (phi_nodes (olddest) != NULL)
    {
      SET_PHI_RESULT (phi_nodes (olddest), NULL);
      remove_phi_node (phi_nodes (olddest), NULL);
    }

  /* and add them back to the new basic block.  */
  while (VEC_length (tree, phis) != 0)
    {
      tree def;
      tree phiname;
      def = VEC_pop (tree, phis);
      phiname = VEC_pop (tree, phis);
      phi = create_phi_node (phiname, preheaderbb);
      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
    }
  flush_pending_stmts (e);
  VEC_free (tree, heap, phis);

  /* Build the new loop's skeleton: header -> body -> latch -> header,
     with the body conditionally branching out to OLDDEST.  */
  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
  /* The condition is a placeholder (1 != 0); the real exit test is
     installed below once the new iv exists.  */
  cond_stmt = build3 (COND_EXPR, void_type_node,
		      build2 (NE_EXPR, boolean_type_node,
			      integer_one_node,
			      integer_zero_node),
		      then_label, else_label);
  bsi = bsi_start (bodybb);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
  make_edge (latchbb, headerbb, EDGE_FALLTHRU);

  /* Update the loop structures.  */
  newloop = duplicate_loop (loops, loop, olddest->loop_father);
  newloop->header = headerbb;
  newloop->latch = latchbb;
  newloop->single_exit = e;
  add_bb_to_loop (latchbb, newloop);
  add_bb_to_loop (bodybb, newloop);
  add_bb_to_loop (headerbb, newloop);
  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
			   loop->single_exit->src);
  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
  /* Create the new iv, using the outer loop's bounds and step.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
  add_referenced_var (ivvar);
  standard_iv_increment_position (newloop, &bsi, &insert_after);
  create_iv (VEC_index (tree, lbounds, 0),
	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);

  /* Create the new upper bound.  This may be not just a variable, so we copy
     it to one just in case.  */

  exit_condition = get_loop_exit_condition (newloop);
  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
  add_referenced_var (uboundvar);
  stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
		 VEC_index (tree, ubounds, 0));
  uboundvar = make_ssa_name (uboundvar, stmt);
  TREE_OPERAND (stmt, 0) = uboundvar;

  if (insert_after)
    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
  else
    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
  update_stmt (stmt);
  /* Replace the placeholder condition with the real exit test:
     exit when ubound >= incremented iv.  */
  COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
					    boolean_type_node,
					    uboundvar,
					    ivvarinced);
  update_stmt (exit_condition);
  replacements = htab_create_ggc (20, tree_map_hash,
				  tree_map_eq, NULL);
  bbs = get_loop_body_in_dom_order (loop);
  /* Now move the statements, and replace the induction variable in the moved
     statements with the correct loop induction variable.  */
  oldivvar = VEC_index (tree, loopivs, 0);
  firstbsi = bsi_start (bodybb);
  /* Walk the body in reverse dominator order so statements are moved
     after the ones they depend on.  */
  for (i = loop->num_nodes - 1; i >= 0 ; i--)
    {
      block_stmt_iterator tobsi = bsi_last (bodybb);
      if (bbs[i]->loop_father == loop)
	{
	  /* If this is true, we are *before* the inner loop.
	     If this isn't true, we are *after* it.

	     The only time can_convert_to_perfect_nest returns true when we
	     have statements before the inner loop is if they can be moved
	     into the inner loop.

	     The only time can_convert_to_perfect_nest returns true when we
	     have statements after the inner loop is if they can be moved into
	     the new split loop.  */

	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
	    {
	      block_stmt_iterator header_bsi
		= bsi_after_labels (loop->inner->header);

	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
		{
		  tree stmt = bsi_stmt (bsi);

		  if (stmt == exit_condition
		      || not_interesting_stmt (stmt)
		      || stmt_is_bumper_for_loop (loop, stmt))
		    {
		      bsi_next (&bsi);
		      continue;
		    }

		  bsi_move_before (&bsi, &header_bsi);
		}
	    }
	  else
	    {
	      /* Note that the bsi only needs to be explicitly incremented
		 when we don't move something, since it is automatically
		 incremented when we do.  */
	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
		{
		  ssa_op_iter i;
		  tree n, stmt = bsi_stmt (bsi);

		  if (stmt == exit_condition
		      || not_interesting_stmt (stmt)
		      || stmt_is_bumper_for_loop (loop, stmt))
		    {
		      bsi_next (&bsi);
		      continue;
		    }

		  /* Rewrite uses of the old iv (and of any variable
		     with the same step) in terms of the new loop's
		     iv before moving the statement.  */
		  replace_uses_equiv_to_x_with_y
		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
		     VEC_index (tree, lbounds, 0), replacements, &firstbsi);

		  bsi_move_before (&bsi, &tobsi);

		  /* If the statement has any virtual operands, they may
		     need to be rewired because the original loop may
		     still reference them.  */
		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
		    mark_sym_for_renaming (SSA_NAME_VAR (n));
		}
	    }

	}
    }

  free (bbs);
  htab_delete (replacements);
  return perfect_nest_p (loop);
}
2642
2643/* Return true if TRANS is a legal transformation matrix that respects
2644   the dependence vectors in DISTS and DIRS.  The conservative answer
2645   is false.
2646
2647   "Wolfe proves that a unimodular transformation represented by the
2648   matrix T is legal when applied to a loop nest with a set of
2649   lexicographically non-negative distance vectors RDG if and only if
2650   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2651   i.e.: if and only if it transforms the lexicographically positive
2652   distance vectors to lexicographically positive vectors.  Note that
2653   a unimodular matrix must transform the zero vector (and only it) to
2654   the zero vector." S.Muchnick.  */
2655
2656bool
2657lambda_transform_legal_p (lambda_trans_matrix trans,
2658			  int nb_loops,
2659			  VEC (ddr_p, heap) *dependence_relations)
2660{
2661  unsigned int i, j;
2662  lambda_vector distres;
2663  struct data_dependence_relation *ddr;
2664
2665  gcc_assert (LTM_COLSIZE (trans) == nb_loops
2666	      && LTM_ROWSIZE (trans) == nb_loops);
2667
2668  /* When there is an unknown relation in the dependence_relations, we
2669     know that it is no worth looking at this loop nest: give up.  */
2670  ddr = VEC_index (ddr_p, dependence_relations, 0);
2671  if (ddr == NULL)
2672    return true;
2673  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2674    return false;
2675
2676  distres = lambda_vector_new (nb_loops);
2677
2678  /* For each distance vector in the dependence graph.  */
2679  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
2680    {
2681      /* Don't care about relations for which we know that there is no
2682	 dependence, nor about read-read (aka. output-dependences):
2683	 these data accesses can happen in any order.  */
2684      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2685	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2686	continue;
2687
2688      /* Conservatively answer: "this transformation is not valid".  */
2689      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2690	return false;
2691
2692      /* If the dependence could not be captured by a distance vector,
2693	 conservatively answer that the transform is not valid.  */
2694      if (DDR_NUM_DIST_VECTS (ddr) == 0)
2695	return false;
2696
2697      /* Compute trans.dist_vect */
2698      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2699	{
2700	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2701				     DDR_DIST_VECT (ddr, j), distres);
2702
2703	  if (!lambda_vector_lexico_pos (distres, nb_loops))
2704	    return false;
2705	}
2706    }
2707  return true;
2708}
2709