1/*  Loop transformation code generation
2    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3    Contributed by Daniel Berlin <dberlin@dberlin.org>
4
5    This file is part of GCC.
6
7    GCC is free software; you can redistribute it and/or modify it under
8    the terms of the GNU General Public License as published by the Free
9    Software Foundation; either version 2, or (at your option) any later
10    version.
11
12    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13    WARRANTY; without even the implied warranty of MERCHANTABILITY or
14    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15    for more details.
16
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING.  If not, write to the Free
19    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20    02110-1301, USA.  */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
28#include "target.h"
29#include "rtl.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "timevar.h"
35#include "cfgloop.h"
36#include "expr.h"
37#include "optabs.h"
38#include "tree-chrec.h"
39#include "tree-data-ref.h"
40#include "tree-pass.h"
41#include "tree-scalar-evolution.h"
42#include "vec.h"
43#include "lambda.h"
44
45/* This loop nest code generation is based on non-singular matrix
46   math.
47
48 A little terminology and a general sketch of the algorithm.  See "A singular
49 loop transformation framework based on non-singular matrices" by Wei Li and
50 Keshav Pingali for formal proofs that the various statements below are
51 correct.
52
53 A loop iteration space represents the points traversed by the loop.  A point in the
54 iteration space can be represented by a vector of size <loop depth>.  You can
55 therefore represent the iteration space as an integral combinations of a set
56 of basis vectors.
57
58 A loop iteration space is dense if every integer point between the loop
59 bounds is a point in the iteration space.  Every loop with a step of 1
60 therefore has a dense iteration space.
61
62 for i = 1 to 3, step 1 is a dense iteration space.
63
64 A loop iteration space is sparse if it is not dense.  That is, the iteration
65 space skips integer points that are within the loop bounds.
66
67 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
68 2 is skipped.
69
70 Dense source spaces are easy to transform, because they don't skip any
71 points to begin with.  Thus we can compute the exact bounds of the target
72 space using min/max and floor/ceil.
73
74 For a dense source space, we take the transformation matrix, decompose it
75 into a lower triangular part (H) and a unimodular part (U).
76 We then compute the auxiliary space from the unimodular part (source loop
77 nest . U = auxiliary space) , which has two important properties:
78  1. It traverses the iterations in the same lexicographic order as the source
79  space.
80  2. It is a dense space when the source is a dense space (even if the target
81  space is going to be sparse).
82
83 Given the auxiliary space, we use the lower triangular part to compute the
84 bounds in the target space by simple matrix multiplication.
85 The gaps in the target space (IE the new loop step sizes) will be the
86 diagonals of the H matrix.
87
88 Sparse source spaces require another step, because you can't directly compute
89 the exact bounds of the auxiliary and target space from the sparse space.
90 Rather than try to come up with a separate algorithm to handle sparse source
91 spaces directly, we just find a legal transformation matrix that gives you
92 the sparse source space, from a dense space, and then transform the dense
93 space.
94
95 For a regular sparse space, you can represent the source space as an integer
96 lattice, and the base space of that lattice will always be dense.  Thus, we
97 effectively use the lattice to figure out the transformation from the lattice
98 base space, to the sparse iteration space (IE what transform was applied to
99 the dense space to make it sparse).  We then compose this transform with the
100 transformation matrix specified by the user (since our matrix transformations
101 are closed under composition, this is okay).  We can then use the base space
102 (which is dense) plus the composed transformation matrix, to compute the rest
103 of the transform using the dense space algorithm above.
104
105 In other words, our sparse source space (B) is decomposed into a dense base
106 space (A), and a matrix (L) that transforms A into B, such that A.L = B.
107 We then compute the composition of L and the user transformation matrix (T),
108 so that T is now a transform from A to the result, instead of from B to the
109 result.
110 IE A.(LT) = result instead of B.T = result
111 Since A is now a dense source space, we can use the dense source space
112 algorithm above to compute the result of applying transform (LT) to A.
113
114 Fourier-Motzkin elimination is used to compute the bounds of the base space
115 of the lattice.  */
116
117DEF_VEC_I(int);
118DEF_VEC_ALLOC_I(int,heap);
119
120static bool perfect_nestify (struct loops *,
121			     struct loop *, VEC(tree,heap) *,
122			     VEC(tree,heap) *, VEC(int,heap) *,
123			     VEC(tree,heap) *);
124/* Lattice stuff that is internal to the code generation algorithm.  */
125
126typedef struct
127{
128  /* Lattice base matrix.  */
129  lambda_matrix base;
130  /* Lattice dimension.  */
131  int dimension;
132  /* Origin vector for the coefficients.  */
133  lambda_vector origin;
134  /* Origin matrix for the invariants.  */
135  lambda_matrix origin_invariants;
136  /* Number of invariants.  */
137  int invariants;
138} *lambda_lattice;
139
140#define LATTICE_BASE(T) ((T)->base)
141#define LATTICE_DIMENSION(T) ((T)->dimension)
142#define LATTICE_ORIGIN(T) ((T)->origin)
143#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
144#define LATTICE_INVARIANTS(T) ((T)->invariants)
145
146static bool lle_equal (lambda_linear_expression, lambda_linear_expression,
147		       int, int);
148static lambda_lattice lambda_lattice_new (int, int);
149static lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
150
151static tree find_induction_var_from_exit_cond (struct loop *);
152static bool can_convert_to_perfect_nest (struct loop *);
153
154/* Create a new lambda body vector.  */
155
156lambda_body_vector
157lambda_body_vector_new (int size)
158{
159  lambda_body_vector ret;
160
161  ret = ggc_alloc (sizeof (*ret));
162  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
163  LBV_SIZE (ret) = size;
164  LBV_DENOMINATOR (ret) = 1;
165  return ret;
166}
167
168/* Compute the new coefficients for the vector based on the
169  *inverse* of the transformation matrix.  */
170
171lambda_body_vector
172lambda_body_vector_compute_new (lambda_trans_matrix transform,
173				lambda_body_vector vect)
174{
175  lambda_body_vector temp;
176  int depth;
177
178  /* Make sure the matrix is square.  */
179  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
180
181  depth = LTM_ROWSIZE (transform);
182
183  temp = lambda_body_vector_new (depth);
184  LBV_DENOMINATOR (temp) =
185    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
186  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
187			     LTM_MATRIX (transform), depth,
188			     LBV_COEFFICIENTS (temp));
189  LBV_SIZE (temp) = LBV_SIZE (vect);
190  return temp;
191}
192
193/* Print out a lambda body vector.  */
194
195void
196print_lambda_body_vector (FILE * outfile, lambda_body_vector body)
197{
198  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
199}
200
201/* Return TRUE if two linear expressions are equal.  */
202
203static bool
204lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
205	   int depth, int invariants)
206{
207  int i;
208
209  if (lle1 == NULL || lle2 == NULL)
210    return false;
211  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
212    return false;
213  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
214    return false;
215  for (i = 0; i < depth; i++)
216    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
217      return false;
218  for (i = 0; i < invariants; i++)
219    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
220	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
221      return false;
222  return true;
223}
224
225/* Create a new linear expression with dimension DIM, and total number
226   of invariants INVARIANTS.  */
227
228lambda_linear_expression
229lambda_linear_expression_new (int dim, int invariants)
230{
231  lambda_linear_expression ret;
232
233  ret = ggc_alloc_cleared (sizeof (*ret));
234
235  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
236  LLE_CONSTANT (ret) = 0;
237  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
238  LLE_DENOMINATOR (ret) = 1;
239  LLE_NEXT (ret) = NULL;
240
241  return ret;
242}
243
244/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
245   The starting letter used for variable names is START.  */
246
247static void
248print_linear_expression (FILE * outfile, lambda_vector expr, int size,
249			 char start)
250{
251  int i;
252  bool first = true;
253  for (i = 0; i < size; i++)
254    {
255      if (expr[i] != 0)
256	{
257	  if (first)
258	    {
259	      if (expr[i] < 0)
260		fprintf (outfile, "-");
261	      first = false;
262	    }
263	  else if (expr[i] > 0)
264	    fprintf (outfile, " + ");
265	  else
266	    fprintf (outfile, " - ");
267	  if (abs (expr[i]) == 1)
268	    fprintf (outfile, "%c", start + i);
269	  else
270	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
271	}
272    }
273}
274
275/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
276   depth/number of coefficients is given by DEPTH, the number of invariants is
277   given by INVARIANTS, and the character to start variable names with is given
278   by START.  */
279
280void
281print_lambda_linear_expression (FILE * outfile,
282				lambda_linear_expression expr,
283				int depth, int invariants, char start)
284{
285  fprintf (outfile, "\tLinear expression: ");
286  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
287  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
288  fprintf (outfile, "  invariants: ");
289  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
290			   invariants, 'A');
291  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
292}
293
294/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
295   coefficients is given by DEPTH, the number of invariants is
296   given by INVARIANTS, and the character to start variable names with is given
297   by START.  */
298
299void
300print_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
301		   int invariants, char start)
302{
303  int step;
304  lambda_linear_expression expr;
305
306  gcc_assert (loop);
307
308  expr = LL_LINEAR_OFFSET (loop);
309  step = LL_STEP (loop);
310  fprintf (outfile, "  step size = %d \n", step);
311
312  if (expr)
313    {
314      fprintf (outfile, "  linear offset: \n");
315      print_lambda_linear_expression (outfile, expr, depth, invariants,
316				      start);
317    }
318
319  fprintf (outfile, "  lower bound: \n");
320  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
321    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
322  fprintf (outfile, "  upper bound: \n");
323  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
324    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
325}
326
327/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
328   number of invariants.  */
329
330lambda_loopnest
331lambda_loopnest_new (int depth, int invariants)
332{
333  lambda_loopnest ret;
334  ret = ggc_alloc (sizeof (*ret));
335
336  LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
337  LN_DEPTH (ret) = depth;
338  LN_INVARIANTS (ret) = invariants;
339
340  return ret;
341}
342
343/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
344   character to use for loop names is given by START.  */
345
346void
347print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
348{
349  int i;
350  for (i = 0; i < LN_DEPTH (nest); i++)
351    {
352      fprintf (outfile, "Loop %c\n", start + i);
353      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
354			 LN_INVARIANTS (nest), 'i');
355      fprintf (outfile, "\n");
356    }
357}
358
359/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
360   of invariants.  */
361
362static lambda_lattice
363lambda_lattice_new (int depth, int invariants)
364{
365  lambda_lattice ret;
366  ret = ggc_alloc (sizeof (*ret));
367  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
368  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
369  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
370  LATTICE_DIMENSION (ret) = depth;
371  LATTICE_INVARIANTS (ret) = invariants;
372  return ret;
373}
374
375/* Compute the lattice base for NEST.  The lattice base is essentially a
376   non-singular transform from a dense base space to a sparse iteration space.
377   We use it so that we don't have to specially handle the case of a sparse
378   iteration space in other parts of the algorithm.  As a result, this routine
379   only does something interesting (IE produce a matrix that isn't the
380   identity matrix) if NEST is a sparse space.  */
381
382static lambda_lattice
383lambda_lattice_compute_base (lambda_loopnest nest)
384{
385  lambda_lattice ret;
386  int depth, invariants;
387  lambda_matrix base;
388
389  int i, j, step;
390  lambda_loop loop;
391  lambda_linear_expression expression;
392
393  depth = LN_DEPTH (nest);
394  invariants = LN_INVARIANTS (nest);
395
396  ret = lambda_lattice_new (depth, invariants);
397  base = LATTICE_BASE (ret);
398  for (i = 0; i < depth; i++)
399    {
400      loop = LN_LOOPS (nest)[i];
401      gcc_assert (loop);
402      step = LL_STEP (loop);
403      /* If we have a step of 1, then the base is one, and the
404         origin and invariant coefficients are 0.  */
405      if (step == 1)
406	{
407	  for (j = 0; j < depth; j++)
408	    base[i][j] = 0;
409	  base[i][i] = 1;
410	  LATTICE_ORIGIN (ret)[i] = 0;
411	  for (j = 0; j < invariants; j++)
412	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
413	}
414      else
415	{
416	  /* Otherwise, we need the lower bound expression (which must
417	     be an affine function)  to determine the base.  */
418	  expression = LL_LOWER_BOUND (loop);
419	  gcc_assert (expression && !LLE_NEXT (expression)
420		      && LLE_DENOMINATOR (expression) == 1);
421
422	  /* The lower triangular portion of the base is going to be the
423	     coefficient times the step */
424	  for (j = 0; j < i; j++)
425	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
426	      * LL_STEP (LN_LOOPS (nest)[j]);
427	  base[i][i] = step;
428	  for (j = i + 1; j < depth; j++)
429	    base[i][j] = 0;
430
431	  /* Origin for this loop is the constant of the lower bound
432	     expression.  */
433	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
434
435	  /* Coefficient for the invariants are equal to the invariant
436	     coefficients in the expression.  */
437	  for (j = 0; j < invariants; j++)
438	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
439	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
440	}
441    }
442  return ret;
443}
444
445/* Compute the greatest common denominator of two numbers (A and B) using
446   Euclid's algorithm.  */
447
448static int
449gcd (int a, int b)
450{
451
452  int x, y, z;
453
454  x = abs (a);
455  y = abs (b);
456
457  while (x > 0)
458    {
459      z = y % x;
460      y = x;
461      x = z;
462    }
463
464  return (y);
465}
466
467/* Compute the greatest common denominator of a VECTOR of SIZE numbers.  */
468
469static int
470gcd_vector (lambda_vector vector, int size)
471{
472  int i;
473  int gcd1 = 0;
474
475  if (size > 0)
476    {
477      gcd1 = vector[0];
478      for (i = 1; i < size; i++)
479	gcd1 = gcd (gcd1, vector[i]);
480    }
481  return gcd1;
482}
483
484/* Compute the least common multiple of two numbers A and B .  */
485
486static int
487lcm (int a, int b)
488{
489  return (abs (a) * abs (b) / gcd (a, b));
490}
491
492/* Perform Fourier-Motzkin elimination to calculate the bounds of the
493   auxiliary nest.
494   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
495   it is easy to calculate the answer and bounds.
496   A sketch of how it works:
497   Given a system of linear inequalities, ai * xj >= bk, you can always
498   rewrite the constraints so they are all of the form
499   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
500   in b1 ... bk, and some a in a1...ai)
501   You can then eliminate this x from the non-constant inequalities by
502   rewriting these as a <= b, x >= constant, and delete the x variable.
503   You can then repeat this for any remaining x variables, and then we have
504   an easy to use variable <= constant (or no variables at all) form that we
505   can construct our bounds from.
506
507   In our case, each time we eliminate, we construct part of the bound from
508   the ith variable, then delete the ith variable.
509
510   Remember the constant are in our vector a, our coefficient matrix is A,
511   and our invariant coefficient matrix is B.
512
513   SIZE is the size of the matrices being passed.
514   DEPTH is the loop nest depth.
515   INVARIANTS is the number of loop invariants.
516   A, B, and a are the coefficient matrix, invariant coefficient, and a
517   vector of constants, respectively.  */
518
519static lambda_loopnest
520compute_nest_using_fourier_motzkin (int size,
521				    int depth,
522				    int invariants,
523				    lambda_matrix A,
524				    lambda_matrix B,
525				    lambda_vector a)
526{
527
528  int multiple, f1, f2;
529  int i, j, k;
530  lambda_linear_expression expression;
531  lambda_loop loop;
532  lambda_loopnest auxillary_nest;
533  lambda_matrix swapmatrix, A1, B1;
534  lambda_vector swapvector, a1;
535  int newsize;
536
537  A1 = lambda_matrix_new (128, depth);
538  B1 = lambda_matrix_new (128, invariants);
539  a1 = lambda_vector_new (128);
540
541  auxillary_nest = lambda_loopnest_new (depth, invariants);
542
543  for (i = depth - 1; i >= 0; i--)
544    {
545      loop = lambda_loop_new ();
546      LN_LOOPS (auxillary_nest)[i] = loop;
547      LL_STEP (loop) = 1;
548
549      for (j = 0; j < size; j++)
550	{
551	  if (A[j][i] < 0)
552	    {
553	      /* Any linear expression in the matrix with a coefficient less
554		 than 0 becomes part of the new lower bound.  */
555	      expression = lambda_linear_expression_new (depth, invariants);
556
557	      for (k = 0; k < i; k++)
558		LLE_COEFFICIENTS (expression)[k] = A[j][k];
559
560	      for (k = 0; k < invariants; k++)
561		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
562
563	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
564	      LLE_CONSTANT (expression) = -1 * a[j];
565
566	      /* Ignore if identical to the existing lower bound.  */
567	      if (!lle_equal (LL_LOWER_BOUND (loop),
568			      expression, depth, invariants))
569		{
570		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
571		  LL_LOWER_BOUND (loop) = expression;
572		}
573
574	    }
575	  else if (A[j][i] > 0)
576	    {
577	      /* Any linear expression with a coefficient greater than 0
578		 becomes part of the new upper bound.  */
579	      expression = lambda_linear_expression_new (depth, invariants);
580	      for (k = 0; k < i; k++)
581		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
582
583	      for (k = 0; k < invariants; k++)
584		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
585
586	      LLE_DENOMINATOR (expression) = A[j][i];
587	      LLE_CONSTANT (expression) = a[j];
588
589	      /* Ignore if identical to the existing upper bound.  */
590	      if (!lle_equal (LL_UPPER_BOUND (loop),
591			      expression, depth, invariants))
592		{
593		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
594		  LL_UPPER_BOUND (loop) = expression;
595		}
596
597	    }
598	}
599
600      /* This portion creates a new system of linear inequalities by deleting
601	 the i'th variable, reducing the system by one variable.  */
602      newsize = 0;
603      for (j = 0; j < size; j++)
604	{
605	  /* If the coefficient for the i'th variable is 0, then we can just
606	     eliminate the variable straightaway.  Otherwise, we have to
607	     multiply through by the coefficients we are eliminating.  */
608	  if (A[j][i] == 0)
609	    {
610	      lambda_vector_copy (A[j], A1[newsize], depth);
611	      lambda_vector_copy (B[j], B1[newsize], invariants);
612	      a1[newsize] = a[j];
613	      newsize++;
614	    }
615	  else if (A[j][i] > 0)
616	    {
617	      for (k = 0; k < size; k++)
618		{
619		  if (A[k][i] < 0)
620		    {
621		      multiple = lcm (A[j][i], A[k][i]);
622		      f1 = multiple / A[j][i];
623		      f2 = -1 * multiple / A[k][i];
624
625		      lambda_vector_add_mc (A[j], f1, A[k], f2,
626					    A1[newsize], depth);
627		      lambda_vector_add_mc (B[j], f1, B[k], f2,
628					    B1[newsize], invariants);
629		      a1[newsize] = f1 * a[j] + f2 * a[k];
630		      newsize++;
631		    }
632		}
633	    }
634	}
635
636      swapmatrix = A;
637      A = A1;
638      A1 = swapmatrix;
639
640      swapmatrix = B;
641      B = B1;
642      B1 = swapmatrix;
643
644      swapvector = a;
645      a = a1;
646      a1 = swapvector;
647
648      size = newsize;
649    }
650
651  return auxillary_nest;
652}
653
654/* Compute the loop bounds for the auxiliary space NEST.
655   Input system used is Ax <= b.  TRANS is the unimodular transformation.
656   Given the original nest, this function will
657   1. Convert the nest into matrix form, which consists of a matrix for the
658   coefficients, a matrix for the
659   invariant coefficients, and a vector for the constants.
660   2. Use the matrix form to calculate the lattice base for the nest (which is
661   a dense space)
662   3. Compose the dense space transform with the user specified transform, to
663   get a transform we can easily calculate transformed bounds for.
664   4. Multiply the composed transformation matrix times the matrix form of the
665   loop.
666   5. Transform the newly created matrix (from step 4) back into a loop nest
667   using fourier motzkin elimination to figure out the bounds.  */
668
669static lambda_loopnest
670lambda_compute_auxillary_space (lambda_loopnest nest,
671				lambda_trans_matrix trans)
672{
673  lambda_matrix A, B, A1, B1;
674  lambda_vector a, a1;
675  lambda_matrix invertedtrans;
676  int depth, invariants, size;
677  int i, j;
678  lambda_loop loop;
679  lambda_linear_expression expression;
680  lambda_lattice lattice;
681
682  depth = LN_DEPTH (nest);
683  invariants = LN_INVARIANTS (nest);
684
685  /* Unfortunately, we can't know the number of constraints we'll have
686     ahead of time, but this should be enough even in ridiculous loop nest
687     cases. We must not go over this limit.  */
688  A = lambda_matrix_new (128, depth);
689  B = lambda_matrix_new (128, invariants);
690  a = lambda_vector_new (128);
691
692  A1 = lambda_matrix_new (128, depth);
693  B1 = lambda_matrix_new (128, invariants);
694  a1 = lambda_vector_new (128);
695
696  /* Store the bounds in the equation matrix A, constant vector a, and
697     invariant matrix B, so that we have Ax <= a + B.
698     This requires a little equation rearranging so that everything is on the
699     correct side of the inequality.  */
700  size = 0;
701  for (i = 0; i < depth; i++)
702    {
703      loop = LN_LOOPS (nest)[i];
704
705      /* First we do the lower bound.  */
706      if (LL_STEP (loop) > 0)
707	expression = LL_LOWER_BOUND (loop);
708      else
709	expression = LL_UPPER_BOUND (loop);
710
711      for (; expression != NULL; expression = LLE_NEXT (expression))
712	{
713	  /* Fill in the coefficient.  */
714	  for (j = 0; j < i; j++)
715	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
716
717	  /* And the invariant coefficient.  */
718	  for (j = 0; j < invariants; j++)
719	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
720
721	  /* And the constant.  */
722	  a[size] = LLE_CONSTANT (expression);
723
724	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
725	     constants and single variables on   */
726	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
727	  a[size] *= -1;
728	  for (j = 0; j < invariants; j++)
729	    B[size][j] *= -1;
730
731	  size++;
732	  /* Need to increase matrix sizes above.  */
733	  gcc_assert (size <= 127);
734
735	}
736
737      /* Then do the exact same thing for the upper bounds.  */
738      if (LL_STEP (loop) > 0)
739	expression = LL_UPPER_BOUND (loop);
740      else
741	expression = LL_LOWER_BOUND (loop);
742
743      for (; expression != NULL; expression = LLE_NEXT (expression))
744	{
745	  /* Fill in the coefficient.  */
746	  for (j = 0; j < i; j++)
747	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
748
749	  /* And the invariant coefficient.  */
750	  for (j = 0; j < invariants; j++)
751	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
752
753	  /* And the constant.  */
754	  a[size] = LLE_CONSTANT (expression);
755
756	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
757	  for (j = 0; j < i; j++)
758	    A[size][j] *= -1;
759	  A[size][i] = LLE_DENOMINATOR (expression);
760	  size++;
761	  /* Need to increase matrix sizes above.  */
762	  gcc_assert (size <= 127);
763
764	}
765    }
766
767  /* Compute the lattice base x = base * y + origin, where y is the
768     base space.  */
769  lattice = lambda_lattice_compute_base (nest);
770
771  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
772
773  /* A1 = A * L */
774  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
775
776  /* a1 = a - A * origin constant.  */
777  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
778  lambda_vector_add_mc (a, 1, a1, -1, a1, size);
779
780  /* B1 = B - A * origin invariant.  */
781  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
782		      invariants);
783  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
784
785  /* Now compute the auxiliary space bounds by first inverting U, multiplying
786     it by A1, then performing fourier motzkin.  */
787
788  invertedtrans = lambda_matrix_new (depth, depth);
789
790  /* Compute the inverse of U.  */
791  lambda_matrix_inverse (LTM_MATRIX (trans),
792			 invertedtrans, depth);
793
794  /* A = A1 inv(U).  */
795  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
796
797  return compute_nest_using_fourier_motzkin (size, depth, invariants,
798					     A, B1, a1);
799}
800
801/* Compute the loop bounds for the target space, using the bounds of
802   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
803   The target space loop bounds are computed by multiplying the triangular
804   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
805   the loop steps (positive or negative) is then used to swap the bounds if
806   the loop counts downwards.
807   Return the target loopnest.  */
808
809static lambda_loopnest
810lambda_compute_target_space (lambda_loopnest auxillary_nest,
811			     lambda_trans_matrix H, lambda_vector stepsigns)
812{
813  lambda_matrix inverse, H1;
814  int determinant, i, j;
815  int gcd1, gcd2;
816  int factor;
817
818  lambda_loopnest target_nest;
819  int depth, invariants;
820  lambda_matrix target;
821
822  lambda_loop auxillary_loop, target_loop;
823  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
824
825  depth = LN_DEPTH (auxillary_nest);
826  invariants = LN_INVARIANTS (auxillary_nest);
827
828  inverse = lambda_matrix_new (depth, depth);
829  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
830
831  /* H1 is H excluding its diagonal.  */
832  H1 = lambda_matrix_new (depth, depth);
833  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
834
835  for (i = 0; i < depth; i++)
836    H1[i][i] = 0;
837
838  /* Computes the linear offsets of the loop bounds.  */
839  target = lambda_matrix_new (depth, depth);
840  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
841
842  target_nest = lambda_loopnest_new (depth, invariants);
843
844  for (i = 0; i < depth; i++)
845    {
846
847      /* Get a new loop structure.  */
848      target_loop = lambda_loop_new ();
849      LN_LOOPS (target_nest)[i] = target_loop;
850
851      /* Computes the gcd of the coefficients of the linear part.  */
852      gcd1 = gcd_vector (target[i], i);
853
854      /* Include the denominator in the GCD.  */
855      gcd1 = gcd (gcd1, determinant);
856
857      /* Now divide through by the gcd.  */
858      for (j = 0; j < i; j++)
859	target[i][j] = target[i][j] / gcd1;
860
861      expression = lambda_linear_expression_new (depth, invariants);
862      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
863      LLE_DENOMINATOR (expression) = determinant / gcd1;
864      LLE_CONSTANT (expression) = 0;
865      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
866			   invariants);
867      LL_LINEAR_OFFSET (target_loop) = expression;
868    }
869
870  /* For each loop, compute the new bounds from H.  */
871  for (i = 0; i < depth; i++)
872    {
873      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
874      target_loop = LN_LOOPS (target_nest)[i];
875      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
876      factor = LTM_MATRIX (H)[i][i];
877
878      /* First we do the lower bound.  */
879      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
880
881      for (; auxillary_expr != NULL;
882	   auxillary_expr = LLE_NEXT (auxillary_expr))
883	{
884	  target_expr = lambda_linear_expression_new (depth, invariants);
885	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
886				     depth, inverse, depth,
887				     LLE_COEFFICIENTS (target_expr));
888	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
889				    LLE_COEFFICIENTS (target_expr), depth,
890				    factor);
891
892	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
893	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
894			      LLE_INVARIANT_COEFFICIENTS (target_expr),
895			      invariants);
896	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
897				    LLE_INVARIANT_COEFFICIENTS (target_expr),
898				    invariants, factor);
899	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
900
901	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
902	    {
903	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
904		* determinant;
905	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
906					(target_expr),
907					LLE_INVARIANT_COEFFICIENTS
908					(target_expr), invariants,
909					determinant);
910	      LLE_DENOMINATOR (target_expr) =
911		LLE_DENOMINATOR (target_expr) * determinant;
912	    }
913	  /* Find the gcd and divide by it here, rather than doing it
914	     at the tree level.  */
915	  gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
916	  gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
917			     invariants);
918	  gcd1 = gcd (gcd1, gcd2);
919	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
920	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
921	  for (j = 0; j < depth; j++)
922	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
923	  for (j = 0; j < invariants; j++)
924	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
925	  LLE_CONSTANT (target_expr) /= gcd1;
926	  LLE_DENOMINATOR (target_expr) /= gcd1;
927	  /* Ignore if identical to existing bound.  */
928	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
929			  invariants))
930	    {
931	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
932	      LL_LOWER_BOUND (target_loop) = target_expr;
933	    }
934	}
935      /* Now do the upper bound.  */
936      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
937
938      for (; auxillary_expr != NULL;
939	   auxillary_expr = LLE_NEXT (auxillary_expr))
940	{
941	  target_expr = lambda_linear_expression_new (depth, invariants);
942	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
943				     depth, inverse, depth,
944				     LLE_COEFFICIENTS (target_expr));
945	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
946				    LLE_COEFFICIENTS (target_expr), depth,
947				    factor);
948	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
949	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
950			      LLE_INVARIANT_COEFFICIENTS (target_expr),
951			      invariants);
952	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
953				    LLE_INVARIANT_COEFFICIENTS (target_expr),
954				    invariants, factor);
955	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
956
957	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
958	    {
959	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
960		* determinant;
961	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
962					(target_expr),
963					LLE_INVARIANT_COEFFICIENTS
964					(target_expr), invariants,
965					determinant);
966	      LLE_DENOMINATOR (target_expr) =
967		LLE_DENOMINATOR (target_expr) * determinant;
968	    }
969	  /* Find the gcd and divide by it here, instead of at the
970	     tree level.  */
971	  gcd1 = gcd_vector (LLE_COEFFICIENTS (target_expr), depth);
972	  gcd2 = gcd_vector (LLE_INVARIANT_COEFFICIENTS (target_expr),
973			     invariants);
974	  gcd1 = gcd (gcd1, gcd2);
975	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
976	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
977	  for (j = 0; j < depth; j++)
978	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
979	  for (j = 0; j < invariants; j++)
980	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
981	  LLE_CONSTANT (target_expr) /= gcd1;
982	  LLE_DENOMINATOR (target_expr) /= gcd1;
983	  /* Ignore if equal to existing bound.  */
984	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
985			  invariants))
986	    {
987	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
988	      LL_UPPER_BOUND (target_loop) = target_expr;
989	    }
990	}
991    }
992  for (i = 0; i < depth; i++)
993    {
994      target_loop = LN_LOOPS (target_nest)[i];
995      /* If necessary, exchange the upper and lower bounds and negate
996         the step size.  */
997      if (stepsigns[i] < 0)
998	{
999	  LL_STEP (target_loop) *= -1;
1000	  tmp_expr = LL_LOWER_BOUND (target_loop);
1001	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
1002	  LL_UPPER_BOUND (target_loop) = tmp_expr;
1003	}
1004    }
1005  return target_nest;
1006}
1007
1008/* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
1009   result.  */
1010
1011static lambda_vector
1012lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
1013{
1014  lambda_matrix matrix, H;
1015  int size;
1016  lambda_vector newsteps;
1017  int i, j, factor, minimum_column;
1018  int temp;
1019
1020  matrix = LTM_MATRIX (trans);
1021  size = LTM_ROWSIZE (trans);
1022  H = lambda_matrix_new (size, size);
1023
1024  newsteps = lambda_vector_new (size);
1025  lambda_vector_copy (stepsigns, newsteps, size);
1026
1027  lambda_matrix_copy (matrix, H, size, size);
1028
1029  for (j = 0; j < size; j++)
1030    {
1031      lambda_vector row;
1032      row = H[j];
1033      for (i = j; i < size; i++)
1034	if (row[i] < 0)
1035	  lambda_matrix_col_negate (H, size, i);
1036      while (lambda_vector_first_nz (row, size, j + 1) < size)
1037	{
1038	  minimum_column = lambda_vector_min_nz (row, size, j);
1039	  lambda_matrix_col_exchange (H, size, j, minimum_column);
1040
1041	  temp = newsteps[j];
1042	  newsteps[j] = newsteps[minimum_column];
1043	  newsteps[minimum_column] = temp;
1044
1045	  for (i = j + 1; i < size; i++)
1046	    {
1047	      factor = row[i] / row[j];
1048	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
1049	    }
1050	}
1051    }
1052  return newsteps;
1053}
1054
1055/* Transform NEST according to TRANS, and return the new loopnest.
1056   This involves
1057   1. Computing a lattice base for the transformation
1058   2. Composing the dense base with the specified transformation (TRANS)
1059   3. Decomposing the combined transformation into a lower triangular portion,
1060   and a unimodular portion.
1061   4. Computing the auxiliary nest using the unimodular portion.
1062   5. Computing the target nest using the auxiliary nest and the lower
1063   triangular portion.  */
1064
1065lambda_loopnest
1066lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
1067{
1068  lambda_loopnest auxillary_nest, target_nest;
1069
1070  int depth, invariants;
1071  int i, j;
1072  lambda_lattice lattice;
1073  lambda_trans_matrix trans1, H, U;
1074  lambda_loop loop;
1075  lambda_linear_expression expression;
1076  lambda_vector origin;
1077  lambda_matrix origin_invariants;
1078  lambda_vector stepsigns;
1079  int f;
1080
1081  depth = LN_DEPTH (nest);
1082  invariants = LN_INVARIANTS (nest);
1083
1084  /* Keep track of the signs of the loop steps.  */
1085  stepsigns = lambda_vector_new (depth);
1086  for (i = 0; i < depth; i++)
1087    {
1088      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
1089	stepsigns[i] = 1;
1090      else
1091	stepsigns[i] = -1;
1092    }
1093
1094  /* Compute the lattice base.  */
1095  lattice = lambda_lattice_compute_base (nest);
1096  trans1 = lambda_trans_matrix_new (depth, depth);
1097
1098  /* Multiply the transformation matrix by the lattice base.  */
1099
1100  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
1101		      LTM_MATRIX (trans1), depth, depth, depth);
1102
1103  /* Compute the Hermite normal form for the new transformation matrix.  */
1104  H = lambda_trans_matrix_new (depth, depth);
1105  U = lambda_trans_matrix_new (depth, depth);
1106  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
1107			 LTM_MATRIX (U));
1108
1109  /* Compute the auxiliary loop nest's space from the unimodular
1110     portion.  */
1111  auxillary_nest = lambda_compute_auxillary_space (nest, U);
1112
1113  /* Compute the loop step signs from the old step signs and the
1114     transformation matrix.  */
1115  stepsigns = lambda_compute_step_signs (trans1, stepsigns);
1116
1117  /* Compute the target loop nest space from the auxiliary nest and
1118     the lower triangular matrix H.  */
1119  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
1120  origin = lambda_vector_new (depth);
1121  origin_invariants = lambda_matrix_new (depth, invariants);
1122  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
1123			     LATTICE_ORIGIN (lattice), origin);
1124  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
1125		      origin_invariants, depth, depth, invariants);
1126
1127  for (i = 0; i < depth; i++)
1128    {
1129      loop = LN_LOOPS (target_nest)[i];
1130      expression = LL_LINEAR_OFFSET (loop);
1131      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
1132	f = 1;
1133      else
1134	f = LLE_DENOMINATOR (expression);
1135
1136      LLE_CONSTANT (expression) += f * origin[i];
1137
1138      for (j = 0; j < invariants; j++)
1139	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
1140	  f * origin_invariants[i][j];
1141    }
1142
1143  return target_nest;
1144
1145}
1146
1147/* Convert a gcc tree expression EXPR to a lambda linear expression, and
1148   return the new expression.  DEPTH is the depth of the loopnest.
1149   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
1150   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
1151   is the amount we have to add/subtract from the expression because of the
1152   type of comparison it is used in.  */
1153
1154static lambda_linear_expression
1155gcc_tree_to_linear_expression (int depth, tree expr,
1156			       VEC(tree,heap) *outerinductionvars,
1157			       VEC(tree,heap) *invariants, int extra)
1158{
1159  lambda_linear_expression lle = NULL;
1160  switch (TREE_CODE (expr))
1161    {
1162    case INTEGER_CST:
1163      {
1164	lle = lambda_linear_expression_new (depth, 2 * depth);
1165	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
1166	if (extra != 0)
1167	  LLE_CONSTANT (lle) += extra;
1168
1169	LLE_DENOMINATOR (lle) = 1;
1170      }
1171      break;
1172    case SSA_NAME:
1173      {
1174	tree iv, invar;
1175	size_t i;
1176	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
1177	  if (iv != NULL)
1178	    {
1179	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
1180		{
1181		  lle = lambda_linear_expression_new (depth, 2 * depth);
1182		  LLE_COEFFICIENTS (lle)[i] = 1;
1183		  if (extra != 0)
1184		    LLE_CONSTANT (lle) = extra;
1185
1186		  LLE_DENOMINATOR (lle) = 1;
1187		}
1188	    }
1189	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1190	  if (invar != NULL)
1191	    {
1192	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
1193		{
1194		  lle = lambda_linear_expression_new (depth, 2 * depth);
1195		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
1196		  if (extra != 0)
1197		    LLE_CONSTANT (lle) = extra;
1198		  LLE_DENOMINATOR (lle) = 1;
1199		}
1200	    }
1201      }
1202      break;
1203    default:
1204      return NULL;
1205    }
1206
1207  return lle;
1208}
1209
1210/* Return the depth of the loopnest NEST */
1211
1212static int
1213depth_of_nest (struct loop *nest)
1214{
1215  size_t depth = 0;
1216  while (nest)
1217    {
1218      depth++;
1219      nest = nest->inner;
1220    }
1221  return depth;
1222}
1223
1224
1225/* Return true if OP is invariant in LOOP and all outer loops.  */
1226
1227static bool
1228invariant_in_loop_and_outer_loops (struct loop *loop, tree op)
1229{
1230  if (is_gimple_min_invariant (op))
1231    return true;
1232  if (loop->depth == 0)
1233    return true;
1234  if (!expr_invariant_in_loop_p (loop, op))
1235    return false;
1236  if (loop->outer
1237      && !invariant_in_loop_and_outer_loops (loop->outer, op))
1238    return false;
1239  return true;
1240}
1241
1242/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
1243   or NULL if it could not be converted.
1244   DEPTH is the depth of the loop.
1245   INVARIANTS is a pointer to the array of loop invariants.
1246   The induction variable for this loop should be stored in the parameter
1247   OURINDUCTIONVAR.
1248   OUTERINDUCTIONVARS is an array of induction variables for outer loops.  */
1249
1250static lambda_loop
1251gcc_loop_to_lambda_loop (struct loop *loop, int depth,
1252			 VEC(tree,heap) ** invariants,
1253			 tree * ourinductionvar,
1254			 VEC(tree,heap) * outerinductionvars,
1255			 VEC(tree,heap) ** lboundvars,
1256			 VEC(tree,heap) ** uboundvars,
1257			 VEC(int,heap) ** steps)
1258{
1259  tree phi;
1260  tree exit_cond;
1261  tree access_fn, inductionvar;
1262  tree step;
1263  lambda_loop lloop = NULL;
1264  lambda_linear_expression lbound, ubound;
1265  tree test;
1266  int stepint;
1267  int extra = 0;
1268  tree lboundvar, uboundvar, uboundresult;
1269
1270  /* Find out induction var and exit condition.  */
1271  inductionvar = find_induction_var_from_exit_cond (loop);
1272  exit_cond = get_loop_exit_condition (loop);
1273
1274  if (inductionvar == NULL || exit_cond == NULL)
1275    {
1276      if (dump_file && (dump_flags & TDF_DETAILS))
1277	fprintf (dump_file,
1278		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
1279      return NULL;
1280    }
1281
1282  test = TREE_OPERAND (exit_cond, 0);
1283
1284  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
1285    {
1286
1287      if (dump_file && (dump_flags & TDF_DETAILS))
1288	fprintf (dump_file,
1289		 "Unable to convert loop: Cannot find PHI node for induction variable\n");
1290
1291      return NULL;
1292    }
1293
1294  phi = SSA_NAME_DEF_STMT (inductionvar);
1295  if (TREE_CODE (phi) != PHI_NODE)
1296    {
1297      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
1298      if (!phi)
1299	{
1300
1301	  if (dump_file && (dump_flags & TDF_DETAILS))
1302	    fprintf (dump_file,
1303		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1304
1305	  return NULL;
1306	}
1307
1308      phi = SSA_NAME_DEF_STMT (phi);
1309      if (TREE_CODE (phi) != PHI_NODE)
1310	{
1311
1312	  if (dump_file && (dump_flags & TDF_DETAILS))
1313	    fprintf (dump_file,
1314		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
1315	  return NULL;
1316	}
1317
1318    }
1319
1320  /* The induction variable name/version we want to put in the array is the
1321     result of the induction variable phi node.  */
1322  *ourinductionvar = PHI_RESULT (phi);
1323  access_fn = instantiate_parameters
1324    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1325  if (access_fn == chrec_dont_know)
1326    {
1327      if (dump_file && (dump_flags & TDF_DETAILS))
1328	fprintf (dump_file,
1329		 "Unable to convert loop: Access function for induction variable phi is unknown\n");
1330
1331      return NULL;
1332    }
1333
1334  step = evolution_part_in_loop_num (access_fn, loop->num);
1335  if (!step || step == chrec_dont_know)
1336    {
1337      if (dump_file && (dump_flags & TDF_DETAILS))
1338	fprintf (dump_file,
1339		 "Unable to convert loop: Cannot determine step of loop.\n");
1340
1341      return NULL;
1342    }
1343  if (TREE_CODE (step) != INTEGER_CST)
1344    {
1345
1346      if (dump_file && (dump_flags & TDF_DETAILS))
1347	fprintf (dump_file,
1348		 "Unable to convert loop: Step of loop is not integer.\n");
1349      return NULL;
1350    }
1351
1352  stepint = TREE_INT_CST_LOW (step);
1353
1354  /* Only want phis for induction vars, which will have two
1355     arguments.  */
1356  if (PHI_NUM_ARGS (phi) != 2)
1357    {
1358      if (dump_file && (dump_flags & TDF_DETAILS))
1359	fprintf (dump_file,
1360		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
1361      return NULL;
1362    }
1363
1364  /* Another induction variable check. One argument's source should be
1365     in the loop, one outside the loop.  */
1366  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
1367      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
1368    {
1369
1370      if (dump_file && (dump_flags & TDF_DETAILS))
1371	fprintf (dump_file,
1372		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
1373
1374      return NULL;
1375    }
1376
1377  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
1378    {
1379      lboundvar = PHI_ARG_DEF (phi, 1);
1380      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1381					      outerinductionvars, *invariants,
1382					      0);
1383    }
1384  else
1385    {
1386      lboundvar = PHI_ARG_DEF (phi, 0);
1387      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
1388					      outerinductionvars, *invariants,
1389					      0);
1390    }
1391
1392  if (!lbound)
1393    {
1394
1395      if (dump_file && (dump_flags & TDF_DETAILS))
1396	fprintf (dump_file,
1397		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
1398
1399      return NULL;
1400    }
1401  /* One part of the test may be a loop invariant tree.  */
1402  VEC_reserve (tree, heap, *invariants, 1);
1403  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
1404      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
1405    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
1406  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
1407	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
1408    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
1409
1410  /* The non-induction variable part of the test is the upper bound variable.
1411   */
1412  if (TREE_OPERAND (test, 0) == inductionvar)
1413    uboundvar = TREE_OPERAND (test, 1);
1414  else
1415    uboundvar = TREE_OPERAND (test, 0);
1416
1417
1418  /* We only size the vectors assuming we have, at max, 2 times as many
1419     invariants as we do loops (one for each bound).
1420     This is just an arbitrary number, but it has to be matched against the
1421     code below.  */
1422  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
1423
1424
1425  /* We might have some leftover.  */
1426  if (TREE_CODE (test) == LT_EXPR)
1427    extra = -1 * stepint;
1428  else if (TREE_CODE (test) == NE_EXPR)
1429    extra = -1 * stepint;
1430  else if (TREE_CODE (test) == GT_EXPR)
1431    extra = -1 * stepint;
1432  else if (TREE_CODE (test) == EQ_EXPR)
1433    extra = 1 * stepint;
1434
1435  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
1436					  outerinductionvars,
1437					  *invariants, extra);
1438  uboundresult = build (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
1439			build_int_cst (TREE_TYPE (uboundvar), extra));
1440  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
1441  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
1442  VEC_safe_push (int, heap, *steps, stepint);
1443  if (!ubound)
1444    {
1445      if (dump_file && (dump_flags & TDF_DETAILS))
1446	fprintf (dump_file,
1447		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
1448      return NULL;
1449    }
1450
1451  lloop = lambda_loop_new ();
1452  LL_STEP (lloop) = stepint;
1453  LL_LOWER_BOUND (lloop) = lbound;
1454  LL_UPPER_BOUND (lloop) = ubound;
1455  return lloop;
1456}
1457
1458/* Given a LOOP, find the induction variable it is testing against in the exit
1459   condition.  Return the induction variable if found, NULL otherwise.  */
1460
1461static tree
1462find_induction_var_from_exit_cond (struct loop *loop)
1463{
1464  tree expr = get_loop_exit_condition (loop);
1465  tree ivarop;
1466  tree test;
1467  if (expr == NULL_TREE)
1468    return NULL_TREE;
1469  if (TREE_CODE (expr) != COND_EXPR)
1470    return NULL_TREE;
1471  test = TREE_OPERAND (expr, 0);
1472  if (!COMPARISON_CLASS_P (test))
1473    return NULL_TREE;
1474
1475  /* Find the side that is invariant in this loop. The ivar must be the other
1476     side.  */
1477
1478  if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
1479      ivarop = TREE_OPERAND (test, 1);
1480  else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
1481      ivarop = TREE_OPERAND (test, 0);
1482  else
1483    return NULL_TREE;
1484
1485  if (TREE_CODE (ivarop) != SSA_NAME)
1486    return NULL_TREE;
1487  return ivarop;
1488}
1489
1490DEF_VEC_P(lambda_loop);
1491DEF_VEC_ALLOC_P(lambda_loop,heap);
1492
1493/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
1494   Return the new loop nest.
1495   INDUCTIONVARS is a pointer to an array of induction variables for the
1496   loopnest that will be filled in during this process.
1497   INVARIANTS is a pointer to an array of invariants that will be filled in
1498   during this process.  */
1499
1500lambda_loopnest
1501gcc_loopnest_to_lambda_loopnest (struct loops *currloops,
1502				 struct loop *loop_nest,
1503				 VEC(tree,heap) **inductionvars,
1504				 VEC(tree,heap) **invariants)
1505{
1506  lambda_loopnest ret = NULL;
1507  struct loop *temp = loop_nest;
1508  int depth = depth_of_nest (loop_nest);
1509  size_t i;
1510  VEC(lambda_loop,heap) *loops = NULL;
1511  VEC(tree,heap) *uboundvars = NULL;
1512  VEC(tree,heap) *lboundvars  = NULL;
1513  VEC(int,heap) *steps = NULL;
1514  lambda_loop newloop;
1515  tree inductionvar = NULL;
1516  bool perfect_nest = perfect_nest_p (loop_nest);
1517
1518  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
1519    goto fail;
1520
1521  while (temp)
1522    {
1523      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
1524					 &inductionvar, *inductionvars,
1525					 &lboundvars, &uboundvars,
1526					 &steps);
1527      if (!newloop)
1528	goto fail;
1529
1530      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
1531      VEC_safe_push (lambda_loop, heap, loops, newloop);
1532      temp = temp->inner;
1533    }
1534
1535  if (!perfect_nest)
1536    {
1537      if (!perfect_nestify (currloops, loop_nest,
1538			    lboundvars, uboundvars, steps, *inductionvars))
1539	{
1540	  if (dump_file)
1541	    fprintf (dump_file,
1542		     "Not a perfect loop nest and couldn't convert to one.\n");
1543	  goto fail;
1544	}
1545      else if (dump_file)
1546	fprintf (dump_file,
1547		 "Successfully converted loop nest to perfect loop nest.\n");
1548    }
1549
1550  ret = lambda_loopnest_new (depth, 2 * depth);
1551
1552  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
1553    LN_LOOPS (ret)[i] = newloop;
1554
1555 fail:
1556  VEC_free (lambda_loop, heap, loops);
1557  VEC_free (tree, heap, uboundvars);
1558  VEC_free (tree, heap, lboundvars);
1559  VEC_free (int, heap, steps);
1560
1561  return ret;
1562}
1563
1564/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
1565   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
1566   inserted for us are stored.  INDUCTION_VARS is the array of induction
1567   variables for the loop this LBV is from.  TYPE is the tree type to use for
1568   the variables and trees involved.  */
1569
1570static tree
1571lbv_to_gcc_expression (lambda_body_vector lbv,
1572		       tree type, VEC(tree,heap) *induction_vars,
1573		       tree *stmts_to_insert)
1574{
1575  tree stmts, stmt, resvar, name;
1576  tree iv;
1577  size_t i;
1578  tree_stmt_iterator tsi;
1579
1580  /* Create a statement list and a linear expression temporary.  */
1581  stmts = alloc_stmt_list ();
1582  resvar = create_tmp_var (type, "lbvtmp");
1583  add_referenced_tmp_var (resvar);
1584
1585  /* Start at 0.  */
1586  stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1587  name = make_ssa_name (resvar, stmt);
1588  TREE_OPERAND (stmt, 0) = name;
1589  tsi = tsi_last (stmts);
1590  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1591
1592  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1593    {
1594      if (LBV_COEFFICIENTS (lbv)[i] != 0)
1595	{
1596	  tree newname;
1597	  tree coeffmult;
1598
1599	  /* newname = coefficient * induction_variable */
1600	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
1601	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1602			fold_build2 (MULT_EXPR, type, iv, coeffmult));
1603
1604	  newname = make_ssa_name (resvar, stmt);
1605	  TREE_OPERAND (stmt, 0) = newname;
1606	  fold_stmt (&stmt);
1607	  tsi = tsi_last (stmts);
1608	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1609
1610	  /* name = name + newname */
1611	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1612			build (PLUS_EXPR, type, name, newname));
1613	  name = make_ssa_name (resvar, stmt);
1614	  TREE_OPERAND (stmt, 0) = name;
1615	  fold_stmt (&stmt);
1616	  tsi = tsi_last (stmts);
1617	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1618
1619	}
1620    }
1621
1622  /* Handle any denominator that occurs.  */
1623  if (LBV_DENOMINATOR (lbv) != 1)
1624    {
1625      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
1626      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1627		    build (CEIL_DIV_EXPR, type, name, denominator));
1628      name = make_ssa_name (resvar, stmt);
1629      TREE_OPERAND (stmt, 0) = name;
1630      fold_stmt (&stmt);
1631      tsi = tsi_last (stmts);
1632      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1633    }
1634  *stmts_to_insert = stmts;
1635  return name;
1636}
1637
1638/* Convert a linear expression from coefficient and constant form to a
1639   gcc tree.
1640   Return the tree that represents the final value of the expression.
1641   LLE is the linear expression to convert.
1642   OFFSET is the linear offset to apply to the expression.
1643   TYPE is the tree type to use for the variables and math.
1644   INDUCTION_VARS is a vector of induction variables for the loops.
1645   INVARIANTS is a vector of the loop nest invariants.
1646   WRAP specifies what tree code to wrap the results in, if there is more than
1647   one (it is either MAX_EXPR, or MIN_EXPR).
1648   STMTS_TO_INSERT Is a pointer to the statement list we fill in with
1649   statements that need to be inserted for the linear expression.  */
1650
1651static tree
1652lle_to_gcc_expression (lambda_linear_expression lle,
1653		       lambda_linear_expression offset,
1654		       tree type,
1655		       VEC(tree,heap) *induction_vars,
1656		       VEC(tree,heap) *invariants,
1657		       enum tree_code wrap, tree *stmts_to_insert)
1658{
1659  tree stmts, stmt, resvar, name;
1660  size_t i;
1661  tree_stmt_iterator tsi;
1662  tree iv, invar;
1663  VEC(tree,heap) *results = NULL;
1664
1665  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
1666  name = NULL_TREE;
1667  /* Create a statement list and a linear expression temporary.  */
1668  stmts = alloc_stmt_list ();
1669  resvar = create_tmp_var (type, "lletmp");
1670  add_referenced_tmp_var (resvar);
1671
1672  /* Build up the linear expressions, and put the variable representing the
1673     result in the results array.  */
1674  for (; lle != NULL; lle = LLE_NEXT (lle))
1675    {
1676      /* Start at name = 0.  */
1677      stmt = build (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
1678      name = make_ssa_name (resvar, stmt);
1679      TREE_OPERAND (stmt, 0) = name;
1680      fold_stmt (&stmt);
1681      tsi = tsi_last (stmts);
1682      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1683
1684      /* First do the induction variables.
1685         at the end, name = name + all the induction variables added
1686         together.  */
1687      for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
1688	{
1689	  if (LLE_COEFFICIENTS (lle)[i] != 0)
1690	    {
1691	      tree newname;
1692	      tree mult;
1693	      tree coeff;
1694
1695	      /* mult = induction variable * coefficient.  */
1696	      if (LLE_COEFFICIENTS (lle)[i] == 1)
1697		{
1698		  mult = VEC_index (tree, induction_vars, i);
1699		}
1700	      else
1701		{
1702		  coeff = build_int_cst (type,
1703					 LLE_COEFFICIENTS (lle)[i]);
1704		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
1705		}
1706
1707	      /* newname = mult */
1708	      stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1709	      newname = make_ssa_name (resvar, stmt);
1710	      TREE_OPERAND (stmt, 0) = newname;
1711	      fold_stmt (&stmt);
1712	      tsi = tsi_last (stmts);
1713	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1714
1715	      /* name = name + newname */
1716	      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1717			    build (PLUS_EXPR, type, name, newname));
1718	      name = make_ssa_name (resvar, stmt);
1719	      TREE_OPERAND (stmt, 0) = name;
1720	      fold_stmt (&stmt);
1721	      tsi = tsi_last (stmts);
1722	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1723	    }
1724	}
1725
1726      /* Handle our invariants.
1727         At the end, we have name = name + result of adding all multiplied
1728         invariants.  */
1729      for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
1730	{
1731	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
1732	    {
1733	      tree newname;
1734	      tree mult;
1735	      tree coeff;
1736	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
1737	      /* mult = invariant * coefficient  */
1738	      if (invcoeff == 1)
1739		{
1740		  mult = invar;
1741		}
1742	      else
1743		{
1744		  coeff = build_int_cst (type, invcoeff);
1745		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
1746		}
1747
1748	      /* newname = mult */
1749	      stmt = build (MODIFY_EXPR, void_type_node, resvar, mult);
1750	      newname = make_ssa_name (resvar, stmt);
1751	      TREE_OPERAND (stmt, 0) = newname;
1752	      fold_stmt (&stmt);
1753	      tsi = tsi_last (stmts);
1754	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1755
1756	      /* name = name + newname */
1757	      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1758			    build (PLUS_EXPR, type, name, newname));
1759	      name = make_ssa_name (resvar, stmt);
1760	      TREE_OPERAND (stmt, 0) = name;
1761	      fold_stmt (&stmt);
1762	      tsi = tsi_last (stmts);
1763	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1764	    }
1765	}
1766
1767      /* Now handle the constant.
1768         name = name + constant.  */
1769      if (LLE_CONSTANT (lle) != 0)
1770	{
1771	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1772			build (PLUS_EXPR, type, name,
1773			       build_int_cst (type, LLE_CONSTANT (lle))));
1774	  name = make_ssa_name (resvar, stmt);
1775	  TREE_OPERAND (stmt, 0) = name;
1776	  fold_stmt (&stmt);
1777	  tsi = tsi_last (stmts);
1778	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1779	}
1780
1781      /* Now handle the offset.
1782         name = name + linear offset.  */
1783      if (LLE_CONSTANT (offset) != 0)
1784	{
1785	  stmt = build (MODIFY_EXPR, void_type_node, resvar,
1786			build (PLUS_EXPR, type, name,
1787			       build_int_cst (type, LLE_CONSTANT (offset))));
1788	  name = make_ssa_name (resvar, stmt);
1789	  TREE_OPERAND (stmt, 0) = name;
1790	  fold_stmt (&stmt);
1791	  tsi = tsi_last (stmts);
1792	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1793	}
1794
1795      /* Handle any denominator that occurs.  */
1796      if (LLE_DENOMINATOR (lle) != 1)
1797	{
1798	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
1799	  stmt = build (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
1800			type, name, stmt);
1801	  stmt = build (MODIFY_EXPR, void_type_node, resvar, stmt);
1802
1803	  /* name = {ceil, floor}(name/denominator) */
1804	  name = make_ssa_name (resvar, stmt);
1805	  TREE_OPERAND (stmt, 0) = name;
1806	  tsi = tsi_last (stmts);
1807	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1808	}
1809      VEC_safe_push (tree, heap, results, name);
1810    }
1811
1812  /* Again, out of laziness, we don't handle this case yet.  It's not
1813     hard, it just hasn't occurred.  */
1814  gcc_assert (VEC_length (tree, results) <= 2);
1815
1816  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
1817  if (VEC_length (tree, results) > 1)
1818    {
1819      tree op1 = VEC_index (tree, results, 0);
1820      tree op2 = VEC_index (tree, results, 1);
1821      stmt = build (MODIFY_EXPR, void_type_node, resvar,
1822		    build (wrap, type, op1, op2));
1823      name = make_ssa_name (resvar, stmt);
1824      TREE_OPERAND (stmt, 0) = name;
1825      tsi = tsi_last (stmts);
1826      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
1827    }
1828
1829  VEC_free (tree, heap, results);
1830
1831  *stmts_to_insert = stmts;
1832  return name;
1833}
1834
1835/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
1836   it, back into gcc code.  This changes the
1837   loops, their induction variables, and their bodies, so that they
1838   match the transformed loopnest.
1839   OLD_LOOPNEST is the loopnest before we've replaced it with the new
1840   loopnest.
1841   OLD_IVS is a vector of induction variables from the old loopnest.
1842   INVARIANTS is a vector of loop invariants from the old loopnest.
1843   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
1844   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
1845   NEW_LOOPNEST.  */
1846
1847void
1848lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
1849				 VEC(tree,heap) *old_ivs,
1850				 VEC(tree,heap) *invariants,
1851				 lambda_loopnest new_loopnest,
1852				 lambda_trans_matrix transform)
1853{
1854  struct loop *temp;
1855  size_t i = 0;
1856  size_t depth = 0;
1857  VEC(tree,heap) *new_ivs = NULL;
1858  tree oldiv;
1859
1860  block_stmt_iterator bsi;
1861
1862  if (dump_file)
1863    {
1864      transform = lambda_trans_matrix_inverse (transform);
1865      fprintf (dump_file, "Inverse of transformation matrix:\n");
1866      print_lambda_trans_matrix (dump_file, transform);
1867    }
1868  depth = depth_of_nest (old_loopnest);
1869  temp = old_loopnest;
1870
1871  while (temp)
1872    {
1873      lambda_loop newloop;
1874      basic_block bb;
1875      edge exit;
1876      tree ivvar, ivvarinced, exitcond, stmts;
1877      enum tree_code testtype;
1878      tree newupperbound, newlowerbound;
1879      lambda_linear_expression offset;
1880      tree type;
1881      bool insert_after;
1882      tree inc_stmt;
1883
1884      oldiv = VEC_index (tree, old_ivs, i);
1885      type = TREE_TYPE (oldiv);
1886
1887      /* First, build the new induction variable temporary  */
1888
1889      ivvar = create_tmp_var (type, "lnivtmp");
1890      add_referenced_tmp_var (ivvar);
1891
1892      VEC_safe_push (tree, heap, new_ivs, ivvar);
1893
1894      newloop = LN_LOOPS (new_loopnest)[i];
1895
1896      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
1897         cases for now.  */
1898      offset = LL_LINEAR_OFFSET (newloop);
1899
1900      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
1901		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
1902
1903      /* Now build the  new lower bounds, and insert the statements
1904         necessary to generate it on the loop preheader.  */
1905      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
1906					     LL_LINEAR_OFFSET (newloop),
1907					     type,
1908					     new_ivs,
1909					     invariants, MAX_EXPR, &stmts);
1910      bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
1911      bsi_commit_edge_inserts ();
1912      /* Build the new upper bound and insert its statements in the
1913         basic block of the exit condition */
1914      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
1915					     LL_LINEAR_OFFSET (newloop),
1916					     type,
1917					     new_ivs,
1918					     invariants, MIN_EXPR, &stmts);
1919      exit = temp->single_exit;
1920      exitcond = get_loop_exit_condition (temp);
1921      bb = bb_for_stmt (exitcond);
1922      bsi = bsi_start (bb);
1923      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
1924
1925      /* Create the new iv.  */
1926
1927      standard_iv_increment_position (temp, &bsi, &insert_after);
1928      create_iv (newlowerbound,
1929		 build_int_cst (type, LL_STEP (newloop)),
1930		 ivvar, temp, &bsi, insert_after, &ivvar,
1931		 NULL);
1932
1933      /* Unfortunately, the incremented ivvar that create_iv inserted may not
1934	 dominate the block containing the exit condition.
1935	 So we simply create our own incremented iv to use in the new exit
1936	 test,  and let redundancy elimination sort it out.  */
1937      inc_stmt = build (PLUS_EXPR, type,
1938			ivvar, build_int_cst (type, LL_STEP (newloop)));
1939      inc_stmt = build (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
1940			inc_stmt);
1941      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
1942      TREE_OPERAND (inc_stmt, 0) = ivvarinced;
1943      bsi = bsi_for_stmt (exitcond);
1944      bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
1945
1946      /* Replace the exit condition with the new upper bound
1947         comparison.  */
1948
1949      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
1950
1951      /* We want to build a conditional where true means exit the loop, and
1952	 false means continue the loop.
1953	 So swap the testtype if this isn't the way things are.*/
1954
1955      if (exit->flags & EDGE_FALSE_VALUE)
1956	testtype = swap_tree_comparison (testtype);
1957
1958      COND_EXPR_COND (exitcond) = build (testtype,
1959					 boolean_type_node,
1960					 newupperbound, ivvarinced);
1961      update_stmt (exitcond);
1962      VEC_replace (tree, new_ivs, i, ivvar);
1963
1964      i++;
1965      temp = temp->inner;
1966    }
1967
1968  /* Rewrite uses of the old ivs so that they are now specified in terms of
1969     the new ivs.  */
1970
1971  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
1972    {
1973      imm_use_iterator imm_iter;
1974      use_operand_p imm_use;
1975      tree oldiv_def;
1976      tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
1977
1978      if (TREE_CODE (oldiv_stmt) == PHI_NODE)
1979        oldiv_def = PHI_RESULT (oldiv_stmt);
1980      else
1981	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
1982      gcc_assert (oldiv_def != NULL_TREE);
1983
1984      FOR_EACH_IMM_USE_SAFE (imm_use, imm_iter, oldiv_def)
1985	{
1986	  tree stmt = USE_STMT (imm_use);
1987	  use_operand_p use_p;
1988	  ssa_op_iter iter;
1989	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1990	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
1991	    {
1992	      if (USE_FROM_PTR (use_p) == oldiv)
1993		{
1994		  tree newiv, stmts;
1995		  lambda_body_vector lbv, newlbv;
1996		  /* Compute the new expression for the induction
1997		     variable.  */
1998		  depth = VEC_length (tree, new_ivs);
1999		  lbv = lambda_body_vector_new (depth);
2000		  LBV_COEFFICIENTS (lbv)[i] = 1;
2001
2002		  newlbv = lambda_body_vector_compute_new (transform, lbv);
2003
2004		  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
2005						 new_ivs, &stmts);
2006		  bsi = bsi_for_stmt (stmt);
2007		  /* Insert the statements to build that
2008		     expression.  */
2009		  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
2010		  propagate_value (use_p, newiv);
2011		  update_stmt (stmt);
2012
2013		}
2014	    }
2015	}
2016    }
2017  VEC_free (tree, heap, new_ivs);
2018}
2019
2020/* Return TRUE if this is not interesting statement from the perspective of
2021   determining if we have a perfect loop nest.  */
2022
2023static bool
2024not_interesting_stmt (tree stmt)
2025{
2026  /* Note that COND_EXPR's aren't interesting because if they were exiting the
2027     loop, we would have already failed the number of exits tests.  */
2028  if (TREE_CODE (stmt) == LABEL_EXPR
2029      || TREE_CODE (stmt) == GOTO_EXPR
2030      || TREE_CODE (stmt) == COND_EXPR)
2031    return true;
2032  return false;
2033}
2034
2035/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
2036
2037static bool
2038phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
2039{
2040  int i;
2041  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
2042    if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
2043      if (PHI_ARG_DEF (phi, i) == def)
2044	return true;
2045  return false;
2046}
2047
2048/* Return TRUE if STMT is a use of PHI_RESULT.  */
2049
2050static bool
2051stmt_uses_phi_result (tree stmt, tree phi_result)
2052{
2053  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
2054
2055  /* This is conservatively true, because we only want SIMPLE bumpers
2056     of the form x +- constant for our pass.  */
2057  return (use == phi_result);
2058}
2059
2060/* STMT is a bumper stmt for LOOP if the version it defines is used in the
2061   in-loop-edge in a phi node, and the operand it uses is the result of that
2062   phi node.
2063   I.E. i_29 = i_3 + 1
2064        i_3 = PHI (0, i_29);  */
2065
2066static bool
2067stmt_is_bumper_for_loop (struct loop *loop, tree stmt)
2068{
2069  tree use;
2070  tree def;
2071  imm_use_iterator iter;
2072  use_operand_p use_p;
2073
2074  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2075  if (!def)
2076    return false;
2077
2078  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
2079    {
2080      use = USE_STMT (use_p);
2081      if (TREE_CODE (use) == PHI_NODE)
2082	{
2083	  if (phi_loop_edge_uses_def (loop, use, def))
2084	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
2085	      return true;
2086	}
2087    }
2088  return false;
2089}
2090
2091
2092/* Return true if LOOP is a perfect loop nest.
2093   Perfect loop nests are those loop nests where all code occurs in the
2094   innermost loop body.
2095   If S is a program statement, then
2096
2097   i.e.
2098   DO I = 1, 20
2099       S1
2100       DO J = 1, 20
2101       ...
2102       END DO
2103   END DO
2104   is not a perfect loop nest because of S1.
2105
2106   DO I = 1, 20
2107      DO J = 1, 20
2108        S1
2109	...
2110      END DO
2111   END DO
2112   is a perfect loop nest.
2113
2114   Since we don't have high level loops anymore, we basically have to walk our
2115   statements and ignore those that are there because the loop needs them (IE
2116   the induction variable increment, and jump back to the top of the loop).  */
2117
2118bool
2119perfect_nest_p (struct loop *loop)
2120{
2121  basic_block *bbs;
2122  size_t i;
2123  tree exit_cond;
2124
2125  if (!loop->inner)
2126    return true;
2127  bbs = get_loop_body (loop);
2128  exit_cond = get_loop_exit_condition (loop);
2129  for (i = 0; i < loop->num_nodes; i++)
2130    {
2131      if (bbs[i]->loop_father == loop)
2132	{
2133	  block_stmt_iterator bsi;
2134	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2135	    {
2136	      tree stmt = bsi_stmt (bsi);
2137	      if (stmt == exit_cond
2138		  || not_interesting_stmt (stmt)
2139		  || stmt_is_bumper_for_loop (loop, stmt))
2140		continue;
2141	      free (bbs);
2142	      return false;
2143	    }
2144	}
2145    }
2146  free (bbs);
2147  /* See if the inner loops are perfectly nested as well.  */
2148  if (loop->inner)
2149    return perfect_nest_p (loop->inner);
2150  return true;
2151}
2152
2153/* Replace the USES of X in STMT, or uses with the same step as X with Y.
2154   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
2155   avoid creating duplicate temporaries and FIRSTBSI is statement
2156   iterator where new temporaries should be inserted at the beginning
2157   of body basic block.  */
2158
2159static void
2160replace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
2161				int xstep, tree y, tree yinit,
2162				htab_t replacements,
2163				block_stmt_iterator *firstbsi)
2164{
2165  ssa_op_iter iter;
2166  use_operand_p use_p;
2167
2168  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2169    {
2170      tree use = USE_FROM_PTR (use_p);
2171      tree step = NULL_TREE;
2172      tree scev, init, val, var, setstmt;
2173      struct tree_map *h, in;
2174      void **loc;
2175
2176      /* Replace uses of X with Y right away.  */
2177      if (use == x)
2178	{
2179	  SET_USE (use_p, y);
2180	  continue;
2181	}
2182
2183      scev = instantiate_parameters (loop,
2184				     analyze_scalar_evolution (loop, use));
2185
2186      if (scev == NULL || scev == chrec_dont_know)
2187	continue;
2188
2189      step = evolution_part_in_loop_num (scev, loop->num);
2190      if (step == NULL
2191	  || step == chrec_dont_know
2192	  || TREE_CODE (step) != INTEGER_CST
2193	  || int_cst_value (step) != xstep)
2194	continue;
2195
2196      /* Use REPLACEMENTS hash table to cache already created
2197	 temporaries.  */
2198      in.hash = htab_hash_pointer (use);
2199      in.from = use;
2200      h = htab_find_with_hash (replacements, &in, in.hash);
2201      if (h != NULL)
2202	{
2203	  SET_USE (use_p, h->to);
2204	  continue;
2205	}
2206
2207      /* USE which has the same step as X should be replaced
2208	 with a temporary set to Y + YINIT - INIT.  */
2209      init = initial_condition_in_loop_num (scev, loop->num);
2210      gcc_assert (init != NULL && init != chrec_dont_know);
2211      if (TREE_TYPE (use) == TREE_TYPE (y))
2212	{
2213	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
2214	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
2215	  if (val == y)
2216 	    {
2217	      /* If X has the same type as USE, the same step
2218		 and same initial value, it can be replaced by Y.  */
2219	      SET_USE (use_p, y);
2220	      continue;
2221	    }
2222	}
2223      else
2224	{
2225	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
2226	  val = fold_convert (TREE_TYPE (use), val);
2227	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
2228	}
2229
2230      /* Create a temporary variable and insert it at the beginning
2231	 of the loop body basic block, right after the PHI node
2232	 which sets Y.  */
2233      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
2234      add_referenced_tmp_var (var);
2235      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
2236      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
2237      var = make_ssa_name (var, setstmt);
2238      TREE_OPERAND (setstmt, 0) = var;
2239      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
2240      update_stmt (setstmt);
2241      SET_USE (use_p, var);
2242      h = ggc_alloc (sizeof (struct tree_map));
2243      h->hash = in.hash;
2244      h->from = use;
2245      h->to = var;
2246      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
2247      gcc_assert ((*(struct tree_map **)loc) == NULL);
2248      *(struct tree_map **) loc = h;
2249    }
2250}
2251
2252/* Return true if STMT is an exit PHI for LOOP */
2253
2254static bool
2255exit_phi_for_loop_p (struct loop *loop, tree stmt)
2256{
2257
2258  if (TREE_CODE (stmt) != PHI_NODE
2259      || PHI_NUM_ARGS (stmt) != 1
2260      || bb_for_stmt (stmt) != loop->single_exit->dest)
2261    return false;
2262
2263  return true;
2264}
2265
2266/* Return true if STMT can be put back into INNER, a loop by moving it to the
2267   beginning of that loop.  */
2268
2269static bool
2270can_put_in_inner_loop (struct loop *inner, tree stmt)
2271{
2272  imm_use_iterator imm_iter;
2273  use_operand_p use_p;
2274  basic_block use_bb = NULL;
2275
2276  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
2277  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
2278      || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
2279    return false;
2280
2281  /* We require that the basic block of all uses be the same, or the use be an
2282     exit phi.  */
2283  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2284    {
2285      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
2286	{
2287	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2288
2289	  if (!flow_bb_inside_loop_p (inner, immbb))
2290	    return false;
2291	  if (use_bb == NULL)
2292	    use_bb = immbb;
2293	  else if (immbb != use_bb)
2294	    return false;
2295	}
2296    }
2297  return true;
2298
2299}
2300
2301/* Return true if STMT can be put *after* the inner loop of LOOP.  */
2302
2303static bool
2304can_put_after_inner_loop (struct loop *loop, tree stmt)
2305{
2306  imm_use_iterator imm_iter;
2307  use_operand_p use_p;
2308
2309  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
2310    return false;
2311
2312  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
2313    {
2314      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
2315	{
2316	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
2317
2318	  if (!dominated_by_p (CDI_DOMINATORS,
2319			       immbb,
2320			       loop->inner->header)
2321	      && !can_put_in_inner_loop (loop->inner, stmt))
2322	    return false;
2323	}
2324    }
2325  return true;
2326}
2327
2328/* Return TRUE if LOOP is an imperfect nest that we can convert to a
2329   perfect one.  At the moment, we only handle imperfect nests of
2330   depth 2, where all of the statements occur after the inner loop.  */
2331
2332static bool
2333can_convert_to_perfect_nest (struct loop *loop)
2334{
2335  basic_block *bbs;
2336  tree exit_condition, phi;
2337  size_t i;
2338  block_stmt_iterator bsi;
2339  basic_block exitdest;
2340
2341  /* Can't handle triply nested+ loops yet.  */
2342  if (!loop->inner || loop->inner->inner)
2343    return false;
2344
2345  bbs = get_loop_body (loop);
2346  exit_condition = get_loop_exit_condition (loop);
2347  for (i = 0; i < loop->num_nodes; i++)
2348    {
2349      if (bbs[i]->loop_father == loop)
2350	{
2351	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
2352	    {
2353	      tree stmt = bsi_stmt (bsi);
2354
2355	      if (stmt == exit_condition
2356		  || not_interesting_stmt (stmt)
2357		  || stmt_is_bumper_for_loop (loop, stmt))
2358		continue;
2359
2360	      /* If this is a simple operation like a cast that is invariant
2361		 in the inner loop, only used there, and we can place it
2362		 there, then it's not going to hurt us.
2363		 This means that we will propagate casts and other cheap
2364		 invariant operations *back*
2365		 into the inner loop if we can interchange the loop, on the
2366		 theory that we are going to gain a lot more by interchanging
2367		 the loop than we are by leaving some invariant code there for
2368		 some other pass to clean up.  */
2369	      if (TREE_CODE (stmt) == MODIFY_EXPR)
2370		{
2371		  use_operand_p use_a, use_b;
2372		  imm_use_iterator imm_iter;
2373		  ssa_op_iter op_iter, op_iter1;
2374		  tree op0 = TREE_OPERAND (stmt, 0);
2375		  tree scev = instantiate_parameters
2376		    (loop, analyze_scalar_evolution (loop, op0));
2377
2378		  /* If the IV is simple, it can be duplicated.  */
2379		  if (!automatically_generated_chrec_p (scev))
2380		    {
2381		      tree step = evolution_part_in_loop_num (scev, loop->num);
2382		      if (step && step != chrec_dont_know
2383			  && TREE_CODE (step) == INTEGER_CST)
2384			continue;
2385		    }
2386
2387		  /* The statement should not define a variable used
2388		     in the inner loop.  */
2389		  if (TREE_CODE (op0) == SSA_NAME)
2390		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
2391		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
2392			  == loop->inner)
2393			goto fail;
2394
2395		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
2396		    {
2397		      tree node, op = USE_FROM_PTR (use_a);
2398
2399		      /* The variables should not be used in both loops.  */
2400		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
2401		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
2402			  == loop->inner)
2403			goto fail;
2404
2405		      /* The statement should not use the value of a
2406			 scalar that was modified in the loop.  */
2407		      node = SSA_NAME_DEF_STMT (op);
2408		      if (TREE_CODE (node) == PHI_NODE)
2409			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
2410			  {
2411			    tree arg = USE_FROM_PTR (use_b);
2412
2413			    if (TREE_CODE (arg) == SSA_NAME)
2414			      {
2415				tree arg_stmt = SSA_NAME_DEF_STMT (arg);
2416
2417				if (bb_for_stmt (arg_stmt)->loop_father
2418				    == loop->inner)
2419				  goto fail;
2420			      }
2421			  }
2422		    }
2423
2424		  if (can_put_in_inner_loop (loop->inner, stmt)
2425		      || can_put_after_inner_loop (loop, stmt))
2426		    continue;
2427		}
2428
2429	      /* Otherwise, if the bb of a statement we care about isn't
2430		 dominated by the header of the inner loop, then we can't
2431		 handle this case right now.  This test ensures that the
2432		 statement comes completely *after* the inner loop.  */
2433	      if (!dominated_by_p (CDI_DOMINATORS,
2434				   bb_for_stmt (stmt),
2435				   loop->inner->header))
2436		goto fail;
2437	    }
2438	}
2439    }
2440
2441  /* We also need to make sure the loop exit only has simple copy phis in it,
2442     otherwise we don't know how to transform it into a perfect nest right
2443     now.  */
2444  exitdest = loop->single_exit->dest;
2445
2446  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
2447    if (PHI_NUM_ARGS (phi) != 1)
2448      goto fail;
2449
2450  free (bbs);
2451  return true;
2452
2453 fail:
2454  free (bbs);
2455  return false;
2456}
2457
2458/* Transform the loop nest into a perfect nest, if possible.
2459   LOOPS is the current struct loops *
2460   LOOP is the loop nest to transform into a perfect nest
2461   LBOUNDS are the lower bounds for the loops to transform
2462   UBOUNDS are the upper bounds for the loops to transform
2463   STEPS is the STEPS for the loops to transform.
2464   LOOPIVS is the induction variables for the loops to transform.
2465
2466   Basically, for the case of
2467
2468   FOR (i = 0; i < 50; i++)
2469    {
2470     FOR (j =0; j < 50; j++)
2471     {
2472        <whatever>
2473     }
2474     <some code>
2475    }
2476
2477   This function will transform it into a perfect loop nest by splitting the
2478   outer loop into two loops, like so:
2479
2480   FOR (i = 0; i < 50; i++)
2481   {
2482     FOR (j = 0; j < 50; j++)
2483     {
2484         <whatever>
2485     }
2486   }
2487
2488   FOR (i = 0; i < 50; i ++)
2489   {
2490    <some code>
2491   }
2492
2493   Return FALSE if we can't make this loop into a perfect nest.  */
2494
2495static bool
2496perfect_nestify (struct loops *loops,
2497		 struct loop *loop,
2498		 VEC(tree,heap) *lbounds,
2499		 VEC(tree,heap) *ubounds,
2500		 VEC(int,heap) *steps,
2501		 VEC(tree,heap) *loopivs)
2502{
2503  basic_block *bbs;
2504  tree exit_condition;
2505  tree then_label, else_label, cond_stmt;
2506  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
2507  int i;
2508  block_stmt_iterator bsi, firstbsi;
2509  bool insert_after;
2510  edge e;
2511  struct loop *newloop;
2512  tree phi;
2513  tree uboundvar;
2514  tree stmt;
2515  tree oldivvar, ivvar, ivvarinced;
2516  VEC(tree,heap) *phis = NULL;
2517  htab_t replacements = NULL;
2518
2519  /* Create the new loop.  */
2520  olddest = loop->single_exit->dest;
2521  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
2522  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2523
2524  /* Push the exit phi nodes that we are moving.  */
2525  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
2526    {
2527      VEC_reserve (tree, heap, phis, 2);
2528      VEC_quick_push (tree, phis, PHI_RESULT (phi));
2529      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
2530    }
2531  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
2532
2533  /* Remove the exit phis from the old basic block.  Make sure to set
2534     PHI_RESULT to null so it doesn't get released.  */
2535  while (phi_nodes (olddest) != NULL)
2536    {
2537      SET_PHI_RESULT (phi_nodes (olddest), NULL);
2538      remove_phi_node (phi_nodes (olddest), NULL);
2539    }
2540
2541  /* and add them back to the new basic block.  */
2542  while (VEC_length (tree, phis) != 0)
2543    {
2544      tree def;
2545      tree phiname;
2546      def = VEC_pop (tree, phis);
2547      phiname = VEC_pop (tree, phis);
2548      phi = create_phi_node (phiname, preheaderbb);
2549      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
2550    }
2551  flush_pending_stmts (e);
2552  VEC_free (tree, heap, phis);
2553
2554  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2555  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
2556  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
2557  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
2558  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
2559  cond_stmt = build (COND_EXPR, void_type_node,
2560		     build (NE_EXPR, boolean_type_node,
2561			    integer_one_node,
2562			    integer_zero_node),
2563		     then_label, else_label);
2564  bsi = bsi_start (bodybb);
2565  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
2566  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
2567  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
2568  make_edge (latchbb, headerbb, EDGE_FALLTHRU);
2569
2570  /* Update the loop structures.  */
2571  newloop = duplicate_loop (loops, loop, olddest->loop_father);
2572  newloop->header = headerbb;
2573  newloop->latch = latchbb;
2574  newloop->single_exit = e;
2575  add_bb_to_loop (latchbb, newloop);
2576  add_bb_to_loop (bodybb, newloop);
2577  add_bb_to_loop (headerbb, newloop);
2578  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
2579  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
2580  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
2581			   loop->single_exit->src);
2582  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
2583  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
2584  /* Create the new iv.  */
2585  oldivvar = VEC_index (tree, loopivs, 0);
2586  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
2587  add_referenced_tmp_var (ivvar);
2588  standard_iv_increment_position (newloop, &bsi, &insert_after);
2589  create_iv (VEC_index (tree, lbounds, 0),
2590	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
2591	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
2592
2593  /* Create the new upper bound.  This may be not just a variable, so we copy
2594     it to one just in case.  */
2595
2596  exit_condition = get_loop_exit_condition (newloop);
2597  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
2598  add_referenced_tmp_var (uboundvar);
2599  stmt = build (MODIFY_EXPR, void_type_node, uboundvar,
2600		VEC_index (tree, ubounds, 0));
2601  uboundvar = make_ssa_name (uboundvar, stmt);
2602  TREE_OPERAND (stmt, 0) = uboundvar;
2603
2604  if (insert_after)
2605    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
2606  else
2607    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
2608  update_stmt (stmt);
2609  COND_EXPR_COND (exit_condition) = build (GE_EXPR,
2610					   boolean_type_node,
2611					   uboundvar,
2612					   ivvarinced);
2613  update_stmt (exit_condition);
2614  replacements = htab_create_ggc (20, tree_map_hash,
2615				  tree_map_eq, NULL);
2616  bbs = get_loop_body_in_dom_order (loop);
2617  /* Now move the statements, and replace the induction variable in the moved
2618     statements with the correct loop induction variable.  */
2619  oldivvar = VEC_index (tree, loopivs, 0);
2620  firstbsi = bsi_start (bodybb);
2621  for (i = loop->num_nodes - 1; i >= 0 ; i--)
2622    {
2623      block_stmt_iterator tobsi = bsi_last (bodybb);
2624      if (bbs[i]->loop_father == loop)
2625	{
2626	  /* If this is true, we are *before* the inner loop.
2627	     If this isn't true, we are *after* it.
2628
2629	     The only time can_convert_to_perfect_nest returns true when we
2630	     have statements before the inner loop is if they can be moved
2631	     into the inner loop.
2632
2633	     The only time can_convert_to_perfect_nest returns true when we
2634	     have statements after the inner loop is if they can be moved into
2635	     the new split loop.  */
2636
2637	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
2638	    {
2639	      block_stmt_iterator header_bsi
2640		= bsi_after_labels (loop->inner->header);
2641
2642	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2643		{
2644		  tree stmt = bsi_stmt (bsi);
2645
2646		  if (stmt == exit_condition
2647		      || not_interesting_stmt (stmt)
2648		      || stmt_is_bumper_for_loop (loop, stmt))
2649		    {
2650		      bsi_next (&bsi);
2651		      continue;
2652		    }
2653
2654		  bsi_move_before (&bsi, &header_bsi);
2655		}
2656	    }
2657	  else
2658	    {
2659	      /* Note that the bsi only needs to be explicitly incremented
2660		 when we don't move something, since it is automatically
2661		 incremented when we do.  */
2662	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
2663		{
2664		  ssa_op_iter i;
2665		  tree n, stmt = bsi_stmt (bsi);
2666
2667		  if (stmt == exit_condition
2668		      || not_interesting_stmt (stmt)
2669		      || stmt_is_bumper_for_loop (loop, stmt))
2670		    {
2671		      bsi_next (&bsi);
2672		      continue;
2673		    }
2674
2675		  replace_uses_equiv_to_x_with_y
2676		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
2677		     VEC_index (tree, lbounds, 0), replacements, &firstbsi);
2678
2679		  bsi_move_before (&bsi, &tobsi);
2680
2681		  /* If the statement has any virtual operands, they may
2682		     need to be rewired because the original loop may
2683		     still reference them.  */
2684		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
2685		    mark_sym_for_renaming (SSA_NAME_VAR (n));
2686		}
2687	    }
2688
2689	}
2690    }
2691
2692  free (bbs);
2693  htab_delete (replacements);
2694  return perfect_nest_p (loop);
2695}
2696
2697/* Return true if TRANS is a legal transformation matrix that respects
2698   the dependence vectors in DISTS and DIRS.  The conservative answer
2699   is false.
2700
2701   "Wolfe proves that a unimodular transformation represented by the
2702   matrix T is legal when applied to a loop nest with a set of
2703   lexicographically non-negative distance vectors RDG if and only if
2704   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
2705   i.e.: if and only if it transforms the lexicographically positive
2706   distance vectors to lexicographically positive vectors.  Note that
2707   a unimodular matrix must transform the zero vector (and only it) to
2708   the zero vector." S.Muchnick.  */
2709
2710bool
2711lambda_transform_legal_p (lambda_trans_matrix trans,
2712			  int nb_loops,
2713			  varray_type dependence_relations)
2714{
2715  unsigned int i, j;
2716  lambda_vector distres;
2717  struct data_dependence_relation *ddr;
2718
2719  gcc_assert (LTM_COLSIZE (trans) == nb_loops
2720	      && LTM_ROWSIZE (trans) == nb_loops);
2721
2722  /* When there is an unknown relation in the dependence_relations, we
2723     know that it is no worth looking at this loop nest: give up.  */
2724  ddr = (struct data_dependence_relation *)
2725    VARRAY_GENERIC_PTR (dependence_relations, 0);
2726  if (ddr == NULL)
2727    return true;
2728  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2729    return false;
2730
2731  distres = lambda_vector_new (nb_loops);
2732
2733  /* For each distance vector in the dependence graph.  */
2734  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
2735    {
2736      ddr = (struct data_dependence_relation *)
2737	VARRAY_GENERIC_PTR (dependence_relations, i);
2738
2739      /* Don't care about relations for which we know that there is no
2740	 dependence, nor about read-read (aka. output-dependences):
2741	 these data accesses can happen in any order.  */
2742      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
2743	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
2744	continue;
2745
2746      /* Conservatively answer: "this transformation is not valid".  */
2747      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2748	return false;
2749
2750      /* If the dependence could not be captured by a distance vector,
2751	 conservatively answer that the transform is not valid.  */
2752      if (DDR_NUM_DIST_VECTS (ddr) == 0)
2753	return false;
2754
2755      /* Compute trans.dist_vect */
2756      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
2757	{
2758	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
2759				     DDR_DIST_VECT (ddr, j), distres);
2760
2761	  if (!lambda_vector_lexico_pos (distres, nb_loops))
2762	    return false;
2763	}
2764    }
2765  return true;
2766}
2767