contrib/gcc/lambda-code.c

169689Skan/*  Loop transformation code generation
169689Skan    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
169689Skan    Contributed by Daniel Berlin <dberlin@dberlin.org>
169689Skan
169689Skan    This file is part of GCC.
169689Skan
169689Skan    GCC is free software; you can redistribute it and/or modify it under
169689Skan    the terms of the GNU General Public License as published by the Free
169689Skan    Software Foundation; either version 2, or (at your option) any later
169689Skan    version.
169689Skan
169689Skan    GCC is distributed in the hope that it will be useful, but WITHOUT ANY
169689Skan    WARRANTY; without even the implied warranty of MERCHANTABILITY or
169689Skan    FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
169689Skan    for more details.
169689Skan
169689Skan    You should have received a copy of the GNU General Public License
169689Skan    along with GCC; see the file COPYING.  If not, write to the Free
169689Skan    Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
169689Skan    02110-1301, USA.  */
169689Skan
169689Skan#include "config.h"
169689Skan#include "system.h"
169689Skan#include "coretypes.h"
169689Skan#include "tm.h"
169689Skan#include "ggc.h"
169689Skan#include "tree.h"
169689Skan#include "target.h"
169689Skan#include "rtl.h"
169689Skan#include "basic-block.h"
169689Skan#include "diagnostic.h"
169689Skan#include "tree-flow.h"
169689Skan#include "tree-dump.h"
169689Skan#include "timevar.h"
169689Skan#include "cfgloop.h"
169689Skan#include "expr.h"
169689Skan#include "optabs.h"
169689Skan#include "tree-chrec.h"
169689Skan#include "tree-data-ref.h"
169689Skan#include "tree-pass.h"
169689Skan#include "tree-scalar-evolution.h"
169689Skan#include "vec.h"
169689Skan#include "lambda.h"
169689Skan#include "vecprim.h"
169689Skan
169689Skan/* This loop nest code generation is based on non-singular matrix
169689Skan   math.
169689Skan
169689Skan A little terminology and a general sketch of the algorithm.  See "A singular
169689Skan loop transformation framework based on non-singular matrices" by Wei Li and
169689Skan Keshav Pingali for formal proofs that the various statements below are
169689Skan correct.
169689Skan
 A loop iteration space represents the points traversed by the loop.  A point
 in the iteration space can be represented by a vector of size <loop depth>.
 You can therefore represent the iteration space as integral combinations of
 a set of basis vectors.
169689Skan
169689Skan A loop iteration space is dense if every integer point between the loop
169689Skan bounds is a point in the iteration space.  Every loop with a step of 1
169689Skan therefore has a dense iteration space.
169689Skan
169689Skan for i = 1 to 3, step 1 is a dense iteration space.
169689Skan
169689Skan A loop iteration space is sparse if it is not dense.  That is, the iteration
169689Skan space skips integer points that are within the loop bounds.
169689Skan
169689Skan for i = 1 to 3, step 2 is a sparse iteration space, because the integer point
169689Skan 2 is skipped.
169689Skan
169689Skan Dense source spaces are easy to transform, because they don't skip any
169689Skan points to begin with.  Thus we can compute the exact bounds of the target
169689Skan space using min/max and floor/ceil.
169689Skan
169689Skan For a dense source space, we take the transformation matrix, decompose it
169689Skan into a lower triangular part (H) and a unimodular part (U).
 We then compute the auxiliary space from the unimodular part (source loop
 nest . U = auxiliary space), which has two important properties:
169689Skan  1. It traverses the iterations in the same lexicographic order as the source
169689Skan  space.
169689Skan  2. It is a dense space when the source is a dense space (even if the target
169689Skan  space is going to be sparse).
169689Skan
169689Skan Given the auxiliary space, we use the lower triangular part to compute the
169689Skan bounds in the target space by simple matrix multiplication.
169689Skan The gaps in the target space (IE the new loop step sizes) will be the
169689Skan diagonals of the H matrix.
169689Skan
169689Skan Sparse source spaces require another step, because you can't directly compute
169689Skan the exact bounds of the auxiliary and target space from the sparse space.
169689Skan Rather than try to come up with a separate algorithm to handle sparse source
169689Skan spaces directly, we just find a legal transformation matrix that gives you
169689Skan the sparse source space, from a dense space, and then transform the dense
169689Skan space.
169689Skan
169689Skan For a regular sparse space, you can represent the source space as an integer
169689Skan lattice, and the base space of that lattice will always be dense.  Thus, we
169689Skan effectively use the lattice to figure out the transformation from the lattice
169689Skan base space, to the sparse iteration space (IE what transform was applied to
169689Skan the dense space to make it sparse).  We then compose this transform with the
169689Skan transformation matrix specified by the user (since our matrix transformations
169689Skan are closed under composition, this is okay).  We can then use the base space
169689Skan (which is dense) plus the composed transformation matrix, to compute the rest
169689Skan of the transform using the dense space algorithm above.
169689Skan
169689Skan In other words, our sparse source space (B) is decomposed into a dense base
169689Skan space (A), and a matrix (L) that transforms A into B, such that A.L = B.
169689Skan We then compute the composition of L and the user transformation matrix (T),
169689Skan so that T is now a transform from A to the result, instead of from B to the
169689Skan result.
169689Skan IE A.(LT) = result instead of B.T = result
169689Skan Since A is now a dense source space, we can use the dense source space
169689Skan algorithm above to compute the result of applying transform (LT) to A.
169689Skan
169689Skan Fourier-Motzkin elimination is used to compute the bounds of the base space
169689Skan of the lattice.  */
169689Skan
169689Skanstatic bool perfect_nestify (struct loops *,
169689Skan			     struct loop *, VEC(tree,heap) *,
169689Skan			     VEC(tree,heap) *, VEC(int,heap) *,
169689Skan			     VEC(tree,heap) *);
169689Skan/* Lattice stuff that is internal to the code generation algorithm.  */
169689Skan
169689Skantypedef struct
169689Skan{
169689Skan  /* Lattice base matrix.  */
169689Skan  lambda_matrix base;
169689Skan  /* Lattice dimension.  */
169689Skan  int dimension;
169689Skan  /* Origin vector for the coefficients.  */
169689Skan  lambda_vector origin;
169689Skan  /* Origin matrix for the invariants.  */
169689Skan  lambda_matrix origin_invariants;
169689Skan  /* Number of invariants.  */
169689Skan  int invariants;
169689Skan} *lambda_lattice;
169689Skan
169689Skan#define LATTICE_BASE(T) ((T)->base)
169689Skan#define LATTICE_DIMENSION(T) ((T)->dimension)
169689Skan#define LATTICE_ORIGIN(T) ((T)->origin)
169689Skan#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants)
169689Skan#define LATTICE_INVARIANTS(T) ((T)->invariants)
169689Skan
169689Skanstatic bool lle_equal (lambda_linear_expression, lambda_linear_expression,
169689Skan		       int, int);
169689Skanstatic lambda_lattice lambda_lattice_new (int, int);
169689Skanstatic lambda_lattice lambda_lattice_compute_base (lambda_loopnest);
169689Skan
169689Skanstatic tree find_induction_var_from_exit_cond (struct loop *);
169689Skanstatic bool can_convert_to_perfect_nest (struct loop *);
169689Skan
169689Skan/* Create a new lambda body vector.  */
169689Skan
169689Skanlambda_body_vector
169689Skanlambda_body_vector_new (int size)
169689Skan{
169689Skan  lambda_body_vector ret;
169689Skan
169689Skan  ret = ggc_alloc (sizeof (*ret));
169689Skan  LBV_COEFFICIENTS (ret) = lambda_vector_new (size);
169689Skan  LBV_SIZE (ret) = size;
169689Skan  LBV_DENOMINATOR (ret) = 1;
169689Skan  return ret;
169689Skan}
169689Skan
169689Skan/* Compute the new coefficients for the vector based on the
169689Skan  *inverse* of the transformation matrix.  */
169689Skan
169689Skanlambda_body_vector
169689Skanlambda_body_vector_compute_new (lambda_trans_matrix transform,
169689Skan				lambda_body_vector vect)
169689Skan{
169689Skan  lambda_body_vector temp;
169689Skan  int depth;
169689Skan
169689Skan  /* Make sure the matrix is square.  */
169689Skan  gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
169689Skan
169689Skan  depth = LTM_ROWSIZE (transform);
169689Skan
169689Skan  temp = lambda_body_vector_new (depth);
169689Skan  LBV_DENOMINATOR (temp) =
169689Skan    LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform);
169689Skan  lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth,
169689Skan			     LTM_MATRIX (transform), depth,
169689Skan			     LBV_COEFFICIENTS (temp));
169689Skan  LBV_SIZE (temp) = LBV_SIZE (vect);
169689Skan  return temp;
169689Skan}
169689Skan
169689Skan/* Print out a lambda body vector.  */
169689Skan
169689Skanvoid
169689Skanprint_lambda_body_vector (FILE * outfile, lambda_body_vector body)
169689Skan{
169689Skan  print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body));
169689Skan}
169689Skan
169689Skan/* Return TRUE if two linear expressions are equal.  */
169689Skan
169689Skanstatic bool
169689Skanlle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2,
169689Skan	   int depth, int invariants)
169689Skan{
169689Skan  int i;
169689Skan
169689Skan  if (lle1 == NULL || lle2 == NULL)
169689Skan    return false;
169689Skan  if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2))
169689Skan    return false;
169689Skan  if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2))
169689Skan    return false;
169689Skan  for (i = 0; i < depth; i++)
169689Skan    if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i])
169689Skan      return false;
169689Skan  for (i = 0; i < invariants; i++)
169689Skan    if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] !=
169689Skan	LLE_INVARIANT_COEFFICIENTS (lle2)[i])
169689Skan      return false;
169689Skan  return true;
169689Skan}
169689Skan
169689Skan/* Create a new linear expression with dimension DIM, and total number
169689Skan   of invariants INVARIANTS.  */
169689Skan
169689Skanlambda_linear_expression
169689Skanlambda_linear_expression_new (int dim, int invariants)
169689Skan{
169689Skan  lambda_linear_expression ret;
169689Skan
169689Skan  ret = ggc_alloc_cleared (sizeof (*ret));
169689Skan
169689Skan  LLE_COEFFICIENTS (ret) = lambda_vector_new (dim);
169689Skan  LLE_CONSTANT (ret) = 0;
169689Skan  LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants);
169689Skan  LLE_DENOMINATOR (ret) = 1;
169689Skan  LLE_NEXT (ret) = NULL;
169689Skan
169689Skan  return ret;
169689Skan}
169689Skan
169689Skan/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE.
169689Skan   The starting letter used for variable names is START.  */
169689Skan
169689Skanstatic void
169689Skanprint_linear_expression (FILE * outfile, lambda_vector expr, int size,
169689Skan			 char start)
169689Skan{
169689Skan  int i;
169689Skan  bool first = true;
169689Skan  for (i = 0; i < size; i++)
169689Skan    {
169689Skan      if (expr[i] != 0)
169689Skan	{
169689Skan	  if (first)
169689Skan	    {
169689Skan	      if (expr[i] < 0)
169689Skan		fprintf (outfile, "-");
169689Skan	      first = false;
169689Skan	    }
169689Skan	  else if (expr[i] > 0)
169689Skan	    fprintf (outfile, " + ");
169689Skan	  else
169689Skan	    fprintf (outfile, " - ");
169689Skan	  if (abs (expr[i]) == 1)
169689Skan	    fprintf (outfile, "%c", start + i);
169689Skan	  else
169689Skan	    fprintf (outfile, "%d%c", abs (expr[i]), start + i);
169689Skan	}
169689Skan    }
169689Skan}
169689Skan
169689Skan/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The
169689Skan   depth/number of coefficients is given by DEPTH, the number of invariants is
169689Skan   given by INVARIANTS, and the character to start variable names with is given
169689Skan   by START.  */
169689Skan
169689Skanvoid
169689Skanprint_lambda_linear_expression (FILE * outfile,
169689Skan				lambda_linear_expression expr,
169689Skan				int depth, int invariants, char start)
169689Skan{
169689Skan  fprintf (outfile, "\tLinear expression: ");
169689Skan  print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start);
169689Skan  fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr));
169689Skan  fprintf (outfile, "  invariants: ");
169689Skan  print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr),
169689Skan			   invariants, 'A');
169689Skan  fprintf (outfile, "  denominator: %d\n", LLE_DENOMINATOR (expr));
169689Skan}
169689Skan
169689Skan/* Print a lambda loop structure LOOP to OUTFILE.  The depth/number of
169689Skan   coefficients is given by DEPTH, the number of invariants is
169689Skan   given by INVARIANTS, and the character to start variable names with is given
169689Skan   by START.  */
169689Skan
169689Skanvoid
169689Skanprint_lambda_loop (FILE * outfile, lambda_loop loop, int depth,
169689Skan		   int invariants, char start)
169689Skan{
169689Skan  int step;
169689Skan  lambda_linear_expression expr;
169689Skan
169689Skan  gcc_assert (loop);
169689Skan
169689Skan  expr = LL_LINEAR_OFFSET (loop);
169689Skan  step = LL_STEP (loop);
169689Skan  fprintf (outfile, "  step size = %d \n", step);
169689Skan
169689Skan  if (expr)
169689Skan    {
169689Skan      fprintf (outfile, "  linear offset: \n");
169689Skan      print_lambda_linear_expression (outfile, expr, depth, invariants,
169689Skan				      start);
169689Skan    }
169689Skan
169689Skan  fprintf (outfile, "  lower bound: \n");
169689Skan  for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
169689Skan    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
169689Skan  fprintf (outfile, "  upper bound: \n");
169689Skan  for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr))
169689Skan    print_lambda_linear_expression (outfile, expr, depth, invariants, start);
169689Skan}
169689Skan
169689Skan/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the
169689Skan   number of invariants.  */
169689Skan
169689Skanlambda_loopnest
169689Skanlambda_loopnest_new (int depth, int invariants)
169689Skan{
169689Skan  lambda_loopnest ret;
169689Skan  ret = ggc_alloc (sizeof (*ret));
169689Skan
169689Skan  LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop));
169689Skan  LN_DEPTH (ret) = depth;
169689Skan  LN_INVARIANTS (ret) = invariants;
169689Skan
169689Skan  return ret;
169689Skan}
169689Skan
169689Skan/* Print a lambda loopnest structure, NEST, to OUTFILE.  The starting
169689Skan   character to use for loop names is given by START.  */
169689Skan
169689Skanvoid
169689Skanprint_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start)
169689Skan{
169689Skan  int i;
169689Skan  for (i = 0; i < LN_DEPTH (nest); i++)
169689Skan    {
169689Skan      fprintf (outfile, "Loop %c\n", start + i);
169689Skan      print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest),
169689Skan			 LN_INVARIANTS (nest), 'i');
169689Skan      fprintf (outfile, "\n");
169689Skan    }
169689Skan}
169689Skan
169689Skan/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number
169689Skan   of invariants.  */
169689Skan
169689Skanstatic lambda_lattice
169689Skanlambda_lattice_new (int depth, int invariants)
169689Skan{
169689Skan  lambda_lattice ret;
169689Skan  ret = ggc_alloc (sizeof (*ret));
169689Skan  LATTICE_BASE (ret) = lambda_matrix_new (depth, depth);
169689Skan  LATTICE_ORIGIN (ret) = lambda_vector_new (depth);
169689Skan  LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants);
169689Skan  LATTICE_DIMENSION (ret) = depth;
169689Skan  LATTICE_INVARIANTS (ret) = invariants;
169689Skan  return ret;
169689Skan}
169689Skan
169689Skan/* Compute the lattice base for NEST.  The lattice base is essentially a
169689Skan   non-singular transform from a dense base space to a sparse iteration space.
169689Skan   We use it so that we don't have to specially handle the case of a sparse
169689Skan   iteration space in other parts of the algorithm.  As a result, this routine
169689Skan   only does something interesting (IE produce a matrix that isn't the
169689Skan   identity matrix) if NEST is a sparse space.  */
169689Skan
169689Skanstatic lambda_lattice
169689Skanlambda_lattice_compute_base (lambda_loopnest nest)
169689Skan{
169689Skan  lambda_lattice ret;
169689Skan  int depth, invariants;
169689Skan  lambda_matrix base;
169689Skan
169689Skan  int i, j, step;
169689Skan  lambda_loop loop;
169689Skan  lambda_linear_expression expression;
169689Skan
169689Skan  depth = LN_DEPTH (nest);
169689Skan  invariants = LN_INVARIANTS (nest);
169689Skan
169689Skan  ret = lambda_lattice_new (depth, invariants);
169689Skan  base = LATTICE_BASE (ret);
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      loop = LN_LOOPS (nest)[i];
169689Skan      gcc_assert (loop);
169689Skan      step = LL_STEP (loop);
169689Skan      /* If we have a step of 1, then the base is one, and the
169689Skan         origin and invariant coefficients are 0.  */
169689Skan      if (step == 1)
169689Skan	{
169689Skan	  for (j = 0; j < depth; j++)
169689Skan	    base[i][j] = 0;
169689Skan	  base[i][i] = 1;
169689Skan	  LATTICE_ORIGIN (ret)[i] = 0;
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0;
169689Skan	}
169689Skan      else
169689Skan	{
169689Skan	  /* Otherwise, we need the lower bound expression (which must
169689Skan	     be an affine function)  to determine the base.  */
169689Skan	  expression = LL_LOWER_BOUND (loop);
169689Skan	  gcc_assert (expression && !LLE_NEXT (expression)
169689Skan		      && LLE_DENOMINATOR (expression) == 1);
169689Skan
169689Skan	  /* The lower triangular portion of the base is going to be the
169689Skan	     coefficient times the step */
169689Skan	  for (j = 0; j < i; j++)
169689Skan	    base[i][j] = LLE_COEFFICIENTS (expression)[j]
169689Skan	      * LL_STEP (LN_LOOPS (nest)[j]);
169689Skan	  base[i][i] = step;
169689Skan	  for (j = i + 1; j < depth; j++)
169689Skan	    base[i][j] = 0;
169689Skan
169689Skan	  /* Origin for this loop is the constant of the lower bound
169689Skan	     expression.  */
169689Skan	  LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression);
169689Skan
169689Skan	  /* Coefficient for the invariants are equal to the invariant
169689Skan	     coefficients in the expression.  */
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    LATTICE_ORIGIN_INVARIANTS (ret)[i][j] =
169689Skan	      LLE_INVARIANT_COEFFICIENTS (expression)[j];
169689Skan	}
169689Skan    }
169689Skan  return ret;
169689Skan}
169689Skan
/* Compute the least common multiple of two numbers A and B.
   Returns 0 if either A or B is 0.  */

static int
lcm (int a, int b)
{
  int abs_a = abs (a);
  int abs_b = abs (b);

  /* Avoid dividing by gcd (0, 0); the multiple of anything and 0 is 0.  */
  if (abs_a == 0 || abs_b == 0)
    return 0;

  /* Divide before multiplying: the gcd divides |A| exactly, so the
     result is unchanged, but the intermediate value never exceeds the
     final one and cannot overflow when the LCM itself fits in an int.  */
  return abs_a / gcd (a, b) * abs_b;
}
169689Skan
169689Skan/* Perform Fourier-Motzkin elimination to calculate the bounds of the
169689Skan   auxiliary nest.
169689Skan   Fourier-Motzkin is a way of reducing systems of linear inequalities so that
169689Skan   it is easy to calculate the answer and bounds.
169689Skan   A sketch of how it works:
169689Skan   Given a system of linear inequalities, ai * xj >= bk, you can always
169689Skan   rewrite the constraints so they are all of the form
169689Skan   a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b
169689Skan   in b1 ... bk, and some a in a1...ai)
169689Skan   You can then eliminate this x from the non-constant inequalities by
169689Skan   rewriting these as a <= b, x >= constant, and delete the x variable.
169689Skan   You can then repeat this for any remaining x variables, and then we have
169689Skan   an easy to use variable <= constant (or no variables at all) form that we
169689Skan   can construct our bounds from.
169689Skan
169689Skan   In our case, each time we eliminate, we construct part of the bound from
169689Skan   the ith variable, then delete the ith variable.
169689Skan
169689Skan   Remember the constant are in our vector a, our coefficient matrix is A,
169689Skan   and our invariant coefficient matrix is B.
169689Skan
169689Skan   SIZE is the size of the matrices being passed.
169689Skan   DEPTH is the loop nest depth.
169689Skan   INVARIANTS is the number of loop invariants.
169689Skan   A, B, and a are the coefficient matrix, invariant coefficient, and a
169689Skan   vector of constants, respectively.  */
169689Skan
169689Skanstatic lambda_loopnest
169689Skancompute_nest_using_fourier_motzkin (int size,
169689Skan				    int depth,
169689Skan				    int invariants,
169689Skan				    lambda_matrix A,
169689Skan				    lambda_matrix B,
169689Skan				    lambda_vector a)
169689Skan{
169689Skan
169689Skan  int multiple, f1, f2;
169689Skan  int i, j, k;
169689Skan  lambda_linear_expression expression;
169689Skan  lambda_loop loop;
169689Skan  lambda_loopnest auxillary_nest;
169689Skan  lambda_matrix swapmatrix, A1, B1;
169689Skan  lambda_vector swapvector, a1;
169689Skan  int newsize;
169689Skan
169689Skan  A1 = lambda_matrix_new (128, depth);
169689Skan  B1 = lambda_matrix_new (128, invariants);
169689Skan  a1 = lambda_vector_new (128);
169689Skan
169689Skan  auxillary_nest = lambda_loopnest_new (depth, invariants);
169689Skan
169689Skan  for (i = depth - 1; i >= 0; i--)
169689Skan    {
169689Skan      loop = lambda_loop_new ();
169689Skan      LN_LOOPS (auxillary_nest)[i] = loop;
169689Skan      LL_STEP (loop) = 1;
169689Skan
169689Skan      for (j = 0; j < size; j++)
169689Skan	{
169689Skan	  if (A[j][i] < 0)
169689Skan	    {
169689Skan	      /* Any linear expression in the matrix with a coefficient less
169689Skan		 than 0 becomes part of the new lower bound.  */
169689Skan	      expression = lambda_linear_expression_new (depth, invariants);
169689Skan
169689Skan	      for (k = 0; k < i; k++)
169689Skan		LLE_COEFFICIENTS (expression)[k] = A[j][k];
169689Skan
169689Skan	      for (k = 0; k < invariants; k++)
169689Skan		LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k];
169689Skan
169689Skan	      LLE_DENOMINATOR (expression) = -1 * A[j][i];
169689Skan	      LLE_CONSTANT (expression) = -1 * a[j];
169689Skan
169689Skan	      /* Ignore if identical to the existing lower bound.  */
169689Skan	      if (!lle_equal (LL_LOWER_BOUND (loop),
169689Skan			      expression, depth, invariants))
169689Skan		{
169689Skan		  LLE_NEXT (expression) = LL_LOWER_BOUND (loop);
169689Skan		  LL_LOWER_BOUND (loop) = expression;
169689Skan		}
169689Skan
169689Skan	    }
169689Skan	  else if (A[j][i] > 0)
169689Skan	    {
169689Skan	      /* Any linear expression with a coefficient greater than 0
169689Skan		 becomes part of the new upper bound.  */
169689Skan	      expression = lambda_linear_expression_new (depth, invariants);
169689Skan	      for (k = 0; k < i; k++)
169689Skan		LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k];
169689Skan
169689Skan	      for (k = 0; k < invariants; k++)
169689Skan		LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k];
169689Skan
169689Skan	      LLE_DENOMINATOR (expression) = A[j][i];
169689Skan	      LLE_CONSTANT (expression) = a[j];
169689Skan
169689Skan	      /* Ignore if identical to the existing upper bound.  */
169689Skan	      if (!lle_equal (LL_UPPER_BOUND (loop),
169689Skan			      expression, depth, invariants))
169689Skan		{
169689Skan		  LLE_NEXT (expression) = LL_UPPER_BOUND (loop);
169689Skan		  LL_UPPER_BOUND (loop) = expression;
169689Skan		}
169689Skan
169689Skan	    }
169689Skan	}
169689Skan
169689Skan      /* This portion creates a new system of linear inequalities by deleting
169689Skan	 the i'th variable, reducing the system by one variable.  */
169689Skan      newsize = 0;
169689Skan      for (j = 0; j < size; j++)
169689Skan	{
169689Skan	  /* If the coefficient for the i'th variable is 0, then we can just
169689Skan	     eliminate the variable straightaway.  Otherwise, we have to
169689Skan	     multiply through by the coefficients we are eliminating.  */
169689Skan	  if (A[j][i] == 0)
169689Skan	    {
169689Skan	      lambda_vector_copy (A[j], A1[newsize], depth);
169689Skan	      lambda_vector_copy (B[j], B1[newsize], invariants);
169689Skan	      a1[newsize] = a[j];
169689Skan	      newsize++;
169689Skan	    }
169689Skan	  else if (A[j][i] > 0)
169689Skan	    {
169689Skan	      for (k = 0; k < size; k++)
169689Skan		{
169689Skan		  if (A[k][i] < 0)
169689Skan		    {
169689Skan		      multiple = lcm (A[j][i], A[k][i]);
169689Skan		      f1 = multiple / A[j][i];
169689Skan		      f2 = -1 * multiple / A[k][i];
169689Skan
169689Skan		      lambda_vector_add_mc (A[j], f1, A[k], f2,
169689Skan					    A1[newsize], depth);
169689Skan		      lambda_vector_add_mc (B[j], f1, B[k], f2,
169689Skan					    B1[newsize], invariants);
169689Skan		      a1[newsize] = f1 * a[j] + f2 * a[k];
169689Skan		      newsize++;
169689Skan		    }
169689Skan		}
169689Skan	    }
169689Skan	}
169689Skan
169689Skan      swapmatrix = A;
169689Skan      A = A1;
169689Skan      A1 = swapmatrix;
169689Skan
169689Skan      swapmatrix = B;
169689Skan      B = B1;
169689Skan      B1 = swapmatrix;
169689Skan
169689Skan      swapvector = a;
169689Skan      a = a1;
169689Skan      a1 = swapvector;
169689Skan
169689Skan      size = newsize;
169689Skan    }
169689Skan
169689Skan  return auxillary_nest;
169689Skan}
169689Skan
169689Skan/* Compute the loop bounds for the auxiliary space NEST.
169689Skan   Input system used is Ax <= b.  TRANS is the unimodular transformation.
169689Skan   Given the original nest, this function will
169689Skan   1. Convert the nest into matrix form, which consists of a matrix for the
169689Skan   coefficients, a matrix for the
169689Skan   invariant coefficients, and a vector for the constants.
169689Skan   2. Use the matrix form to calculate the lattice base for the nest (which is
169689Skan   a dense space)
169689Skan   3. Compose the dense space transform with the user specified transform, to
169689Skan   get a transform we can easily calculate transformed bounds for.
169689Skan   4. Multiply the composed transformation matrix times the matrix form of the
169689Skan   loop.
169689Skan   5. Transform the newly created matrix (from step 4) back into a loop nest
169689Skan   using Fourier-Motzkin elimination to figure out the bounds.  */
169689Skan
169689Skanstatic lambda_loopnest
169689Skanlambda_compute_auxillary_space (lambda_loopnest nest,
169689Skan				lambda_trans_matrix trans)
169689Skan{
169689Skan  lambda_matrix A, B, A1, B1;
169689Skan  lambda_vector a, a1;
169689Skan  lambda_matrix invertedtrans;
169689Skan  int depth, invariants, size;
169689Skan  int i, j;
169689Skan  lambda_loop loop;
169689Skan  lambda_linear_expression expression;
169689Skan  lambda_lattice lattice;
169689Skan
169689Skan  depth = LN_DEPTH (nest);
169689Skan  invariants = LN_INVARIANTS (nest);
169689Skan
169689Skan  /* Unfortunately, we can't know the number of constraints we'll have
169689Skan     ahead of time, but this should be enough even in ridiculous loop nest
169689Skan     cases. We must not go over this limit.  */
169689Skan  A = lambda_matrix_new (128, depth);
169689Skan  B = lambda_matrix_new (128, invariants);
169689Skan  a = lambda_vector_new (128);
169689Skan
169689Skan  A1 = lambda_matrix_new (128, depth);
169689Skan  B1 = lambda_matrix_new (128, invariants);
169689Skan  a1 = lambda_vector_new (128);
169689Skan
169689Skan  /* Store the bounds in the equation matrix A, constant vector a, and
169689Skan     invariant matrix B, so that we have Ax <= a + B.
169689Skan     This requires a little equation rearranging so that everything is on the
169689Skan     correct side of the inequality.  */
169689Skan  size = 0;
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      loop = LN_LOOPS (nest)[i];
169689Skan
169689Skan      /* First we do the lower bound.  */
169689Skan      if (LL_STEP (loop) > 0)
169689Skan	expression = LL_LOWER_BOUND (loop);
169689Skan      else
169689Skan	expression = LL_UPPER_BOUND (loop);
169689Skan
169689Skan      for (; expression != NULL; expression = LLE_NEXT (expression))
169689Skan	{
169689Skan	  /* Fill in the coefficient.  */
169689Skan	  for (j = 0; j < i; j++)
169689Skan	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
169689Skan
169689Skan	  /* And the invariant coefficient.  */
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
169689Skan
169689Skan	  /* And the constant.  */
169689Skan	  a[size] = LLE_CONSTANT (expression);
169689Skan
169689Skan	  /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b.  IE put all
169689Skan	     constants and single variables on   */
169689Skan	  A[size][i] = -1 * LLE_DENOMINATOR (expression);
169689Skan	  a[size] *= -1;
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    B[size][j] *= -1;
169689Skan
169689Skan	  size++;
169689Skan	  /* Need to increase matrix sizes above.  */
169689Skan	  gcc_assert (size <= 127);
169689Skan
169689Skan	}
169689Skan
169689Skan      /* Then do the exact same thing for the upper bounds.  */
169689Skan      if (LL_STEP (loop) > 0)
169689Skan	expression = LL_UPPER_BOUND (loop);
169689Skan      else
169689Skan	expression = LL_LOWER_BOUND (loop);
169689Skan
169689Skan      for (; expression != NULL; expression = LLE_NEXT (expression))
169689Skan	{
169689Skan	  /* Fill in the coefficient.  */
169689Skan	  for (j = 0; j < i; j++)
169689Skan	    A[size][j] = LLE_COEFFICIENTS (expression)[j];
169689Skan
169689Skan	  /* And the invariant coefficient.  */
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j];
169689Skan
169689Skan	  /* And the constant.  */
169689Skan	  a[size] = LLE_CONSTANT (expression);
169689Skan
169689Skan	  /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b.  */
169689Skan	  for (j = 0; j < i; j++)
169689Skan	    A[size][j] *= -1;
169689Skan	  A[size][i] = LLE_DENOMINATOR (expression);
169689Skan	  size++;
169689Skan	  /* Need to increase matrix sizes above.  */
169689Skan	  gcc_assert (size <= 127);
169689Skan
169689Skan	}
169689Skan    }
169689Skan
169689Skan  /* Compute the lattice base x = base * y + origin, where y is the
169689Skan     base space.  */
169689Skan  lattice = lambda_lattice_compute_base (nest);
169689Skan
169689Skan  /* Ax <= a + B then becomes ALy <= a+B - A*origin.  L is the lattice base  */
169689Skan
169689Skan  /* A1 = A * L */
169689Skan  lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth);
169689Skan
169689Skan  /* a1 = a - A * origin constant.  */
169689Skan  lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1);
169689Skan  lambda_vector_add_mc (a, 1, a1, -1, a1, size);
169689Skan
169689Skan  /* B1 = B - A * origin invariant.  */
169689Skan  lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth,
169689Skan		      invariants);
169689Skan  lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants);
169689Skan
169689Skan  /* Now compute the auxiliary space bounds by first inverting U, multiplying
169689Skan     it by A1, then performing Fourier-Motzkin.  */
169689Skan
169689Skan  invertedtrans = lambda_matrix_new (depth, depth);
169689Skan
169689Skan  /* Compute the inverse of U.  */
169689Skan  lambda_matrix_inverse (LTM_MATRIX (trans),
169689Skan			 invertedtrans, depth);
169689Skan
169689Skan  /* A = A1 inv(U).  */
169689Skan  lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth);
169689Skan
169689Skan  return compute_nest_using_fourier_motzkin (size, depth, invariants,
169689Skan					     A, B1, a1);
169689Skan}
169689Skan
169689Skan/* Compute the loop bounds for the target space, using the bounds of
169689Skan   the auxiliary nest AUXILLARY_NEST, and the triangular matrix H.
169689Skan   The target space loop bounds are computed by multiplying the triangular
169689Skan   matrix H by the auxiliary nest, to get the new loop bounds.  The sign of
169689Skan   the loop steps (positive or negative) is then used to swap the bounds if
169689Skan   the loop counts downwards.
169689Skan
169689Skan   The depth and the number of invariants of the result are taken from
169689Skan   AUXILLARY_NEST.  For loop I of the result:
169689Skan   - the step is H[I][I], negated when STEPSIGNS[I] is negative;
169689Skan   - the linear offset has a zero constant and cleared invariant
169689Skan     coefficients; its coefficients come from row I of (H with a zeroed
169689Skan     diagonal) times the matrix filled in by lambda_matrix_inverse, and
169689Skan     its denominator is the value returned by lambda_matrix_inverse,
169689Skan     both divided by their common divisor;
169689Skan   - every bound expression of auxiliary loop I is converted into one
169689Skan     bound expression, which is dropped when lle_equal reports it equal
169689Skan     to what is already recorded for that bound.
169689Skan
169689Skan   Return the target loopnest.  */
169689Skan
169689Skanstatic lambda_loopnest
169689Skanlambda_compute_target_space (lambda_loopnest auxillary_nest,
169689Skan			     lambda_trans_matrix H, lambda_vector stepsigns)
169689Skan{
169689Skan  lambda_matrix inverse, H1;
169689Skan  int determinant, i, j;
169689Skan  int gcd1, gcd2;
169689Skan  int factor;
169689Skan
169689Skan  lambda_loopnest target_nest;
169689Skan  int depth, invariants;
169689Skan  lambda_matrix target;
169689Skan
169689Skan  lambda_loop auxillary_loop, target_loop;
169689Skan  lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr;
169689Skan
169689Skan  depth = LN_DEPTH (auxillary_nest);
169689Skan  invariants = LN_INVARIANTS (auxillary_nest);
169689Skan
169689Skan  inverse = lambda_matrix_new (depth, depth);
169689Skan  determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth);
169689Skan
169689Skan  /* H1 is H excluding its diagonal.  */
169689Skan  H1 = lambda_matrix_new (depth, depth);
169689Skan  lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth);
169689Skan
169689Skan  for (i = 0; i < depth; i++)
169689Skan    H1[i][i] = 0;
169689Skan
169689Skan  /* Computes the linear offsets of the loop bounds.  */
169689Skan  target = lambda_matrix_new (depth, depth);
169689Skan  lambda_matrix_mult (H1, inverse, target, depth, depth, depth);
169689Skan
169689Skan  target_nest = lambda_loopnest_new (depth, invariants);
169689Skan
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan
169689Skan      /* Get a new loop structure.  */
169689Skan      target_loop = lambda_loop_new ();
169689Skan      LN_LOOPS (target_nest)[i] = target_loop;
169689Skan
169689Skan      /* Computes the gcd of the coefficients of the linear part.  */
169689Skan      gcd1 = lambda_vector_gcd (target[i], i);
169689Skan
169689Skan      /* Include the denominator in the GCD.  */
169689Skan      gcd1 = gcd (gcd1, determinant);
169689Skan
169689Skan      /* Now divide through by the gcd.  Only the first I entries of the
169689Skan         row take part, both in the gcd above and in the division here,
169689Skan         while all DEPTH entries are copied below; presumably the
169689Skan         remaining entries are zero because H is lower triangular --
169689Skan         TODO confirm.  */
169689Skan      for (j = 0; j < i; j++)
169689Skan	target[i][j] = target[i][j] / gcd1;
169689Skan
169689Skan      expression = lambda_linear_expression_new (depth, invariants);
169689Skan      lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth);
169689Skan      LLE_DENOMINATOR (expression) = determinant / gcd1;
169689Skan      LLE_CONSTANT (expression) = 0;
169689Skan      lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression),
169689Skan			   invariants);
169689Skan      LL_LINEAR_OFFSET (target_loop) = expression;
169689Skan    }
169689Skan
169689Skan  /* For each loop, compute the new bounds from H.  The diagonal entry
169689Skan     H[i][i] serves both as the step of the target loop and as the
169689Skan     FACTOR applied to every bound expression.  */
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      auxillary_loop = LN_LOOPS (auxillary_nest)[i];
169689Skan      target_loop = LN_LOOPS (target_nest)[i];
169689Skan      LL_STEP (target_loop) = LTM_MATRIX (H)[i][i];
169689Skan      factor = LTM_MATRIX (H)[i][i];
169689Skan
169689Skan      /* First we do the lower bound.  The coefficients of each auxiliary
169689Skan         bound are multiplied by INVERSE and then by FACTOR; the constant
169689Skan         and the invariant coefficients are multiplied by FACTOR.  When
169689Skan         the resulting coefficient vector is not all zero, the constant,
169689Skan         the invariant coefficients and the denominator are additionally
169689Skan         multiplied by DETERMINANT.  */
169689Skan      auxillary_expr = LL_LOWER_BOUND (auxillary_loop);
169689Skan
169689Skan      for (; auxillary_expr != NULL;
169689Skan	   auxillary_expr = LLE_NEXT (auxillary_expr))
169689Skan	{
169689Skan	  target_expr = lambda_linear_expression_new (depth, invariants);
169689Skan	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
169689Skan				     depth, inverse, depth,
169689Skan				     LLE_COEFFICIENTS (target_expr));
169689Skan	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
169689Skan				    LLE_COEFFICIENTS (target_expr), depth,
169689Skan				    factor);
169689Skan
169689Skan	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
169689Skan	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
169689Skan			      LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan			      invariants);
169689Skan	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    invariants, factor);
169689Skan	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
169689Skan
169689Skan	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
169689Skan	    {
169689Skan	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
169689Skan		* determinant;
169689Skan	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
169689Skan					(target_expr),
169689Skan					LLE_INVARIANT_COEFFICIENTS
169689Skan					(target_expr), invariants,
169689Skan					determinant);
169689Skan	      LLE_DENOMINATOR (target_expr) =
169689Skan		LLE_DENOMINATOR (target_expr) * determinant;
169689Skan	    }
169689Skan	  /* Find the gcd and divide by it here, rather than doing it
169689Skan	     at the tree level.  The gcd covers the coefficients, the
169689Skan	     invariant coefficients, the constant and the denominator.  */
169689Skan	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
169689Skan	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    invariants);
169689Skan	  gcd1 = gcd (gcd1, gcd2);
169689Skan	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
169689Skan	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
169689Skan	  for (j = 0; j < depth; j++)
169689Skan	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
169689Skan	  LLE_CONSTANT (target_expr) /= gcd1;
169689Skan	  LLE_DENOMINATOR (target_expr) /= gcd1;
169689Skan	  /* Ignore if identical to existing bound.  Otherwise the new
169689Skan	     expression becomes the head of the lower bound list.  */
169689Skan	  if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth,
169689Skan			  invariants))
169689Skan	    {
169689Skan	      LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop);
169689Skan	      LL_LOWER_BOUND (target_loop) = target_expr;
169689Skan	    }
169689Skan	}
169689Skan      /* Now do the upper bound.  NOTE(review): the loop below repeats the
169689Skan         lower bound loop statement for statement, apart from the bound
169689Skan         list it reads and the one it prepends to; a shared helper would
169689Skan         remove the duplication.  */
169689Skan      auxillary_expr = LL_UPPER_BOUND (auxillary_loop);
169689Skan
169689Skan      for (; auxillary_expr != NULL;
169689Skan	   auxillary_expr = LLE_NEXT (auxillary_expr))
169689Skan	{
169689Skan	  target_expr = lambda_linear_expression_new (depth, invariants);
169689Skan	  lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr),
169689Skan				     depth, inverse, depth,
169689Skan				     LLE_COEFFICIENTS (target_expr));
169689Skan	  lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr),
169689Skan				    LLE_COEFFICIENTS (target_expr), depth,
169689Skan				    factor);
169689Skan	  LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor;
169689Skan	  lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr),
169689Skan			      LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan			      invariants);
169689Skan	  lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    invariants, factor);
169689Skan	  LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr);
169689Skan
169689Skan	  if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth))
169689Skan	    {
169689Skan	      LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr)
169689Skan		* determinant;
169689Skan	      lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS
169689Skan					(target_expr),
169689Skan					LLE_INVARIANT_COEFFICIENTS
169689Skan					(target_expr), invariants,
169689Skan					determinant);
169689Skan	      LLE_DENOMINATOR (target_expr) =
169689Skan		LLE_DENOMINATOR (target_expr) * determinant;
169689Skan	    }
169689Skan	  /* Find the gcd and divide by it here, instead of at the
169689Skan	     tree level.  */
169689Skan	  gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth);
169689Skan	  gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr),
169689Skan				    invariants);
169689Skan	  gcd1 = gcd (gcd1, gcd2);
169689Skan	  gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr));
169689Skan	  gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr));
169689Skan	  for (j = 0; j < depth; j++)
169689Skan	    LLE_COEFFICIENTS (target_expr)[j] /= gcd1;
169689Skan	  for (j = 0; j < invariants; j++)
169689Skan	    LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1;
169689Skan	  LLE_CONSTANT (target_expr) /= gcd1;
169689Skan	  LLE_DENOMINATOR (target_expr) /= gcd1;
169689Skan	  /* Ignore if equal to existing bound.  Otherwise the new
169689Skan	     expression becomes the head of the upper bound list.  */
169689Skan	  if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth,
169689Skan			  invariants))
169689Skan	    {
169689Skan	      LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop);
169689Skan	      LL_UPPER_BOUND (target_loop) = target_expr;
169689Skan	    }
169689Skan	}
169689Skan    }
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      target_loop = LN_LOOPS (target_nest)[i];
169689Skan      /* If necessary, exchange the upper and lower bounds and negate
169689Skan         the step size.  "Necessary" means STEPSIGNS[i] is negative.  */
169689Skan      if (stepsigns[i] < 0)
169689Skan	{
169689Skan	  LL_STEP (target_loop) *= -1;
169689Skan	  tmp_expr = LL_LOWER_BOUND (target_loop);
169689Skan	  LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop);
169689Skan	  LL_UPPER_BOUND (target_loop) = tmp_expr;
169689Skan	}
169689Skan    }
169689Skan  return target_nest;
169689Skan}
169689Skan
169689Skan/* Compute the step signs of TRANS, using TRANS and stepsigns.  Return the new
169689Skan   result.
169689Skan
169689Skan   The result is a newly created vector that starts out as a copy of
169689Skan   STEPSIGNS; its length is the row size of TRANS.  A scratch copy of the
169689Skan   matrix of TRANS is then processed one row at a time with column
169689Skan   operations.  For row J, every column from J onwards whose entry in that
169689Skan   row is negative is negated; then, for as long as lambda_vector_first_nz
169689Skan   started at column J + 1 yields an index below SIZE, the column picked by
169689Skan   lambda_vector_min_nz is exchanged with column J and each later column I
169689Skan   has column J combined into it with the weight -(row[I] / row[J]).
169689Skan   Whenever two columns are exchanged, the matching two entries of the
169689Skan   result are exchanged as well; negating a column leaves the result
169689Skan   untouched.  The scratch matrix is not returned.
169689Skan
169689Skan   NOTE(review): the exact contracts of the lambda_vector_* and
169689Skan   lambda_matrix_col_* helpers are not visible here; the description above
169689Skan   assumes what their names and call sites suggest -- confirm against
169689Skan   their definitions.  */
169689Skan
169689Skanstatic lambda_vector
169689Skanlambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns)
169689Skan{
169689Skan  lambda_matrix matrix, H;
169689Skan  int size;
169689Skan  lambda_vector newsteps;
169689Skan  int i, j, factor, minimum_column;
169689Skan  int temp;
169689Skan
169689Skan  matrix = LTM_MATRIX (trans);
169689Skan  size = LTM_ROWSIZE (trans);
169689Skan  H = lambda_matrix_new (size, size);
169689Skan
169689Skan  newsteps = lambda_vector_new (size);
169689Skan  lambda_vector_copy (stepsigns, newsteps, size);
169689Skan
169689Skan  lambda_matrix_copy (matrix, H, size, size);
169689Skan
169689Skan  for (j = 0; j < size; j++)
169689Skan    {
169689Skan      lambda_vector row;
169689Skan      row = H[j];
169689Skan      for (i = j; i < size; i++)
169689Skan	if (row[i] < 0)
169689Skan	  lambda_matrix_col_negate (H, size, i);
169689Skan      while (lambda_vector_first_nz (row, size, j + 1) < size)
169689Skan	{
169689Skan	  minimum_column = lambda_vector_min_nz (row, size, j);
169689Skan	  lambda_matrix_col_exchange (H, size, j, minimum_column);
169689Skan
169689Skan	  temp = newsteps[j]; /* Mirror the column exchange in the result.  */
169689Skan	  newsteps[j] = newsteps[minimum_column];
169689Skan	  newsteps[minimum_column] = temp;
169689Skan
169689Skan	  for (i = j + 1; i < size; i++)
169689Skan	    {
169689Skan	      factor = row[i] / row[j];
169689Skan	      lambda_matrix_col_add (H, size, j, i, -1 * factor);
169689Skan	    }
169689Skan	}
169689Skan    }
169689Skan  return newsteps;
169689Skan}
169689Skan
169689Skan/* Transform NEST according to TRANS, and return the new loopnest.
169689Skan   This involves
169689Skan   1. Computing a lattice base for the transformation
169689Skan   2. Composing the dense base with the specified transformation (TRANS)
169689Skan   3. Decomposing the combined transformation into a lower triangular portion,
169689Skan   and a unimodular portion.
169689Skan   4. Computing the auxiliary nest using the unimodular portion.
169689Skan   5. Computing the target nest using the auxiliary nest and the lower
169689Skan   triangular portion.
169689Skan
169689Skan   In addition, the sign of every original loop step is recorded before
169689Skan   the transformation and passed through lambda_compute_step_signs, and
169689Skan   once the target nest exists the lattice origin (its constant part and
169689Skan   its invariant part), multiplied by the matrix of TRANS, is added to the
169689Skan   linear offset of every target loop.  */
169689Skan
169689Skanlambda_loopnest
169689Skanlambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans)
169689Skan{
169689Skan  lambda_loopnest auxillary_nest, target_nest;
169689Skan
169689Skan  int depth, invariants;
169689Skan  int i, j;
169689Skan  lambda_lattice lattice;
169689Skan  lambda_trans_matrix trans1, H, U;
169689Skan  lambda_loop loop;
169689Skan  lambda_linear_expression expression;
169689Skan  lambda_vector origin;
169689Skan  lambda_matrix origin_invariants;
169689Skan  lambda_vector stepsigns;
169689Skan  int f;
169689Skan
169689Skan  depth = LN_DEPTH (nest);
169689Skan  invariants = LN_INVARIANTS (nest);
169689Skan
169689Skan  /* Keep track of the signs of the loop steps.  A step that is not
169689Skan     strictly positive (zero included) is recorded as -1.  */
169689Skan  stepsigns = lambda_vector_new (depth);
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      if (LL_STEP (LN_LOOPS (nest)[i]) > 0)
169689Skan	stepsigns[i] = 1;
169689Skan      else
169689Skan	stepsigns[i] = -1;
169689Skan    }
169689Skan
169689Skan  /* Compute the lattice base.  */
169689Skan  lattice = lambda_lattice_compute_base (nest);
169689Skan  trans1 = lambda_trans_matrix_new (depth, depth);
169689Skan
169689Skan  /* Multiply the transformation matrix by the lattice base.  */
169689Skan
169689Skan  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice),
169689Skan		      LTM_MATRIX (trans1), depth, depth, depth);
169689Skan
169689Skan  /* Compute the Hermite normal form for the new transformation matrix.  */
169689Skan  H = lambda_trans_matrix_new (depth, depth);
169689Skan  U = lambda_trans_matrix_new (depth, depth);
169689Skan  lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H),
169689Skan			 LTM_MATRIX (U));
169689Skan
169689Skan  /* Compute the auxiliary loop nest's space from the unimodular
169689Skan     portion.  */
169689Skan  auxillary_nest = lambda_compute_auxillary_space (nest, U);
169689Skan
169689Skan  /* Compute the loop step signs from the old step signs and the
169689Skan     transformation matrix.  The combined matrix TRANS1 is used here,
169689Skan     not TRANS itself.  */
169689Skan  stepsigns = lambda_compute_step_signs (trans1, stepsigns);
169689Skan
169689Skan  /* Compute the target loop nest space from the auxiliary nest and
169689Skan     the lower triangular matrix H.  Then multiply the lattice origin
169689Skan     and the lattice origin invariants by the matrix of TRANS, giving
169689Skan     ORIGIN and ORIGIN_INVARIANTS.  In the loop that follows these are
169689Skan     added to the constant and to the invariant coefficients of each
169689Skan     target loop's linear offset, scaled by F: the denominator of that
169689Skan     offset, or 1 when all of the offset's coefficients are zero.  */
169689Skan  target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns);
169689Skan  origin = lambda_vector_new (depth);
169689Skan  origin_invariants = lambda_matrix_new (depth, invariants);
169689Skan  lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth,
169689Skan			     LATTICE_ORIGIN (lattice), origin);
169689Skan  lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice),
169689Skan		      origin_invariants, depth, depth, invariants);
169689Skan
169689Skan  for (i = 0; i < depth; i++)
169689Skan    {
169689Skan      loop = LN_LOOPS (target_nest)[i];
169689Skan      expression = LL_LINEAR_OFFSET (loop);
169689Skan      if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth))
169689Skan	f = 1;
169689Skan      else
169689Skan	f = LLE_DENOMINATOR (expression);
169689Skan
169689Skan      LLE_CONSTANT (expression) += f * origin[i];
169689Skan
169689Skan      for (j = 0; j < invariants; j++)
169689Skan	LLE_INVARIANT_COEFFICIENTS (expression)[j] +=
169689Skan	  f * origin_invariants[i][j];
169689Skan    }
169689Skan
169689Skan  return target_nest;
169689Skan
169689Skan}
169689Skan
169689Skan/* Convert a gcc tree expression EXPR to a lambda linear expression, and
169689Skan   return the new expression.  DEPTH is the depth of the loopnest.
169689Skan   OUTERINDUCTIONVARS is an array of the induction variables for outer loops
169689Skan   in this nest.  INVARIANTS is the array of invariants for the loop.  EXTRA
169689Skan   is the amount we have to add/subtract from the expression because of the
169689Skan   type of comparison it is used in.
169689Skan
169689Skan   Only two tree codes are handled.  An INTEGER_CST becomes a constant
169689Skan   expression (the low word of the constant plus EXTRA).  An SSA_NAME is
169689Skan   compared, by its SSA_NAME_VAR, with every non-NULL element of
169689Skan   OUTERINDUCTIONVARS and then of INVARIANTS; a match at index I gives an
169689Skan   expression with coefficient 1 (respectively invariant coefficient 1) at
169689Skan   index I.  Every expression is created with room for 2 * DEPTH
169689Skan   invariants and has denominator 1.
169689Skan
169689Skan   NULL is returned for any other tree code, and for an SSA_NAME that
169689Skan   matches nothing.
169689Skan
169689Skan   NOTE(review): neither search loop stops at the first match, so when
169689Skan   several elements match a new expression is created for each and only
169689Skan   the last one is returned; a match in INVARIANTS replaces a match in
169689Skan   OUTERINDUCTIONVARS.  For an SSA_NAME the constant is only assigned
169689Skan   when EXTRA is non-zero, which presumably relies on
169689Skan   lambda_linear_expression_new returning a zeroed expression -- TODO
169689Skan   confirm.  */
169689Skan
169689Skanstatic lambda_linear_expression
169689Skangcc_tree_to_linear_expression (int depth, tree expr,
169689Skan			       VEC(tree,heap) *outerinductionvars,
169689Skan			       VEC(tree,heap) *invariants, int extra)
169689Skan{
169689Skan  lambda_linear_expression lle = NULL;
169689Skan  switch (TREE_CODE (expr))
169689Skan    {
169689Skan    case INTEGER_CST:
169689Skan      {
169689Skan	lle = lambda_linear_expression_new (depth, 2 * depth);
169689Skan	LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr);
169689Skan	if (extra != 0)
169689Skan	  LLE_CONSTANT (lle) += extra;
169689Skan
169689Skan	LLE_DENOMINATOR (lle) = 1;
169689Skan      }
169689Skan      break;
169689Skan    case SSA_NAME:
169689Skan      {
169689Skan	tree iv, invar;
169689Skan	size_t i;
169689Skan	for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++)
169689Skan	  if (iv != NULL)
169689Skan	    {
169689Skan	      if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr))
169689Skan		{
169689Skan		  lle = lambda_linear_expression_new (depth, 2 * depth);
169689Skan		  LLE_COEFFICIENTS (lle)[i] = 1;
169689Skan		  if (extra != 0)
169689Skan		    LLE_CONSTANT (lle) = extra;
169689Skan
169689Skan		  LLE_DENOMINATOR (lle) = 1;
169689Skan		}
169689Skan	    }
169689Skan	for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
169689Skan	  if (invar != NULL)
169689Skan	    {
169689Skan	      if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr))
169689Skan		{
169689Skan		  lle = lambda_linear_expression_new (depth, 2 * depth);
169689Skan		  LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1;
169689Skan		  if (extra != 0)
169689Skan		    LLE_CONSTANT (lle) = extra;
169689Skan		  LLE_DENOMINATOR (lle) = 1;
169689Skan		}
169689Skan	    }
169689Skan      }
169689Skan      break;
169689Skan    default:
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  return lle;
169689Skan}
169689Skan
169689Skan/* Return the depth of the loopnest NEST, that is the number of loops
169689Skan   visited when starting at NEST and repeatedly following the INNER
169689Skan   pointer until it is NULL.  A NULL NEST gives 0.
169689Skan
169689Skan   NOTE(review): the count is kept in a size_t but returned as an int, so
169689Skan   it is implicitly converted on return.  */
169689Skan
169689Skanstatic int
169689Skandepth_of_nest (struct loop *nest)
169689Skan{
169689Skan  size_t depth = 0;
169689Skan  while (nest)
169689Skan    {
169689Skan      depth++;
169689Skan      nest = nest->inner;
169689Skan    }
169689Skan  return depth;
169689Skan}
169689Skan
169689Skan
169689Skan/* Return true if OP is invariant in LOOP and all outer loops.
169689Skan
169689Skan   The answer is true straight away when OP satisfies
169689Skan   is_gimple_min_invariant, or when the depth of LOOP is 0.  Otherwise OP
169689Skan   must satisfy expr_invariant_in_loop_p for LOOP and, if LOOP has an
169689Skan   outer loop, the same test is applied recursively to that outer
169689Skan   loop.  */
169689Skan
169689Skanstatic bool
169689Skaninvariant_in_loop_and_outer_loops (struct loop *loop, tree op)
169689Skan{
169689Skan  if (is_gimple_min_invariant (op))
169689Skan    return true;
169689Skan  if (loop->depth == 0)
169689Skan    return true;
169689Skan  if (!expr_invariant_in_loop_p (loop, op))
169689Skan    return false;
169689Skan  if (loop->outer
169689Skan      && !invariant_in_loop_and_outer_loops (loop->outer, op))
169689Skan    return false;
169689Skan  return true;
169689Skan}
169689Skan
169689Skan/* Generate a lambda loop from a gcc loop LOOP.  Return the new lambda loop,
169689Skan   or NULL if it could not be converted.
169689Skan   DEPTH is the depth of the loop.
169689Skan   INVARIANTS is a pointer to the array of loop invariants.
169689Skan   The induction variable for this loop should be stored in the parameter
169689Skan   OURINDUCTIONVAR.
169689Skan   OUTERINDUCTIONVARS is an array of induction variables for outer loops.
169689Skan   LBOUNDVARS, UBOUNDVARS and STEPS each receive one new element: the tree
169689Skan   used as the lower bound, a PLUS_EXPR adding the adjustment derived from
169689Skan   the exit test to the upper bound tree, and the integer step.
169689Skan
169689Skan   Whenever NULL is returned a reason is written to the dump file,
169689Skan   provided one is open and TDF_DETAILS is set.
169689Skan
169689Skan   NOTE(review): outputs may be partly updated on failure.
169689Skan   *OURINDUCTIONVAR is assigned as soon as the PHI node is found,
169689Skan   *INVARIANTS may gain an element before the upper bound is converted,
169689Skan   and LBOUNDVARS, UBOUNDVARS and STEPS are pushed to before the check
169689Skan   that the upper bound could be converted.  */
169689Skan
169689Skanstatic lambda_loop
169689Skangcc_loop_to_lambda_loop (struct loop *loop, int depth,
169689Skan			 VEC(tree,heap) ** invariants,
169689Skan			 tree * ourinductionvar,
169689Skan			 VEC(tree,heap) * outerinductionvars,
169689Skan			 VEC(tree,heap) ** lboundvars,
169689Skan			 VEC(tree,heap) ** uboundvars,
169689Skan			 VEC(int,heap) ** steps)
169689Skan{
169689Skan  tree phi;
169689Skan  tree exit_cond;
169689Skan  tree access_fn, inductionvar;
169689Skan  tree step;
169689Skan  lambda_loop lloop = NULL;
169689Skan  lambda_linear_expression lbound, ubound;
169689Skan  tree test;
169689Skan  int stepint;
169689Skan  int extra = 0;
169689Skan  tree lboundvar, uboundvar, uboundresult;
169689Skan
169689Skan  /* Find out induction var and exit condition.  */
169689Skan  inductionvar = find_induction_var_from_exit_cond (loop);
169689Skan  exit_cond = get_loop_exit_condition (loop);
169689Skan
169689Skan  if (inductionvar == NULL || exit_cond == NULL)
169689Skan    {
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n");
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  test = TREE_OPERAND (exit_cond, 0);
169689Skan
169689Skan  if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
169689Skan    {
169689Skan
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Cannot find PHI node for induction variable\n");
169689Skan
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  phi = SSA_NAME_DEF_STMT (inductionvar);
169689Skan  if (TREE_CODE (phi) != PHI_NODE)
169689Skan    {
169689Skan      phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE);
169689Skan      if (!phi)
169689Skan	{
169689Skan
169689Skan	  if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	    fprintf (dump_file,
169689Skan		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
169689Skan
169689Skan	  return NULL;
169689Skan	}
169689Skan
169689Skan      phi = SSA_NAME_DEF_STMT (phi);
169689Skan      if (TREE_CODE (phi) != PHI_NODE)
169689Skan	{
169689Skan
169689Skan	  if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	    fprintf (dump_file,
169689Skan		     "Unable to convert loop: Cannot find PHI node for induction variable\n");
169689Skan	  return NULL;
169689Skan	}
169689Skan
169689Skan    }
169689Skan
169689Skan  /* The induction variable name/version we want to put in the array is the
169689Skan     result of the induction variable phi node.  */
169689Skan  *ourinductionvar = PHI_RESULT (phi);
169689Skan  access_fn = instantiate_parameters
169689Skan    (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
169689Skan  if (access_fn == chrec_dont_know)
169689Skan    {
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Access function for induction variable phi is unknown\n");
169689Skan
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  step = evolution_part_in_loop_num (access_fn, loop->num);
169689Skan  if (!step || step == chrec_dont_know)
169689Skan    {
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Cannot determine step of loop.\n");
169689Skan
169689Skan      return NULL;
169689Skan    }
169689Skan  if (TREE_CODE (step) != INTEGER_CST)
169689Skan    {
169689Skan
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Step of loop is not integer.\n");
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  stepint = TREE_INT_CST_LOW (step);
169689Skan
169689Skan  /* Only want phis for induction vars, which will have two
169689Skan     arguments.  NOTE(review): any argument count other than two is
169689Skan     rejected here, although the dump message only mentions ">2".  */
169689Skan  if (PHI_NUM_ARGS (phi) != 2)
169689Skan    {
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: PHI node for induction variable has >2 arguments\n");
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  /* Another induction variable check. One argument's source should be
169689Skan     in the loop, one outside the loop.  NOTE(review): the condition
169689Skan     below only rejects the case where both sources are inside the loop;
169689Skan     the "both outside" case named in the dump message is not tested and
169689Skan     falls through to the code that picks argument 0 as lower bound.  */
169689Skan  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)
169689Skan      && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src))
169689Skan    {
169689Skan
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n");
169689Skan
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src))
169689Skan    {
169689Skan      lboundvar = PHI_ARG_DEF (phi, 1);
169689Skan      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
169689Skan					      outerinductionvars, *invariants,
169689Skan					      0);
169689Skan    }
169689Skan  else
169689Skan    {
169689Skan      lboundvar = PHI_ARG_DEF (phi, 0);
169689Skan      lbound = gcc_tree_to_linear_expression (depth, lboundvar,
169689Skan					      outerinductionvars, *invariants,
169689Skan					      0);
169689Skan    }
169689Skan
169689Skan  if (!lbound)
169689Skan    {
169689Skan
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Cannot convert lower bound to linear expression\n");
169689Skan
169689Skan      return NULL;
169689Skan    }
169689Skan  /* One part of the test may be a loop invariant tree.  At most one
169689Skan     operand is recorded in *INVARIANTS: operand 1 is tried first, and
169689Skan     operand 0 only when operand 1 does not qualify.  To qualify an
169689Skan     operand must be an SSA_NAME that is invariant in LOOP and in all
169689Skan     of its outer loops.  */
169689Skan  VEC_reserve (tree, heap, *invariants, 1);
169689Skan  if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
169689Skan      && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1)))
169689Skan    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1));
169689Skan  else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
169689Skan	   && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0)))
169689Skan    VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0));
169689Skan
169689Skan  /* The non-induction variable part of the test is the upper bound variable.
169689Skan   */
169689Skan  if (TREE_OPERAND (test, 0) == inductionvar)
169689Skan    uboundvar = TREE_OPERAND (test, 1);
169689Skan  else
169689Skan    uboundvar = TREE_OPERAND (test, 0);
169689Skan
169689Skan
169689Skan  /* We only size the vectors assuming we have, at max, 2 times as many
169689Skan     invariants as we do loops (one for each bound).
169689Skan     This is just an arbitrary number, but it has to be matched against the
169689Skan     code below.  */
169689Skan  gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
169689Skan
169689Skan
169689Skan  /* We might have some leftover.  The adjustment EXTRA is minus the
169689Skan     step for LT_EXPR, NE_EXPR and GT_EXPR tests, plus the step for
169689Skan     EQ_EXPR, and stays 0 for every other comparison code.  */
169689Skan  if (TREE_CODE (test) == LT_EXPR)
169689Skan    extra = -1 * stepint;
169689Skan  else if (TREE_CODE (test) == NE_EXPR)
169689Skan    extra = -1 * stepint;
169689Skan  else if (TREE_CODE (test) == GT_EXPR)
169689Skan    extra = -1 * stepint;
169689Skan  else if (TREE_CODE (test) == EQ_EXPR)
169689Skan    extra = 1 * stepint;
169689Skan
169689Skan  ubound = gcc_tree_to_linear_expression (depth, uboundvar,
169689Skan					  outerinductionvars,
169689Skan					  *invariants, extra);
169689Skan  uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar,
169689Skan			 build_int_cst (TREE_TYPE (uboundvar), extra));
169689Skan  VEC_safe_push (tree, heap, *uboundvars, uboundresult);
169689Skan  VEC_safe_push (tree, heap, *lboundvars, lboundvar);
169689Skan  VEC_safe_push (int, heap, *steps, stepint);
169689Skan  if (!ubound)
169689Skan    {
169689Skan      if (dump_file && (dump_flags & TDF_DETAILS))
169689Skan	fprintf (dump_file,
169689Skan		 "Unable to convert loop: Cannot convert upper bound to linear expression\n");
169689Skan      return NULL;
169689Skan    }
169689Skan
169689Skan  lloop = lambda_loop_new ();
169689Skan  LL_STEP (lloop) = stepint;
169689Skan  LL_LOWER_BOUND (lloop) = lbound;
169689Skan  LL_UPPER_BOUND (lloop) = ubound;
169689Skan  return lloop;
169689Skan}
169689Skan
169689Skan/* Given a LOOP, find the induction variable it is testing against in the exit
169689Skan   condition.  Return the induction variable if found, NULL otherwise.
169689Skan
169689Skan   NULL_TREE is returned when get_loop_exit_condition yields nothing, when
169689Skan   the exit condition is not a COND_EXPR, when operand 0 of the COND_EXPR
169689Skan   is not a comparison, when neither operand of the comparison is
169689Skan   invariant in LOOP, or when the operand chosen as the induction variable
169689Skan   is not an SSA_NAME.  Operand 0 of the comparison is tested for
169689Skan   invariance first, so if both operands are invariant operand 1 is the
169689Skan   one chosen.  */
169689Skan
169689Skanstatic tree
169689Skanfind_induction_var_from_exit_cond (struct loop *loop)
169689Skan{
169689Skan  tree expr = get_loop_exit_condition (loop);
169689Skan  tree ivarop;
169689Skan  tree test;
169689Skan  if (expr == NULL_TREE)
169689Skan    return NULL_TREE;
169689Skan  if (TREE_CODE (expr) != COND_EXPR)
169689Skan    return NULL_TREE;
169689Skan  test = TREE_OPERAND (expr, 0);
169689Skan  if (!COMPARISON_CLASS_P (test))
169689Skan    return NULL_TREE;
169689Skan
169689Skan  /* Find the side that is invariant in this loop. The ivar must be the other
169689Skan     side.  */
169689Skan
169689Skan  if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0)))
169689Skan      ivarop = TREE_OPERAND (test, 1);
169689Skan  else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1)))
169689Skan      ivarop = TREE_OPERAND (test, 0);
169689Skan  else
169689Skan    return NULL_TREE;
169689Skan
169689Skan  if (TREE_CODE (ivarop) != SSA_NAME)
169689Skan    return NULL_TREE;
169689Skan  return ivarop;
169689Skan}
169689Skan
169689SkanDEF_VEC_P(lambda_loop); /* Vector of lambda_loop; presumably the pointer-element VEC flavour -- confirm in vec.h.  */
169689SkanDEF_VEC_ALLOC_P(lambda_loop,heap); /* Heap-allocated variant, used as VEC(lambda_loop,heap) below.  */
169689Skan
169689Skan/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST.
169689Skan   Return the new loop nest.
169689Skan   INDUCTIONVARS is a pointer to an array of induction variables for the
169689Skan   loopnest that will be filled in during this process.
169689Skan   INVARIANTS is a pointer to an array of invariants that will be filled in
169689Skan   during this process.
169689Skan   CURRLOOPS is only passed on to perfect_nestify.
169689Skan
169689Skan   NULL is returned when the nest is neither perfect nor convertible to a
169689Skan   perfect nest, when any loop reached through the INNER chain cannot be
169689Skan   converted by gcc_loop_to_lambda_loop, or when perfect_nestify fails.
169689Skan   The returned nest is created with the depth of LOOP_NEST and room for
169689Skan   twice that many invariants.  The local work vectors are freed on every
169689Skan   path, successful or not.
169689Skan
169689Skan   NOTE(review): on failure *INDUCTIONVARS and *INVARIANTS may already
169689Skan   contain elements added for the loops converted so far.  */
169689Skan
169689Skanlambda_loopnest
169689Skangcc_loopnest_to_lambda_loopnest (struct loops *currloops,
169689Skan				 struct loop *loop_nest,
169689Skan				 VEC(tree,heap) **inductionvars,
169689Skan				 VEC(tree,heap) **invariants)
169689Skan{
169689Skan  lambda_loopnest ret = NULL;
169689Skan  struct loop *temp = loop_nest;
169689Skan  int depth = depth_of_nest (loop_nest);
169689Skan  size_t i;
169689Skan  VEC(lambda_loop,heap) *loops = NULL;
169689Skan  VEC(tree,heap) *uboundvars = NULL;
169689Skan  VEC(tree,heap) *lboundvars  = NULL;
169689Skan  VEC(int,heap) *steps = NULL;
169689Skan  lambda_loop newloop;
169689Skan  tree inductionvar = NULL;
169689Skan  bool perfect_nest = perfect_nest_p (loop_nest);
169689Skan
169689Skan  if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest))
169689Skan    goto fail;
169689Skan
169689Skan  while (temp)
169689Skan    {
169689Skan      newloop = gcc_loop_to_lambda_loop (temp, depth, invariants,
169689Skan					 &inductionvar, *inductionvars,
169689Skan					 &lboundvars, &uboundvars,
169689Skan					 &steps);
169689Skan      if (!newloop)
169689Skan	goto fail;
169689Skan
169689Skan      VEC_safe_push (tree, heap, *inductionvars, inductionvar);
169689Skan      VEC_safe_push (lambda_loop, heap, loops, newloop);
169689Skan      temp = temp->inner;
169689Skan    }
169689Skan
169689Skan  if (!perfect_nest)
169689Skan    {
169689Skan      if (!perfect_nestify (currloops, loop_nest,
169689Skan			    lboundvars, uboundvars, steps, *inductionvars))
169689Skan	{
169689Skan	  if (dump_file)
169689Skan	    fprintf (dump_file,
169689Skan		     "Not a perfect loop nest and couldn't convert to one.\n");
169689Skan	  goto fail;
169689Skan	}
169689Skan      else if (dump_file)
169689Skan	fprintf (dump_file,
169689Skan		 "Successfully converted loop nest to perfect loop nest.\n");
169689Skan    }
169689Skan
169689Skan  ret = lambda_loopnest_new (depth, 2 * depth);
169689Skan
169689Skan  for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++)
169689Skan    LN_LOOPS (ret)[i] = newloop;
169689Skan
169689Skan fail: /* Also reached on success; RET is still NULL on failure.  */
169689Skan  VEC_free (lambda_loop, heap, loops);
169689Skan  VEC_free (tree, heap, uboundvars);
169689Skan  VEC_free (tree, heap, lboundvars);
169689Skan  VEC_free (int, heap, steps);
169689Skan
169689Skan  return ret;
169689Skan}
169689Skan
169689Skan/* Convert a lambda body vector LBV to a gcc tree, and return the new tree.
169689Skan   STMTS_TO_INSERT is a pointer to a tree where the statements we need to be
169689Skan   inserted for us are stored.  INDUCTION_VARS is the array of induction
169689Skan   variables for the loop this LBV is from.  TYPE is the tree type to use for
169689Skan   the variables and trees involved.  */
169689Skan
169689Skanstatic tree
169689Skanlbv_to_gcc_expression (lambda_body_vector lbv,
169689Skan		       tree type, VEC(tree,heap) *induction_vars,
169689Skan		       tree *stmts_to_insert)
169689Skan{
169689Skan  tree stmts, stmt, resvar, name;
169689Skan  tree iv;
169689Skan  size_t i;
169689Skan  tree_stmt_iterator tsi;
169689Skan
169689Skan  /* Create a statement list and a linear expression temporary.  */
169689Skan  stmts = alloc_stmt_list ();
169689Skan  resvar = create_tmp_var (type, "lbvtmp");
169689Skan  add_referenced_var (resvar);
169689Skan
169689Skan  /* Start at 0.  */
169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
169689Skan  name = make_ssa_name (resvar, stmt);
169689Skan  TREE_OPERAND (stmt, 0) = name;
169689Skan  tsi = tsi_last (stmts);
169689Skan  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan  for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
169689Skan    {
169689Skan      if (LBV_COEFFICIENTS (lbv)[i] != 0)
169689Skan	{
169689Skan	  tree newname;
169689Skan	  tree coeffmult;
169689Skan
169689Skan	  /* newname = coefficient * induction_variable */
169689Skan	  coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]);
169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			 fold_build2 (MULT_EXPR, type, iv, coeffmult));
169689Skan
169689Skan	  newname = make_ssa_name (resvar, stmt);
169689Skan	  TREE_OPERAND (stmt, 0) = newname;
169689Skan	  fold_stmt (&stmt);
169689Skan	  tsi = tsi_last (stmts);
169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan	  /* name = name + newname */
169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			 build2 (PLUS_EXPR, type, name, newname));
169689Skan	  name = make_ssa_name (resvar, stmt);
169689Skan	  TREE_OPERAND (stmt, 0) = name;
169689Skan	  fold_stmt (&stmt);
169689Skan	  tsi = tsi_last (stmts);
169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan	}
169689Skan    }
169689Skan
169689Skan  /* Handle any denominator that occurs.  */
169689Skan  if (LBV_DENOMINATOR (lbv) != 1)
169689Skan    {
169689Skan      tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv));
169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan		     build2 (CEIL_DIV_EXPR, type, name, denominator));
169689Skan      name = make_ssa_name (resvar, stmt);
169689Skan      TREE_OPERAND (stmt, 0) = name;
169689Skan      fold_stmt (&stmt);
169689Skan      tsi = tsi_last (stmts);
169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan    }
169689Skan  *stmts_to_insert = stmts;
169689Skan  return name;
169689Skan}
169689Skan
169689Skan/* Convert a linear expression from coefficient and constant form to a
169689Skan   gcc tree.
169689Skan   Return the tree that represents the final value of the expression.
169689Skan   LLE is the linear expression to convert.
169689Skan   OFFSET is the linear offset to apply to the expression.
169689Skan   TYPE is the tree type to use for the variables and math.
169689Skan   INDUCTION_VARS is a vector of induction variables for the loops.
169689Skan   INVARIANTS is a vector of the loop nest invariants.
169689Skan   WRAP specifies what tree code to wrap the results in, if there is more than
169689Skan   one (it is either MAX_EXPR, or MIN_EXPR).
169689Skan   STMTS_TO_INSERT Is a pointer to the statement list we fill in with
169689Skan   statements that need to be inserted for the linear expression.  */
169689Skan
169689Skanstatic tree
169689Skanlle_to_gcc_expression (lambda_linear_expression lle,
169689Skan		       lambda_linear_expression offset,
169689Skan		       tree type,
169689Skan		       VEC(tree,heap) *induction_vars,
169689Skan		       VEC(tree,heap) *invariants,
169689Skan		       enum tree_code wrap, tree *stmts_to_insert)
169689Skan{
169689Skan  tree stmts, stmt, resvar, name;
169689Skan  size_t i;
169689Skan  tree_stmt_iterator tsi;
169689Skan  tree iv, invar;
169689Skan  VEC(tree,heap) *results = NULL;
169689Skan
169689Skan  gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR);
169689Skan  name = NULL_TREE;
169689Skan  /* Create a statement list and a linear expression temporary.  */
169689Skan  stmts = alloc_stmt_list ();
169689Skan  resvar = create_tmp_var (type, "lletmp");
169689Skan  add_referenced_var (resvar);
169689Skan
169689Skan  /* Build up the linear expressions, and put the variable representing the
169689Skan     result in the results array.  */
169689Skan  for (; lle != NULL; lle = LLE_NEXT (lle))
169689Skan    {
169689Skan      /* Start at name = 0.  */
169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node);
169689Skan      name = make_ssa_name (resvar, stmt);
169689Skan      TREE_OPERAND (stmt, 0) = name;
169689Skan      fold_stmt (&stmt);
169689Skan      tsi = tsi_last (stmts);
169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan      /* First do the induction variables.
169689Skan         at the end, name = name + all the induction variables added
169689Skan         together.  */
169689Skan      for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++)
169689Skan	{
169689Skan	  if (LLE_COEFFICIENTS (lle)[i] != 0)
169689Skan	    {
169689Skan	      tree newname;
169689Skan	      tree mult;
169689Skan	      tree coeff;
169689Skan
169689Skan	      /* mult = induction variable * coefficient.  */
169689Skan	      if (LLE_COEFFICIENTS (lle)[i] == 1)
169689Skan		{
169689Skan		  mult = VEC_index (tree, induction_vars, i);
169689Skan		}
169689Skan	      else
169689Skan		{
169689Skan		  coeff = build_int_cst (type,
169689Skan					 LLE_COEFFICIENTS (lle)[i]);
169689Skan		  mult = fold_build2 (MULT_EXPR, type, iv, coeff);
169689Skan		}
169689Skan
169689Skan	      /* newname = mult */
169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
169689Skan	      newname = make_ssa_name (resvar, stmt);
169689Skan	      TREE_OPERAND (stmt, 0) = newname;
169689Skan	      fold_stmt (&stmt);
169689Skan	      tsi = tsi_last (stmts);
169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan	      /* name = name + newname */
169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			     build2 (PLUS_EXPR, type, name, newname));
169689Skan	      name = make_ssa_name (resvar, stmt);
169689Skan	      TREE_OPERAND (stmt, 0) = name;
169689Skan	      fold_stmt (&stmt);
169689Skan	      tsi = tsi_last (stmts);
169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan	    }
169689Skan	}
169689Skan
169689Skan      /* Handle our invariants.
169689Skan         At the end, we have name = name + result of adding all multiplied
169689Skan         invariants.  */
169689Skan      for (i = 0; VEC_iterate (tree, invariants, i, invar); i++)
169689Skan	{
169689Skan	  if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0)
169689Skan	    {
169689Skan	      tree newname;
169689Skan	      tree mult;
169689Skan	      tree coeff;
169689Skan	      int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i];
169689Skan	      /* mult = invariant * coefficient  */
169689Skan	      if (invcoeff == 1)
169689Skan		{
169689Skan		  mult = invar;
169689Skan		}
169689Skan	      else
169689Skan		{
169689Skan		  coeff = build_int_cst (type, invcoeff);
169689Skan		  mult = fold_build2 (MULT_EXPR, type, invar, coeff);
169689Skan		}
169689Skan
169689Skan	      /* newname = mult */
169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult);
169689Skan	      newname = make_ssa_name (resvar, stmt);
169689Skan	      TREE_OPERAND (stmt, 0) = newname;
169689Skan	      fold_stmt (&stmt);
169689Skan	      tsi = tsi_last (stmts);
169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan
169689Skan	      /* name = name + newname */
169689Skan	      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			     build2 (PLUS_EXPR, type, name, newname));
169689Skan	      name = make_ssa_name (resvar, stmt);
169689Skan	      TREE_OPERAND (stmt, 0) = name;
169689Skan	      fold_stmt (&stmt);
169689Skan	      tsi = tsi_last (stmts);
169689Skan	      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan	    }
169689Skan	}
169689Skan
169689Skan      /* Now handle the constant.
169689Skan         name = name + constant.  */
169689Skan      if (LLE_CONSTANT (lle) != 0)
169689Skan	{
169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			 build2 (PLUS_EXPR, type, name,
169689Skan			         build_int_cst (type, LLE_CONSTANT (lle))));
169689Skan	  name = make_ssa_name (resvar, stmt);
169689Skan	  TREE_OPERAND (stmt, 0) = name;
169689Skan	  fold_stmt (&stmt);
169689Skan	  tsi = tsi_last (stmts);
169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan	}
169689Skan
169689Skan      /* Now handle the offset.
169689Skan         name = name + linear offset.  */
169689Skan      if (LLE_CONSTANT (offset) != 0)
169689Skan	{
169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan			 build2 (PLUS_EXPR, type, name,
169689Skan			         build_int_cst (type, LLE_CONSTANT (offset))));
169689Skan	  name = make_ssa_name (resvar, stmt);
169689Skan	  TREE_OPERAND (stmt, 0) = name;
169689Skan	  fold_stmt (&stmt);
169689Skan	  tsi = tsi_last (stmts);
169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan	}
169689Skan
169689Skan      /* Handle any denominator that occurs.  */
169689Skan      if (LLE_DENOMINATOR (lle) != 1)
169689Skan	{
169689Skan	  stmt = build_int_cst (type, LLE_DENOMINATOR (lle));
169689Skan	  stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR,
169689Skan			 type, name, stmt);
169689Skan	  stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt);
169689Skan
169689Skan	  /* name = {ceil, floor}(name/denominator) */
169689Skan	  name = make_ssa_name (resvar, stmt);
169689Skan	  TREE_OPERAND (stmt, 0) = name;
169689Skan	  tsi = tsi_last (stmts);
169689Skan	  tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan	}
169689Skan      VEC_safe_push (tree, heap, results, name);
169689Skan    }
169689Skan
169689Skan  /* Again, out of laziness, we don't handle this case yet.  It's not
169689Skan     hard, it just hasn't occurred.  */
169689Skan  gcc_assert (VEC_length (tree, results) <= 2);
169689Skan
169689Skan  /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR.  */
169689Skan  if (VEC_length (tree, results) > 1)
169689Skan    {
169689Skan      tree op1 = VEC_index (tree, results, 0);
169689Skan      tree op2 = VEC_index (tree, results, 1);
169689Skan      stmt = build2 (MODIFY_EXPR, void_type_node, resvar,
169689Skan		     build2 (wrap, type, op1, op2));
169689Skan      name = make_ssa_name (resvar, stmt);
169689Skan      TREE_OPERAND (stmt, 0) = name;
169689Skan      tsi = tsi_last (stmts);
169689Skan      tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING);
169689Skan    }
169689Skan
169689Skan  VEC_free (tree, heap, results);
169689Skan
169689Skan  *stmts_to_insert = stmts;
169689Skan  return name;
169689Skan}
169689Skan
169689Skan/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to
169689Skan   it, back into gcc code.  This changes the
169689Skan   loops, their induction variables, and their bodies, so that they
169689Skan   match the transformed loopnest.
169689Skan   OLD_LOOPNEST is the loopnest before we've replaced it with the new
169689Skan   loopnest.
169689Skan   OLD_IVS is a vector of induction variables from the old loopnest.
169689Skan   INVARIANTS is a vector of loop invariants from the old loopnest.
169689Skan   NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with.
169689Skan   TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get
169689Skan   NEW_LOOPNEST.  */
169689Skan
169689Skanvoid
169689Skanlambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest,
169689Skan				 VEC(tree,heap) *old_ivs,
169689Skan				 VEC(tree,heap) *invariants,
169689Skan				 lambda_loopnest new_loopnest,
169689Skan				 lambda_trans_matrix transform)
169689Skan{
169689Skan  struct loop *temp;
169689Skan  size_t i = 0;
169689Skan  size_t depth = 0;
169689Skan  VEC(tree,heap) *new_ivs = NULL;
169689Skan  tree oldiv;
169689Skan
169689Skan  block_stmt_iterator bsi;
169689Skan
169689Skan  if (dump_file)
169689Skan    {
169689Skan      transform = lambda_trans_matrix_inverse (transform);
169689Skan      fprintf (dump_file, "Inverse of transformation matrix:\n");
169689Skan      print_lambda_trans_matrix (dump_file, transform);
169689Skan    }
169689Skan  depth = depth_of_nest (old_loopnest);
169689Skan  temp = old_loopnest;
169689Skan
169689Skan  while (temp)
169689Skan    {
169689Skan      lambda_loop newloop;
169689Skan      basic_block bb;
169689Skan      edge exit;
169689Skan      tree ivvar, ivvarinced, exitcond, stmts;
169689Skan      enum tree_code testtype;
169689Skan      tree newupperbound, newlowerbound;
169689Skan      lambda_linear_expression offset;
169689Skan      tree type;
169689Skan      bool insert_after;
169689Skan      tree inc_stmt;
169689Skan
169689Skan      oldiv = VEC_index (tree, old_ivs, i);
169689Skan      type = TREE_TYPE (oldiv);
169689Skan
169689Skan      /* First, build the new induction variable temporary  */
169689Skan
169689Skan      ivvar = create_tmp_var (type, "lnivtmp");
169689Skan      add_referenced_var (ivvar);
169689Skan
169689Skan      VEC_safe_push (tree, heap, new_ivs, ivvar);
169689Skan
169689Skan      newloop = LN_LOOPS (new_loopnest)[i];
169689Skan
169689Skan      /* Linear offset is a bit tricky to handle.  Punt on the unhandled
169689Skan         cases for now.  */
169689Skan      offset = LL_LINEAR_OFFSET (newloop);
169689Skan
169689Skan      gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
169689Skan		  lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
169689Skan
169689Skan      /* Now build the  new lower bounds, and insert the statements
169689Skan         necessary to generate it on the loop preheader.  */
169689Skan      newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
169689Skan					     LL_LINEAR_OFFSET (newloop),
169689Skan					     type,
169689Skan					     new_ivs,
169689Skan					     invariants, MAX_EXPR, &stmts);
169689Skan      bsi_insert_on_edge (loop_preheader_edge (temp), stmts);
169689Skan      bsi_commit_edge_inserts ();
169689Skan      /* Build the new upper bound and insert its statements in the
169689Skan         basic block of the exit condition */
169689Skan      newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop),
169689Skan					     LL_LINEAR_OFFSET (newloop),
169689Skan					     type,
169689Skan					     new_ivs,
169689Skan					     invariants, MIN_EXPR, &stmts);
169689Skan      exit = temp->single_exit;
169689Skan      exitcond = get_loop_exit_condition (temp);
169689Skan      bb = bb_for_stmt (exitcond);
169689Skan      bsi = bsi_start (bb);
169689Skan      bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
169689Skan
169689Skan      /* Create the new iv.  */
169689Skan
169689Skan      standard_iv_increment_position (temp, &bsi, &insert_after);
169689Skan      create_iv (newlowerbound,
169689Skan		 build_int_cst (type, LL_STEP (newloop)),
169689Skan		 ivvar, temp, &bsi, insert_after, &ivvar,
169689Skan		 NULL);
169689Skan
169689Skan      /* Unfortunately, the incremented ivvar that create_iv inserted may not
169689Skan	 dominate the block containing the exit condition.
169689Skan	 So we simply create our own incremented iv to use in the new exit
169689Skan	 test,  and let redundancy elimination sort it out.  */
169689Skan      inc_stmt = build2 (PLUS_EXPR, type,
169689Skan			 ivvar, build_int_cst (type, LL_STEP (newloop)));
169689Skan      inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar),
169689Skan			 inc_stmt);
169689Skan      ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt);
169689Skan      TREE_OPERAND (inc_stmt, 0) = ivvarinced;
169689Skan      bsi = bsi_for_stmt (exitcond);
169689Skan      bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT);
169689Skan
169689Skan      /* Replace the exit condition with the new upper bound
169689Skan         comparison.  */
169689Skan
169689Skan      testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR;
169689Skan
169689Skan      /* We want to build a conditional where true means exit the loop, and
169689Skan	 false means continue the loop.
169689Skan	 So swap the testtype if this isn't the way things are.*/
169689Skan
169689Skan      if (exit->flags & EDGE_FALSE_VALUE)
169689Skan	testtype = swap_tree_comparison (testtype);
169689Skan
169689Skan      COND_EXPR_COND (exitcond) = build2 (testtype,
169689Skan					  boolean_type_node,
169689Skan					  newupperbound, ivvarinced);
169689Skan      update_stmt (exitcond);
169689Skan      VEC_replace (tree, new_ivs, i, ivvar);
169689Skan
169689Skan      i++;
169689Skan      temp = temp->inner;
169689Skan    }
169689Skan
169689Skan  /* Rewrite uses of the old ivs so that they are now specified in terms of
169689Skan     the new ivs.  */
169689Skan
169689Skan  for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++)
169689Skan    {
169689Skan      imm_use_iterator imm_iter;
169689Skan      use_operand_p use_p;
169689Skan      tree oldiv_def;
169689Skan      tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv);
169689Skan      tree stmt;
169689Skan
169689Skan      if (TREE_CODE (oldiv_stmt) == PHI_NODE)
169689Skan        oldiv_def = PHI_RESULT (oldiv_stmt);
169689Skan      else
169689Skan	oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF);
169689Skan      gcc_assert (oldiv_def != NULL_TREE);
169689Skan
169689Skan      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def)
169689Skan        {
169689Skan	  tree newiv, stmts;
169689Skan	  lambda_body_vector lbv, newlbv;
169689Skan
169689Skan	  gcc_assert (TREE_CODE (stmt) != PHI_NODE);
169689Skan
169689Skan	  /* Compute the new expression for the induction
169689Skan	     variable.  */
169689Skan	  depth = VEC_length (tree, new_ivs);
169689Skan	  lbv = lambda_body_vector_new (depth);
169689Skan	  LBV_COEFFICIENTS (lbv)[i] = 1;
169689Skan
169689Skan	  newlbv = lambda_body_vector_compute_new (transform, lbv);
169689Skan
169689Skan	  newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv),
169689Skan					 new_ivs, &stmts);
169689Skan	  bsi = bsi_for_stmt (stmt);
169689Skan	  /* Insert the statements to build that
169689Skan	     expression.  */
169689Skan	  bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
169689Skan
169689Skan	  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
169689Skan	    propagate_value (use_p, newiv);
169689Skan	  update_stmt (stmt);
169689Skan	}
169689Skan    }
169689Skan  VEC_free (tree, heap, new_ivs);
169689Skan}
169689Skan
169689Skan/* Return TRUE if this is not interesting statement from the perspective of
169689Skan   determining if we have a perfect loop nest.  */
169689Skan
169689Skanstatic bool
169689Skannot_interesting_stmt (tree stmt)
169689Skan{
169689Skan  /* Note that COND_EXPR's aren't interesting because if they were exiting the
169689Skan     loop, we would have already failed the number of exits tests.  */
169689Skan  if (TREE_CODE (stmt) == LABEL_EXPR
169689Skan      || TREE_CODE (stmt) == GOTO_EXPR
169689Skan      || TREE_CODE (stmt) == COND_EXPR)
169689Skan    return true;
169689Skan  return false;
169689Skan}
169689Skan
169689Skan/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP.  */
169689Skan
169689Skanstatic bool
169689Skanphi_loop_edge_uses_def (struct loop *loop, tree phi, tree def)
169689Skan{
169689Skan  int i;
169689Skan  for (i = 0; i < PHI_NUM_ARGS (phi); i++)
169689Skan    if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src))
169689Skan      if (PHI_ARG_DEF (phi, i) == def)
169689Skan	return true;
169689Skan  return false;
169689Skan}
169689Skan
169689Skan/* Return TRUE if STMT is a use of PHI_RESULT.  */
169689Skan
169689Skanstatic bool
169689Skanstmt_uses_phi_result (tree stmt, tree phi_result)
169689Skan{
169689Skan  tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE);
169689Skan
169689Skan  /* This is conservatively true, because we only want SIMPLE bumpers
169689Skan     of the form x +- constant for our pass.  */
169689Skan  return (use == phi_result);
169689Skan}
169689Skan
169689Skan/* STMT is a bumper stmt for LOOP if the version it defines is used in the
169689Skan   in-loop-edge in a phi node, and the operand it uses is the result of that
169689Skan   phi node.
169689Skan   I.E. i_29 = i_3 + 1
169689Skan        i_3 = PHI (0, i_29);  */
169689Skan
169689Skanstatic bool
169689Skanstmt_is_bumper_for_loop (struct loop *loop, tree stmt)
169689Skan{
169689Skan  tree use;
169689Skan  tree def;
169689Skan  imm_use_iterator iter;
169689Skan  use_operand_p use_p;
169689Skan
169689Skan  def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
169689Skan  if (!def)
169689Skan    return false;
169689Skan
169689Skan  FOR_EACH_IMM_USE_FAST (use_p, iter, def)
169689Skan    {
169689Skan      use = USE_STMT (use_p);
169689Skan      if (TREE_CODE (use) == PHI_NODE)
169689Skan	{
169689Skan	  if (phi_loop_edge_uses_def (loop, use, def))
169689Skan	    if (stmt_uses_phi_result (stmt, PHI_RESULT (use)))
169689Skan	      return true;
169689Skan	}
169689Skan    }
169689Skan  return false;
169689Skan}
169689Skan
169689Skan
169689Skan/* Return true if LOOP is a perfect loop nest.
169689Skan   Perfect loop nests are those loop nests where all code occurs in the
169689Skan   innermost loop body.
169689Skan   If S is a program statement, then
169689Skan
169689Skan   i.e.
169689Skan   DO I = 1, 20
169689Skan       S1
169689Skan       DO J = 1, 20
169689Skan       ...
169689Skan       END DO
169689Skan   END DO
169689Skan   is not a perfect loop nest because of S1.
169689Skan
169689Skan   DO I = 1, 20
169689Skan      DO J = 1, 20
169689Skan        S1
169689Skan	...
169689Skan      END DO
169689Skan   END DO
169689Skan   is a perfect loop nest.
169689Skan
169689Skan   Since we don't have high level loops anymore, we basically have to walk our
169689Skan   statements and ignore those that are there because the loop needs them (IE
169689Skan   the induction variable increment, and jump back to the top of the loop).  */
169689Skan
169689Skanbool
169689Skanperfect_nest_p (struct loop *loop)
169689Skan{
169689Skan  basic_block *bbs;
169689Skan  size_t i;
169689Skan  tree exit_cond;
169689Skan
169689Skan  if (!loop->inner)
169689Skan    return true;
169689Skan  bbs = get_loop_body (loop);
169689Skan  exit_cond = get_loop_exit_condition (loop);
169689Skan  for (i = 0; i < loop->num_nodes; i++)
169689Skan    {
169689Skan      if (bbs[i]->loop_father == loop)
169689Skan	{
169689Skan	  block_stmt_iterator bsi;
169689Skan	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
169689Skan	    {
169689Skan	      tree stmt = bsi_stmt (bsi);
169689Skan	      if (stmt == exit_cond
169689Skan		  || not_interesting_stmt (stmt)
169689Skan		  || stmt_is_bumper_for_loop (loop, stmt))
169689Skan		continue;
169689Skan	      free (bbs);
169689Skan	      return false;
169689Skan	    }
169689Skan	}
169689Skan    }
169689Skan  free (bbs);
169689Skan  /* See if the inner loops are perfectly nested as well.  */
169689Skan  if (loop->inner)
169689Skan    return perfect_nest_p (loop->inner);
169689Skan  return true;
169689Skan}
169689Skan
169689Skan/* Replace the USES of X in STMT, or uses with the same step as X with Y.
169689Skan   YINIT is the initial value of Y, REPLACEMENTS is a hash table to
169689Skan   avoid creating duplicate temporaries and FIRSTBSI is statement
169689Skan   iterator where new temporaries should be inserted at the beginning
169689Skan   of body basic block.  */
169689Skan
169689Skanstatic void
169689Skanreplace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x,
169689Skan				int xstep, tree y, tree yinit,
169689Skan				htab_t replacements,
169689Skan				block_stmt_iterator *firstbsi)
169689Skan{
169689Skan  ssa_op_iter iter;
169689Skan  use_operand_p use_p;
169689Skan
169689Skan  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
169689Skan    {
169689Skan      tree use = USE_FROM_PTR (use_p);
169689Skan      tree step = NULL_TREE;
169689Skan      tree scev, init, val, var, setstmt;
169689Skan      struct tree_map *h, in;
169689Skan      void **loc;
169689Skan
169689Skan      /* Replace uses of X with Y right away.  */
169689Skan      if (use == x)
169689Skan	{
169689Skan	  SET_USE (use_p, y);
169689Skan	  continue;
169689Skan	}
169689Skan
169689Skan      scev = instantiate_parameters (loop,
169689Skan				     analyze_scalar_evolution (loop, use));
169689Skan
169689Skan      if (scev == NULL || scev == chrec_dont_know)
169689Skan	continue;
169689Skan
169689Skan      step = evolution_part_in_loop_num (scev, loop->num);
169689Skan      if (step == NULL
169689Skan	  || step == chrec_dont_know
169689Skan	  || TREE_CODE (step) != INTEGER_CST
169689Skan	  || int_cst_value (step) != xstep)
169689Skan	continue;
169689Skan
169689Skan      /* Use REPLACEMENTS hash table to cache already created
169689Skan	 temporaries.  */
169689Skan      in.hash = htab_hash_pointer (use);
169689Skan      in.from = use;
169689Skan      h = htab_find_with_hash (replacements, &in, in.hash);
169689Skan      if (h != NULL)
169689Skan	{
169689Skan	  SET_USE (use_p, h->to);
169689Skan	  continue;
169689Skan	}
169689Skan
169689Skan      /* USE which has the same step as X should be replaced
169689Skan	 with a temporary set to Y + YINIT - INIT.  */
169689Skan      init = initial_condition_in_loop_num (scev, loop->num);
169689Skan      gcc_assert (init != NULL && init != chrec_dont_know);
169689Skan      if (TREE_TYPE (use) == TREE_TYPE (y))
169689Skan	{
169689Skan	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit);
169689Skan	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val);
169689Skan	  if (val == y)
169689Skan 	    {
169689Skan	      /* If X has the same type as USE, the same step
169689Skan		 and same initial value, it can be replaced by Y.  */
169689Skan	      SET_USE (use_p, y);
169689Skan	      continue;
169689Skan	    }
169689Skan	}
169689Skan      else
169689Skan	{
169689Skan	  val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit);
169689Skan	  val = fold_convert (TREE_TYPE (use), val);
169689Skan	  val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init);
169689Skan	}
169689Skan
169689Skan      /* Create a temporary variable and insert it at the beginning
169689Skan	 of the loop body basic block, right after the PHI node
169689Skan	 which sets Y.  */
169689Skan      var = create_tmp_var (TREE_TYPE (use), "perfecttmp");
169689Skan      add_referenced_var (var);
169689Skan      val = force_gimple_operand_bsi (firstbsi, val, false, NULL);
169689Skan      setstmt = build2 (MODIFY_EXPR, void_type_node, var, val);
169689Skan      var = make_ssa_name (var, setstmt);
169689Skan      TREE_OPERAND (setstmt, 0) = var;
169689Skan      bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT);
169689Skan      update_stmt (setstmt);
169689Skan      SET_USE (use_p, var);
169689Skan      h = ggc_alloc (sizeof (struct tree_map));
169689Skan      h->hash = in.hash;
169689Skan      h->from = use;
169689Skan      h->to = var;
169689Skan      loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT);
169689Skan      gcc_assert ((*(struct tree_map **)loc) == NULL);
169689Skan      *(struct tree_map **) loc = h;
169689Skan    }
169689Skan}
169689Skan
169689Skan/* Return true if STMT is an exit PHI for LOOP */
169689Skan
169689Skanstatic bool
169689Skanexit_phi_for_loop_p (struct loop *loop, tree stmt)
169689Skan{
169689Skan
169689Skan  if (TREE_CODE (stmt) != PHI_NODE
169689Skan      || PHI_NUM_ARGS (stmt) != 1
169689Skan      || bb_for_stmt (stmt) != loop->single_exit->dest)
169689Skan    return false;
169689Skan
169689Skan  return true;
169689Skan}
169689Skan
169689Skan/* Return true if STMT can be put back into the loop INNER, by
169689Skan   copying it to the beginning of that loop and changing the uses.  */
169689Skan
169689Skanstatic bool
169689Skancan_put_in_inner_loop (struct loop *inner, tree stmt)
169689Skan{
169689Skan  imm_use_iterator imm_iter;
169689Skan  use_operand_p use_p;
169689Skan
169689Skan  gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
169689Skan  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)
169689Skan      || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1)))
169689Skan    return false;
169689Skan
169689Skan  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
169689Skan    {
169689Skan      if (!exit_phi_for_loop_p (inner, USE_STMT (use_p)))
169689Skan	{
169689Skan	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
169689Skan
169689Skan	  if (!flow_bb_inside_loop_p (inner, immbb))
169689Skan	    return false;
169689Skan	}
169689Skan    }
169689Skan  return true;
169689Skan}
169689Skan
169689Skan/* Return true if STMT can be put *after* the inner loop of LOOP.  */
169689Skanstatic bool
169689Skancan_put_after_inner_loop (struct loop *loop, tree stmt)
169689Skan{
169689Skan  imm_use_iterator imm_iter;
169689Skan  use_operand_p use_p;
169689Skan
169689Skan  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
169689Skan    return false;
169689Skan
169689Skan  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0))
169689Skan    {
169689Skan      if (!exit_phi_for_loop_p (loop, USE_STMT (use_p)))
169689Skan	{
169689Skan	  basic_block immbb = bb_for_stmt (USE_STMT (use_p));
169689Skan
169689Skan	  if (!dominated_by_p (CDI_DOMINATORS,
169689Skan			       immbb,
169689Skan			       loop->inner->header)
169689Skan	      && !can_put_in_inner_loop (loop->inner, stmt))
169689Skan	    return false;
169689Skan	}
169689Skan    }
169689Skan  return true;
169689Skan}
169689Skan
169689Skan
169689Skan
169689Skan/* Return TRUE if LOOP is an imperfect nest that we can convert to a
169689Skan   perfect one.  At the moment, we only handle imperfect nests of
169689Skan   depth 2, where all of the statements occur after the inner loop.  */
169689Skan
169689Skanstatic bool
169689Skancan_convert_to_perfect_nest (struct loop *loop)
169689Skan{
169689Skan  basic_block *bbs;
169689Skan  tree exit_condition, phi;
169689Skan  size_t i;
169689Skan  block_stmt_iterator bsi;
169689Skan  basic_block exitdest;
169689Skan
169689Skan  /* Can't handle triply nested+ loops yet.  */
169689Skan  if (!loop->inner || loop->inner->inner)
169689Skan    return false;
169689Skan
169689Skan  bbs = get_loop_body (loop);
169689Skan  exit_condition = get_loop_exit_condition (loop);
169689Skan  for (i = 0; i < loop->num_nodes; i++)
169689Skan    {
169689Skan      if (bbs[i]->loop_father == loop)
169689Skan	{
169689Skan	  for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi))
169689Skan	    {
169689Skan	      tree stmt = bsi_stmt (bsi);
169689Skan
169689Skan	      if (stmt == exit_condition
169689Skan		  || not_interesting_stmt (stmt)
169689Skan		  || stmt_is_bumper_for_loop (loop, stmt))
169689Skan		continue;
169689Skan
169689Skan	      /* If this is a scalar operation that can be put back
169689Skan	         into the inner loop, or after the inner loop, through
169689Skan		 copying, then do so. This works on the theory that
169689Skan		 any amount of scalar code we have to reduplicate
169689Skan		 into or after the loops is less expensive that the
169689Skan		 win we get from rearranging the memory walk
169689Skan		 the loop is doing so that it has better
169689Skan		 cache behavior.  */
169689Skan	      if (TREE_CODE (stmt) == MODIFY_EXPR)
169689Skan		{
169689Skan		  use_operand_p use_a, use_b;
169689Skan		  imm_use_iterator imm_iter;
169689Skan		  ssa_op_iter op_iter, op_iter1;
169689Skan		  tree op0 = TREE_OPERAND (stmt, 0);
169689Skan		  tree scev = instantiate_parameters
169689Skan		    (loop, analyze_scalar_evolution (loop, op0));
169689Skan
169689Skan		  /* If the IV is simple, it can be duplicated.  */
169689Skan		  if (!automatically_generated_chrec_p (scev))
169689Skan		    {
169689Skan		      tree step = evolution_part_in_loop_num (scev, loop->num);
169689Skan		      if (step && step != chrec_dont_know
169689Skan			  && TREE_CODE (step) == INTEGER_CST)
169689Skan			continue;
169689Skan		    }
169689Skan
169689Skan		  /* The statement should not define a variable used
169689Skan		     in the inner loop.  */
169689Skan		  if (TREE_CODE (op0) == SSA_NAME)
169689Skan		    FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0)
169689Skan		      if (bb_for_stmt (USE_STMT (use_a))->loop_father
169689Skan			  == loop->inner)
169689Skan			goto fail;
169689Skan
169689Skan		  FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE)
169689Skan		    {
169689Skan		      tree node, op = USE_FROM_PTR (use_a);
169689Skan
169689Skan		      /* The variables should not be used in both loops.  */
169689Skan		      FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op)
169689Skan		      if (bb_for_stmt (USE_STMT (use_b))->loop_father
169689Skan			  == loop->inner)
169689Skan			goto fail;
169689Skan
169689Skan		      /* The statement should not use the value of a
169689Skan			 scalar that was modified in the loop.  */
169689Skan		      node = SSA_NAME_DEF_STMT (op);
169689Skan		      if (TREE_CODE (node) == PHI_NODE)
169689Skan			FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE)
169689Skan			  {
169689Skan			    tree arg = USE_FROM_PTR (use_b);
169689Skan
169689Skan			    if (TREE_CODE (arg) == SSA_NAME)
169689Skan			      {
169689Skan				tree arg_stmt = SSA_NAME_DEF_STMT (arg);
169689Skan
169689Skan				if (bb_for_stmt (arg_stmt)->loop_father
169689Skan				    == loop->inner)
169689Skan				  goto fail;
169689Skan			      }
169689Skan			  }
169689Skan		    }
169689Skan
169689Skan		  if (can_put_in_inner_loop (loop->inner, stmt)
169689Skan		      || can_put_after_inner_loop (loop, stmt))
169689Skan		    continue;
169689Skan		}
169689Skan
169689Skan	      /* Otherwise, if the bb of a statement we care about isn't
169689Skan		 dominated by the header of the inner loop, then we can't
169689Skan		 handle this case right now.  This test ensures that the
169689Skan		 statement comes completely *after* the inner loop.  */
169689Skan	      if (!dominated_by_p (CDI_DOMINATORS,
169689Skan				   bb_for_stmt (stmt),
169689Skan				   loop->inner->header))
169689Skan		goto fail;
169689Skan	    }
169689Skan	}
169689Skan    }
169689Skan
169689Skan  /* We also need to make sure the loop exit only has simple copy phis in it,
169689Skan     otherwise we don't know how to transform it into a perfect nest right
169689Skan     now.  */
169689Skan  exitdest = loop->single_exit->dest;
169689Skan
169689Skan  for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi))
169689Skan    if (PHI_NUM_ARGS (phi) != 1)
169689Skan      goto fail;
169689Skan
169689Skan  free (bbs);
169689Skan  return true;
169689Skan
169689Skan fail:
169689Skan  free (bbs);
169689Skan  return false;
169689Skan}
169689Skan
169689Skan/* Transform the loop nest into a perfect nest, if possible.
169689Skan   LOOPS is the current struct loops *
169689Skan   LOOP is the loop nest to transform into a perfect nest
169689Skan   LBOUNDS are the lower bounds for the loops to transform
169689Skan   UBOUNDS are the upper bounds for the loops to transform
169689Skan   STEPS is the STEPS for the loops to transform.
169689Skan   LOOPIVS is the induction variables for the loops to transform.
169689Skan
169689Skan   Only element 0 of LBOUNDS, UBOUNDS, STEPS and LOOPIVS is read here:
169689Skan   together they supply the base, the upper bound, the step and the
169689Skan   type of the induction variable created for the new loop.
169689Skan
169689Skan   Basically, for the case of
169689Skan
169689Skan   FOR (i = 0; i < 50; i++)
169689Skan    {
169689Skan     FOR (j =0; j < 50; j++)
169689Skan     {
169689Skan        <whatever>
169689Skan     }
169689Skan     <some code>
169689Skan    }
169689Skan
169689Skan   This function will transform it into a perfect loop nest by splitting the
169689Skan   outer loop into two loops, like so:
169689Skan
169689Skan   FOR (i = 0; i < 50; i++)
169689Skan   {
169689Skan     FOR (j = 0; j < 50; j++)
169689Skan     {
169689Skan         <whatever>
169689Skan     }
169689Skan   }
169689Skan
169689Skan   FOR (i = 0; i < 50; i ++)
169689Skan   {
169689Skan    <some code>
169689Skan   }
169689Skan
169689Skan   Only blocks whose loop_father is LOOP itself are processed.
169689Skan   Statements in blocks that come before the inner loop (blocks
169689Skan   dominating the inner loop's header) are moved to the start of that
169689Skan   header, after its labels.  Statements in the other blocks have their
169689Skan   uses of the old induction variable rewritten by
169689Skan   replace_uses_equiv_to_x_with_y and are then moved into the body block
169689Skan   of the new loop.  The exit condition, statements rejected by
169689Skan   not_interesting_stmt and the bumper of LOOP stay where they are.
169689Skan
169689Skan   The CFG is modified unconditionally; presumably callers check
169689Skan   can_convert_to_perfect_nest first -- TODO confirm.
169689Skan
169689Skan   Return the value of perfect_nest_p (LOOP) computed after the
169689Skan   statements have been moved, i.e. FALSE if we can't make this loop
169689Skan   into a perfect nest.  */
169689Skan
169689Skanstatic bool
169689Skanperfect_nestify (struct loops *loops,
169689Skan		 struct loop *loop,
169689Skan		 VEC(tree,heap) *lbounds,
169689Skan		 VEC(tree,heap) *ubounds,
169689Skan		 VEC(int,heap) *steps,
169689Skan		 VEC(tree,heap) *loopivs)
169689Skan{
169689Skan  basic_block *bbs;
169689Skan  tree exit_condition;
169689Skan  tree then_label, else_label, cond_stmt;
169689Skan  basic_block preheaderbb, headerbb, bodybb, latchbb, olddest;
169689Skan  int i;
169689Skan  block_stmt_iterator bsi, firstbsi;
169689Skan  bool insert_after;
169689Skan  edge e;
169689Skan  struct loop *newloop;
169689Skan  tree phi;
169689Skan  tree uboundvar;
169689Skan  tree stmt;
169689Skan  tree oldivvar, ivvar, ivvarinced;
169689Skan  VEC(tree,heap) *phis = NULL;
169689Skan  htab_t replacements = NULL;
169689Skan
169689Skan  /* Create the new loop.  The single exit edge of LOOP is split to
169689Skan     obtain the block used as the new loop's preheader, and an empty
169689Skan     block is created for its header.  */
169689Skan  olddest = loop->single_exit->dest;
169689Skan  preheaderbb = loop_split_edge_with (loop->single_exit, NULL);
169689Skan  headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
169689Skan
169689Skan  /* Push the exit phi nodes that we are moving.  Each phi is recorded
169689Skan     as two entries: its result, then its argument 0.  */
169689Skan  for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi))
169689Skan    {
169689Skan      VEC_reserve (tree, heap, phis, 2);
169689Skan      VEC_quick_push (tree, phis, PHI_RESULT (phi));
169689Skan      VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0));
169689Skan    }
169689Skan  e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb);
169689Skan
169689Skan  /* Remove the exit phis from the old basic block.  Make sure to set
169689Skan     PHI_RESULT to null so it doesn't get released.  */
169689Skan  while (phi_nodes (olddest) != NULL)
169689Skan    {
169689Skan      SET_PHI_RESULT (phi_nodes (olddest), NULL);
169689Skan      remove_phi_node (phi_nodes (olddest), NULL);
169689Skan    }
169689Skan
169689Skan  /* and add them back to the new basic block.  Entries come off the
169689Skan     vector in the reverse of the order they were pushed, so the
169689Skan     argument is popped before the result.  The phis are recreated in
169689Skan     preheaderbb and fed through its single predecessor edge.  */
169689Skan  while (VEC_length (tree, phis) != 0)
169689Skan    {
169689Skan      tree def;
169689Skan      tree phiname;
169689Skan      def = VEC_pop (tree, phis);
169689Skan      phiname = VEC_pop (tree, phis);
169689Skan      phi = create_phi_node (phiname, preheaderbb);
169689Skan      add_phi_arg (phi, def, single_pred_edge (preheaderbb));
169689Skan    }
169689Skan  flush_pending_stmts (e);
169689Skan  VEC_free (tree, heap, phis);
169689Skan
169689Skan  bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
169689Skan  latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb);
169689Skan  make_edge (headerbb, bodybb, EDGE_FALLTHRU);
169689Skan  then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb));
169689Skan  else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest));
169689Skan  cond_stmt = build3 (COND_EXPR, void_type_node,
169689Skan		      build2 (NE_EXPR, boolean_type_node,
169689Skan			      integer_one_node,
169689Skan			      integer_zero_node),
169689Skan		      then_label, else_label);
169689Skan  bsi = bsi_start (bodybb);
169689Skan  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
169689Skan  e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE);
169689Skan  make_edge (bodybb, latchbb, EDGE_TRUE_VALUE);
169689Skan  make_edge (latchbb, headerbb, EDGE_FALLTHRU);
169689Skan
169689Skan  /* Update the loop structures.  The blocks created above make up the
169689Skan     new loop: headerbb falls through to bodybb, bodybb ends in a
169689Skan     COND_EXPR whose true edge goes to latchbb and whose false edge E
169689Skan     leaves the loop to olddest, and latchbb falls through back to
169689Skan     headerbb.  The COND_EXPR is built with the placeholder condition
169689Skan     1 != 0; the condition of the new loop's exit test (presumably this
169689Skan     same COND_EXPR) is overwritten below once the new induction
169689Skan     variable exists.  The immediate dominators of the new blocks and
169689Skan     of olddest are set by hand.  */
169689Skan  newloop = duplicate_loop (loops, loop, olddest->loop_father);
169689Skan  newloop->header = headerbb;
169689Skan  newloop->latch = latchbb;
169689Skan  newloop->single_exit = e;
169689Skan  add_bb_to_loop (latchbb, newloop);
169689Skan  add_bb_to_loop (bodybb, newloop);
169689Skan  add_bb_to_loop (headerbb, newloop);
169689Skan  set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb);
169689Skan  set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb);
169689Skan  set_immediate_dominator (CDI_DOMINATORS, preheaderbb,
169689Skan			   loop->single_exit->src);
169689Skan  set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb);
169689Skan  set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb);
169689Skan  /* Create the new iv.  It takes the type of the first old induction
169689Skan     variable; the first lower bound and the first step are handed to
169689Skan     create_iv as its base and step.  create_iv is also given the
169689Skan     addresses of ivvar and ivvarinced; presumably it stores there the
169689Skan     iv's value before and after the increment -- TODO confirm.  */
169689Skan  oldivvar = VEC_index (tree, loopivs, 0);
169689Skan  ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv");
169689Skan  add_referenced_var (ivvar);
169689Skan  standard_iv_increment_position (newloop, &bsi, &insert_after);
169689Skan  create_iv (VEC_index (tree, lbounds, 0),
169689Skan	     build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)),
169689Skan	     ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced);
169689Skan
169689Skan  /* Create the new upper bound.  This may be not just a variable, so we copy
169689Skan     it to one just in case.  The copy is inserted at the position chosen
169689Skan     by standard_iv_increment_position above, and the new loop's exit
169689Skan     test is rewritten to "uboundvar >= ivvarinced".
169689Skan
169689Skan     NOTE(review): the temporary is always created with
169689Skan     integer_type_node, whereas the new iv uses the type of the old iv;
169689Skan     confirm the two types are guaranteed to agree.  */
169689Skan
169689Skan  exit_condition = get_loop_exit_condition (newloop);
169689Skan  uboundvar = create_tmp_var (integer_type_node, "uboundvar");
169689Skan  add_referenced_var (uboundvar);
169689Skan  stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar,
169689Skan		 VEC_index (tree, ubounds, 0));
169689Skan  uboundvar = make_ssa_name (uboundvar, stmt);
169689Skan  TREE_OPERAND (stmt, 0) = uboundvar;
169689Skan
169689Skan  if (insert_after)
169689Skan    bsi_insert_after (&bsi, stmt, BSI_SAME_STMT);
169689Skan  else
169689Skan    bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
169689Skan  update_stmt (stmt);
169689Skan  COND_EXPR_COND (exit_condition) = build2 (GE_EXPR,
169689Skan					    boolean_type_node,
169689Skan					    uboundvar,
169689Skan					    ivvarinced);
169689Skan  update_stmt (exit_condition);
169689Skan  replacements = htab_create_ggc (20, tree_map_hash,
169689Skan				  tree_map_eq, NULL);
169689Skan  bbs = get_loop_body_in_dom_order (loop);
169689Skan  /* Now move the statements, and replace the induction variable in the moved
169689Skan     statements with the correct loop induction variable.  The blocks of
169689Skan     LOOP are visited from the last to the first entry of the
169689Skan     dominator-ordered array.
169689Skan
169689Skan     NOTE(review): exit_condition was reassigned above to the exit
169689Skan     condition of the new loop, so the "stmt == exit_condition" tests
169689Skan     below do not compare against LOOP's own exit condition; presumably
169689Skan     not_interesting_stmt filters that one out -- confirm.  */
169689Skan  oldivvar = VEC_index (tree, loopivs, 0);
169689Skan  firstbsi = bsi_start (bodybb);
169689Skan  for (i = loop->num_nodes - 1; i >= 0 ; i--)
169689Skan    {
169689Skan      block_stmt_iterator tobsi = bsi_last (bodybb);
169689Skan      if (bbs[i]->loop_father == loop)
169689Skan	{
169689Skan	  /* If this is true, we are *before* the inner loop.
169689Skan	     If this isn't true, we are *after* it.
169689Skan
169689Skan	     The only time can_convert_to_perfect_nest returns true when we
169689Skan	     have statements before the inner loop is if they can be moved
169689Skan	     into the inner loop.
169689Skan
169689Skan	     The only time can_convert_to_perfect_nest returns true when we
169689Skan	     have statements after the inner loop is if they can be moved into
169689Skan	     the new split loop.
169689Skan
169689Skan	     Statements found before the inner loop are moved to just after
169689Skan	     the labels of the inner loop's header.  */
169689Skan
169689Skan	  if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i]))
169689Skan	    {
169689Skan	      block_stmt_iterator header_bsi
169689Skan		= bsi_after_labels (loop->inner->header);
169689Skan
169689Skan	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
169689Skan		{
169689Skan		  tree stmt = bsi_stmt (bsi);
169689Skan
169689Skan		  if (stmt == exit_condition
169689Skan		      || not_interesting_stmt (stmt)
169689Skan		      || stmt_is_bumper_for_loop (loop, stmt))
169689Skan		    {
169689Skan		      bsi_next (&bsi);
169689Skan		      continue;
169689Skan		    }
169689Skan
169689Skan		  bsi_move_before (&bsi, &header_bsi);
169689Skan		}
169689Skan	    }
169689Skan	  else
169689Skan	    {
169689Skan	      /* Note that the bsi only needs to be explicitly incremented
169689Skan		 when we don't move something, since it is automatically
169689Skan		 incremented when we do.  Moved statements are placed
169689Skan		 before the statement tobsi points at, i.e. the last
169689Skan		 statement bodybb held when this block was reached.
169689Skan
169689Skan		 NOTE(review): the ssa_op_iter named "i" declared in the
169689Skan		 loop body shadows the block index "i" of the enclosing
169689Skan		 loop; bbs[i] is not referenced inside that scope.  */
169689Skan	      for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);)
169689Skan		{
169689Skan		  ssa_op_iter i;
169689Skan		  tree n, stmt = bsi_stmt (bsi);
169689Skan
169689Skan		  if (stmt == exit_condition
169689Skan		      || not_interesting_stmt (stmt)
169689Skan		      || stmt_is_bumper_for_loop (loop, stmt))
169689Skan		    {
169689Skan		      bsi_next (&bsi);
169689Skan		      continue;
169689Skan		    }
169689Skan
169689Skan		  replace_uses_equiv_to_x_with_y
169689Skan		    (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar,
169689Skan		     VEC_index (tree, lbounds, 0), replacements, &firstbsi);
169689Skan
169689Skan		  bsi_move_before (&bsi, &tobsi);
169689Skan
169689Skan		  /* If the statement has any virtual operands, they may
169689Skan		     need to be rewired because the original loop may
169689Skan		     still reference them.  */
169689Skan		  FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS)
169689Skan		    mark_sym_for_renaming (SSA_NAME_VAR (n));
169689Skan		}
169689Skan	    }
169689Skan
169689Skan	}
169689Skan    }
169689Skan
169689Skan  free (bbs);
169689Skan  htab_delete (replacements);
169689Skan  return perfect_nest_p (loop);
169689Skan}
169689Skan
169689Skan/* Return true if TRANS is a legal transformation matrix that respects
169689Skan   the distance vectors of the data dependence relations in
169689Skan   DEPENDENCE_RELATIONS.  The conservative answer is false.
169689Skan
169689Skan   TRANS must be an NB_LOOPS x NB_LOOPS matrix (this is asserted).
169689Skan   NB_LOOPS is also the length assumed for every distance vector.
169689Skan   DEPENDENCE_RELATIONS may be empty, in which case there is nothing
169689Skan   to violate and the answer is true.
169689Skan
169689Skan   "Wolfe proves that a unimodular transformation represented by the
169689Skan   matrix T is legal when applied to a loop nest with a set of
169689Skan   lexicographically non-negative distance vectors RDG if and only if
169689Skan   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
169689Skan   i.e.: if and only if it transforms the lexicographically positive
169689Skan   distance vectors to lexicographically positive vectors.  Note that
169689Skan   a unimodular matrix must transform the zero vector (and only it) to
169689Skan   the zero vector." S.Muchnick.  */
169689Skan
169689Skanbool
169689Skanlambda_transform_legal_p (lambda_trans_matrix trans,
169689Skan			  int nb_loops,
169689Skan			  VEC (ddr_p, heap) *dependence_relations)
169689Skan{
169689Skan  unsigned int i, j;
169689Skan  lambda_vector distres;
169689Skan  struct data_dependence_relation *ddr;
169689Skan
169689Skan  gcc_assert (LTM_COLSIZE (trans) == nb_loops
169689Skan	      && LTM_ROWSIZE (trans) == nb_loops);
169689Skan
169689Skan  /* When there are no dependences, the transformation is correct.
169689Skan     This has to be tested before element 0 is fetched below, because
169689Skan     indexing an empty vector is out of bounds.  */
169689Skan  if (VEC_length (ddr_p, dependence_relations) == 0)
169689Skan    return true;
169689Skan
169689Skan  ddr = VEC_index (ddr_p, dependence_relations, 0);
169689Skan  if (ddr == NULL)
169689Skan    return true;
169689Skan
169689Skan  /* When there is an unknown relation in the dependence_relations, we
169689Skan     know that it is not worth looking at this loop nest: give up.  */
169689Skan  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
169689Skan    return false;
169689Skan
169689Skan  /* Scratch vector receiving each product TRANS * distance vector.  */
169689Skan  distres = lambda_vector_new (nb_loops);
169689Skan
169689Skan  /* For each distance vector in the dependence graph.  */
169689Skan  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
169689Skan    {
169689Skan      /* Don't care about relations for which we know that there is no
169689Skan	 dependence, nor about read-read relations (input dependences):
169689Skan	 these data accesses can happen in any order.  */
169689Skan      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
169689Skan	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
169689Skan	continue;
169689Skan
169689Skan      /* Conservatively answer: "this transformation is not valid".  */
169689Skan      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
169689Skan	return false;
169689Skan
169689Skan      /* If the dependence could not be captured by a distance vector,
169689Skan	 conservatively answer that the transform is not valid.  */
169689Skan      if (DDR_NUM_DIST_VECTS (ddr) == 0)
169689Skan	return false;
169689Skan
169689Skan      /* Compute trans.dist_vect for every distance vector of the
169689Skan	 relation; reject the transform as soon as one product fails
169689Skan	 the lambda_vector_lexico_pos test.  */
169689Skan      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
169689Skan	{
169689Skan	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
169689Skan				     DDR_DIST_VECT (ddr, j), distres);
169689Skan
169689Skan	  if (!lambda_vector_lexico_pos (distres, nb_loops))
169689Skan	    return false;
169689Skan	}
169689Skan    }
169689Skan  return true;
169689Skan}