1169689Skan/* Loop transformation code generation 2169689Skan Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. 3169689Skan Contributed by Daniel Berlin <dberlin@dberlin.org> 4169689Skan 5169689Skan This file is part of GCC. 6169689Skan 7169689Skan GCC is free software; you can redistribute it and/or modify it under 8169689Skan the terms of the GNU General Public License as published by the Free 9169689Skan Software Foundation; either version 2, or (at your option) any later 10169689Skan version. 11169689Skan 12169689Skan GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13169689Skan WARRANTY; without even the implied warranty of MERCHANTABILITY or 14169689Skan FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15169689Skan for more details. 16169689Skan 17169689Skan You should have received a copy of the GNU General Public License 18169689Skan along with GCC; see the file COPYING. If not, write to the Free 19169689Skan Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20169689Skan 02110-1301, USA. */ 21169689Skan 22169689Skan#include "config.h" 23169689Skan#include "system.h" 24169689Skan#include "coretypes.h" 25169689Skan#include "tm.h" 26169689Skan#include "ggc.h" 27169689Skan#include "tree.h" 28169689Skan#include "target.h" 29169689Skan#include "rtl.h" 30169689Skan#include "basic-block.h" 31169689Skan#include "diagnostic.h" 32169689Skan#include "tree-flow.h" 33169689Skan#include "tree-dump.h" 34169689Skan#include "timevar.h" 35169689Skan#include "cfgloop.h" 36169689Skan#include "expr.h" 37169689Skan#include "optabs.h" 38169689Skan#include "tree-chrec.h" 39169689Skan#include "tree-data-ref.h" 40169689Skan#include "tree-pass.h" 41169689Skan#include "tree-scalar-evolution.h" 42169689Skan#include "vec.h" 43169689Skan#include "lambda.h" 44169689Skan#include "vecprim.h" 45169689Skan 46169689Skan/* This loop nest code generation is based on non-singular matrix 47169689Skan math. 48169689Skan 49169689Skan A little terminology and a general sketch of the algorithm. See "A singular 50169689Skan loop transformation framework based on non-singular matrices" by Wei Li and 51169689Skan Keshav Pingali for formal proofs that the various statements below are 52169689Skan correct. 53169689Skan 54169689Skan A loop iteration space represents the points traversed by the loop. A point in the 55169689Skan iteration space can be represented by a vector of size <loop depth>. You can 56169689Skan therefore represent the iteration space as an integral combinations of a set 57169689Skan of basis vectors. 58169689Skan 59169689Skan A loop iteration space is dense if every integer point between the loop 60169689Skan bounds is a point in the iteration space. Every loop with a step of 1 61169689Skan therefore has a dense iteration space. 62169689Skan 63169689Skan for i = 1 to 3, step 1 is a dense iteration space. 64169689Skan 65169689Skan A loop iteration space is sparse if it is not dense. That is, the iteration 66169689Skan space skips integer points that are within the loop bounds. 67169689Skan 68169689Skan for i = 1 to 3, step 2 is a sparse iteration space, because the integer point 69169689Skan 2 is skipped. 70169689Skan 71169689Skan Dense source spaces are easy to transform, because they don't skip any 72169689Skan points to begin with. Thus we can compute the exact bounds of the target 73169689Skan space using min/max and floor/ceil. 74169689Skan 75169689Skan For a dense source space, we take the transformation matrix, decompose it 76169689Skan into a lower triangular part (H) and a unimodular part (U). 77169689Skan We then compute the auxiliary space from the unimodular part (source loop 78169689Skan nest . U = auxiliary space) , which has two important properties: 79169689Skan 1. It traverses the iterations in the same lexicographic order as the source 80169689Skan space. 81169689Skan 2. It is a dense space when the source is a dense space (even if the target 82169689Skan space is going to be sparse). 83169689Skan 84169689Skan Given the auxiliary space, we use the lower triangular part to compute the 85169689Skan bounds in the target space by simple matrix multiplication. 86169689Skan The gaps in the target space (IE the new loop step sizes) will be the 87169689Skan diagonals of the H matrix. 88169689Skan 89169689Skan Sparse source spaces require another step, because you can't directly compute 90169689Skan the exact bounds of the auxiliary and target space from the sparse space. 91169689Skan Rather than try to come up with a separate algorithm to handle sparse source 92169689Skan spaces directly, we just find a legal transformation matrix that gives you 93169689Skan the sparse source space, from a dense space, and then transform the dense 94169689Skan space. 95169689Skan 96169689Skan For a regular sparse space, you can represent the source space as an integer 97169689Skan lattice, and the base space of that lattice will always be dense. Thus, we 98169689Skan effectively use the lattice to figure out the transformation from the lattice 99169689Skan base space, to the sparse iteration space (IE what transform was applied to 100169689Skan the dense space to make it sparse). We then compose this transform with the 101169689Skan transformation matrix specified by the user (since our matrix transformations 102169689Skan are closed under composition, this is okay). We can then use the base space 103169689Skan (which is dense) plus the composed transformation matrix, to compute the rest 104169689Skan of the transform using the dense space algorithm above. 105169689Skan 106169689Skan In other words, our sparse source space (B) is decomposed into a dense base 107169689Skan space (A), and a matrix (L) that transforms A into B, such that A.L = B. 108169689Skan We then compute the composition of L and the user transformation matrix (T), 109169689Skan so that T is now a transform from A to the result, instead of from B to the 110169689Skan result. 111169689Skan IE A.(LT) = result instead of B.T = result 112169689Skan Since A is now a dense source space, we can use the dense source space 113169689Skan algorithm above to compute the result of applying transform (LT) to A. 114169689Skan 115169689Skan Fourier-Motzkin elimination is used to compute the bounds of the base space 116169689Skan of the lattice. */ 117169689Skan 118169689Skanstatic bool perfect_nestify (struct loops *, 119169689Skan struct loop *, VEC(tree,heap) *, 120169689Skan VEC(tree,heap) *, VEC(int,heap) *, 121169689Skan VEC(tree,heap) *); 122169689Skan/* Lattice stuff that is internal to the code generation algorithm. */ 123169689Skan 124169689Skantypedef struct 125169689Skan{ 126169689Skan /* Lattice base matrix. */ 127169689Skan lambda_matrix base; 128169689Skan /* Lattice dimension. */ 129169689Skan int dimension; 130169689Skan /* Origin vector for the coefficients. */ 131169689Skan lambda_vector origin; 132169689Skan /* Origin matrix for the invariants. */ 133169689Skan lambda_matrix origin_invariants; 134169689Skan /* Number of invariants. */ 135169689Skan int invariants; 136169689Skan} *lambda_lattice; 137169689Skan 138169689Skan#define LATTICE_BASE(T) ((T)->base) 139169689Skan#define LATTICE_DIMENSION(T) ((T)->dimension) 140169689Skan#define LATTICE_ORIGIN(T) ((T)->origin) 141169689Skan#define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants) 142169689Skan#define LATTICE_INVARIANTS(T) ((T)->invariants) 143169689Skan 144169689Skanstatic bool lle_equal (lambda_linear_expression, lambda_linear_expression, 145169689Skan int, int); 146169689Skanstatic lambda_lattice lambda_lattice_new (int, int); 147169689Skanstatic lambda_lattice lambda_lattice_compute_base (lambda_loopnest); 148169689Skan 149169689Skanstatic tree find_induction_var_from_exit_cond (struct loop *); 150169689Skanstatic bool can_convert_to_perfect_nest (struct loop *); 151169689Skan 152169689Skan/* Create a new lambda body vector. */ 153169689Skan 154169689Skanlambda_body_vector 155169689Skanlambda_body_vector_new (int size) 156169689Skan{ 157169689Skan lambda_body_vector ret; 158169689Skan 159169689Skan ret = ggc_alloc (sizeof (*ret)); 160169689Skan LBV_COEFFICIENTS (ret) = lambda_vector_new (size); 161169689Skan LBV_SIZE (ret) = size; 162169689Skan LBV_DENOMINATOR (ret) = 1; 163169689Skan return ret; 164169689Skan} 165169689Skan 166169689Skan/* Compute the new coefficients for the vector based on the 167169689Skan *inverse* of the transformation matrix. */ 168169689Skan 169169689Skanlambda_body_vector 170169689Skanlambda_body_vector_compute_new (lambda_trans_matrix transform, 171169689Skan lambda_body_vector vect) 172169689Skan{ 173169689Skan lambda_body_vector temp; 174169689Skan int depth; 175169689Skan 176169689Skan /* Make sure the matrix is square. */ 177169689Skan gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform)); 178169689Skan 179169689Skan depth = LTM_ROWSIZE (transform); 180169689Skan 181169689Skan temp = lambda_body_vector_new (depth); 182169689Skan LBV_DENOMINATOR (temp) = 183169689Skan LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform); 184169689Skan lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth, 185169689Skan LTM_MATRIX (transform), depth, 186169689Skan LBV_COEFFICIENTS (temp)); 187169689Skan LBV_SIZE (temp) = LBV_SIZE (vect); 188169689Skan return temp; 189169689Skan} 190169689Skan 191169689Skan/* Print out a lambda body vector. */ 192169689Skan 193169689Skanvoid 194169689Skanprint_lambda_body_vector (FILE * outfile, lambda_body_vector body) 195169689Skan{ 196169689Skan print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body)); 197169689Skan} 198169689Skan 199169689Skan/* Return TRUE if two linear expressions are equal. */ 200169689Skan 201169689Skanstatic bool 202169689Skanlle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2, 203169689Skan int depth, int invariants) 204169689Skan{ 205169689Skan int i; 206169689Skan 207169689Skan if (lle1 == NULL || lle2 == NULL) 208169689Skan return false; 209169689Skan if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2)) 210169689Skan return false; 211169689Skan if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2)) 212169689Skan return false; 213169689Skan for (i = 0; i < depth; i++) 214169689Skan if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i]) 215169689Skan return false; 216169689Skan for (i = 0; i < invariants; i++) 217169689Skan if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] != 218169689Skan LLE_INVARIANT_COEFFICIENTS (lle2)[i]) 219169689Skan return false; 220169689Skan return true; 221169689Skan} 222169689Skan 223169689Skan/* Create a new linear expression with dimension DIM, and total number 224169689Skan of invariants INVARIANTS. */ 225169689Skan 226169689Skanlambda_linear_expression 227169689Skanlambda_linear_expression_new (int dim, int invariants) 228169689Skan{ 229169689Skan lambda_linear_expression ret; 230169689Skan 231169689Skan ret = ggc_alloc_cleared (sizeof (*ret)); 232169689Skan 233169689Skan LLE_COEFFICIENTS (ret) = lambda_vector_new (dim); 234169689Skan LLE_CONSTANT (ret) = 0; 235169689Skan LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants); 236169689Skan LLE_DENOMINATOR (ret) = 1; 237169689Skan LLE_NEXT (ret) = NULL; 238169689Skan 239169689Skan return ret; 240169689Skan} 241169689Skan 242169689Skan/* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE. 243169689Skan The starting letter used for variable names is START. */ 244169689Skan 245169689Skanstatic void 246169689Skanprint_linear_expression (FILE * outfile, lambda_vector expr, int size, 247169689Skan char start) 248169689Skan{ 249169689Skan int i; 250169689Skan bool first = true; 251169689Skan for (i = 0; i < size; i++) 252169689Skan { 253169689Skan if (expr[i] != 0) 254169689Skan { 255169689Skan if (first) 256169689Skan { 257169689Skan if (expr[i] < 0) 258169689Skan fprintf (outfile, "-"); 259169689Skan first = false; 260169689Skan } 261169689Skan else if (expr[i] > 0) 262169689Skan fprintf (outfile, " + "); 263169689Skan else 264169689Skan fprintf (outfile, " - "); 265169689Skan if (abs (expr[i]) == 1) 266169689Skan fprintf (outfile, "%c", start + i); 267169689Skan else 268169689Skan fprintf (outfile, "%d%c", abs (expr[i]), start + i); 269169689Skan } 270169689Skan } 271169689Skan} 272169689Skan 273169689Skan/* Print out a lambda linear expression structure, EXPR, to OUTFILE. The 274169689Skan depth/number of coefficients is given by DEPTH, the number of invariants is 275169689Skan given by INVARIANTS, and the character to start variable names with is given 276169689Skan by START. */ 277169689Skan 278169689Skanvoid 279169689Skanprint_lambda_linear_expression (FILE * outfile, 280169689Skan lambda_linear_expression expr, 281169689Skan int depth, int invariants, char start) 282169689Skan{ 283169689Skan fprintf (outfile, "\tLinear expression: "); 284169689Skan print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start); 285169689Skan fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr)); 286169689Skan fprintf (outfile, " invariants: "); 287169689Skan print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr), 288169689Skan invariants, 'A'); 289169689Skan fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr)); 290169689Skan} 291169689Skan 292169689Skan/* Print a lambda loop structure LOOP to OUTFILE. The depth/number of 293169689Skan coefficients is given by DEPTH, the number of invariants is 294169689Skan given by INVARIANTS, and the character to start variable names with is given 295169689Skan by START. */ 296169689Skan 297169689Skanvoid 298169689Skanprint_lambda_loop (FILE * outfile, lambda_loop loop, int depth, 299169689Skan int invariants, char start) 300169689Skan{ 301169689Skan int step; 302169689Skan lambda_linear_expression expr; 303169689Skan 304169689Skan gcc_assert (loop); 305169689Skan 306169689Skan expr = LL_LINEAR_OFFSET (loop); 307169689Skan step = LL_STEP (loop); 308169689Skan fprintf (outfile, " step size = %d \n", step); 309169689Skan 310169689Skan if (expr) 311169689Skan { 312169689Skan fprintf (outfile, " linear offset: \n"); 313169689Skan print_lambda_linear_expression (outfile, expr, depth, invariants, 314169689Skan start); 315169689Skan } 316169689Skan 317169689Skan fprintf (outfile, " lower bound: \n"); 318169689Skan for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr)) 319169689Skan print_lambda_linear_expression (outfile, expr, depth, invariants, start); 320169689Skan fprintf (outfile, " upper bound: \n"); 321169689Skan for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr)) 322169689Skan print_lambda_linear_expression (outfile, expr, depth, invariants, start); 323169689Skan} 324169689Skan 325169689Skan/* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the 326169689Skan number of invariants. */ 327169689Skan 328169689Skanlambda_loopnest 329169689Skanlambda_loopnest_new (int depth, int invariants) 330169689Skan{ 331169689Skan lambda_loopnest ret; 332169689Skan ret = ggc_alloc (sizeof (*ret)); 333169689Skan 334169689Skan LN_LOOPS (ret) = ggc_alloc_cleared (depth * sizeof (lambda_loop)); 335169689Skan LN_DEPTH (ret) = depth; 336169689Skan LN_INVARIANTS (ret) = invariants; 337169689Skan 338169689Skan return ret; 339169689Skan} 340169689Skan 341169689Skan/* Print a lambda loopnest structure, NEST, to OUTFILE. The starting 342169689Skan character to use for loop names is given by START. */ 343169689Skan 344169689Skanvoid 345169689Skanprint_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start) 346169689Skan{ 347169689Skan int i; 348169689Skan for (i = 0; i < LN_DEPTH (nest); i++) 349169689Skan { 350169689Skan fprintf (outfile, "Loop %c\n", start + i); 351169689Skan print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest), 352169689Skan LN_INVARIANTS (nest), 'i'); 353169689Skan fprintf (outfile, "\n"); 354169689Skan } 355169689Skan} 356169689Skan 357169689Skan/* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number 358169689Skan of invariants. */ 359169689Skan 360169689Skanstatic lambda_lattice 361169689Skanlambda_lattice_new (int depth, int invariants) 362169689Skan{ 363169689Skan lambda_lattice ret; 364169689Skan ret = ggc_alloc (sizeof (*ret)); 365169689Skan LATTICE_BASE (ret) = lambda_matrix_new (depth, depth); 366169689Skan LATTICE_ORIGIN (ret) = lambda_vector_new (depth); 367169689Skan LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants); 368169689Skan LATTICE_DIMENSION (ret) = depth; 369169689Skan LATTICE_INVARIANTS (ret) = invariants; 370169689Skan return ret; 371169689Skan} 372169689Skan 373169689Skan/* Compute the lattice base for NEST. The lattice base is essentially a 374169689Skan non-singular transform from a dense base space to a sparse iteration space. 375169689Skan We use it so that we don't have to specially handle the case of a sparse 376169689Skan iteration space in other parts of the algorithm. As a result, this routine 377169689Skan only does something interesting (IE produce a matrix that isn't the 378169689Skan identity matrix) if NEST is a sparse space. */ 379169689Skan 380169689Skanstatic lambda_lattice 381169689Skanlambda_lattice_compute_base (lambda_loopnest nest) 382169689Skan{ 383169689Skan lambda_lattice ret; 384169689Skan int depth, invariants; 385169689Skan lambda_matrix base; 386169689Skan 387169689Skan int i, j, step; 388169689Skan lambda_loop loop; 389169689Skan lambda_linear_expression expression; 390169689Skan 391169689Skan depth = LN_DEPTH (nest); 392169689Skan invariants = LN_INVARIANTS (nest); 393169689Skan 394169689Skan ret = lambda_lattice_new (depth, invariants); 395169689Skan base = LATTICE_BASE (ret); 396169689Skan for (i = 0; i < depth; i++) 397169689Skan { 398169689Skan loop = LN_LOOPS (nest)[i]; 399169689Skan gcc_assert (loop); 400169689Skan step = LL_STEP (loop); 401169689Skan /* If we have a step of 1, then the base is one, and the 402169689Skan origin and invariant coefficients are 0. */ 403169689Skan if (step == 1) 404169689Skan { 405169689Skan for (j = 0; j < depth; j++) 406169689Skan base[i][j] = 0; 407169689Skan base[i][i] = 1; 408169689Skan LATTICE_ORIGIN (ret)[i] = 0; 409169689Skan for (j = 0; j < invariants; j++) 410169689Skan LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0; 411169689Skan } 412169689Skan else 413169689Skan { 414169689Skan /* Otherwise, we need the lower bound expression (which must 415169689Skan be an affine function) to determine the base. */ 416169689Skan expression = LL_LOWER_BOUND (loop); 417169689Skan gcc_assert (expression && !LLE_NEXT (expression) 418169689Skan && LLE_DENOMINATOR (expression) == 1); 419169689Skan 420169689Skan /* The lower triangular portion of the base is going to be the 421169689Skan coefficient times the step */ 422169689Skan for (j = 0; j < i; j++) 423169689Skan base[i][j] = LLE_COEFFICIENTS (expression)[j] 424169689Skan * LL_STEP (LN_LOOPS (nest)[j]); 425169689Skan base[i][i] = step; 426169689Skan for (j = i + 1; j < depth; j++) 427169689Skan base[i][j] = 0; 428169689Skan 429169689Skan /* Origin for this loop is the constant of the lower bound 430169689Skan expression. */ 431169689Skan LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression); 432169689Skan 433169689Skan /* Coefficient for the invariants are equal to the invariant 434169689Skan coefficients in the expression. */ 435169689Skan for (j = 0; j < invariants; j++) 436169689Skan LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 437169689Skan LLE_INVARIANT_COEFFICIENTS (expression)[j]; 438169689Skan } 439169689Skan } 440169689Skan return ret; 441169689Skan} 442169689Skan 443169689Skan/* Compute the least common multiple of two numbers A and B . */ 444169689Skan 445169689Skanstatic int 446169689Skanlcm (int a, int b) 447169689Skan{ 448169689Skan return (abs (a) * abs (b) / gcd (a, b)); 449169689Skan} 450169689Skan 451169689Skan/* Perform Fourier-Motzkin elimination to calculate the bounds of the 452169689Skan auxiliary nest. 453169689Skan Fourier-Motzkin is a way of reducing systems of linear inequalities so that 454169689Skan it is easy to calculate the answer and bounds. 455169689Skan A sketch of how it works: 456169689Skan Given a system of linear inequalities, ai * xj >= bk, you can always 457169689Skan rewrite the constraints so they are all of the form 458169689Skan a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b 459169689Skan in b1 ... bk, and some a in a1...ai) 460169689Skan You can then eliminate this x from the non-constant inequalities by 461169689Skan rewriting these as a <= b, x >= constant, and delete the x variable. 462169689Skan You can then repeat this for any remaining x variables, and then we have 463169689Skan an easy to use variable <= constant (or no variables at all) form that we 464169689Skan can construct our bounds from. 465169689Skan 466169689Skan In our case, each time we eliminate, we construct part of the bound from 467169689Skan the ith variable, then delete the ith variable. 468169689Skan 469169689Skan Remember the constant are in our vector a, our coefficient matrix is A, 470169689Skan and our invariant coefficient matrix is B. 471169689Skan 472169689Skan SIZE is the size of the matrices being passed. 473169689Skan DEPTH is the loop nest depth. 474169689Skan INVARIANTS is the number of loop invariants. 475169689Skan A, B, and a are the coefficient matrix, invariant coefficient, and a 476169689Skan vector of constants, respectively. */ 477169689Skan 478169689Skanstatic lambda_loopnest 479169689Skancompute_nest_using_fourier_motzkin (int size, 480169689Skan int depth, 481169689Skan int invariants, 482169689Skan lambda_matrix A, 483169689Skan lambda_matrix B, 484169689Skan lambda_vector a) 485169689Skan{ 486169689Skan 487169689Skan int multiple, f1, f2; 488169689Skan int i, j, k; 489169689Skan lambda_linear_expression expression; 490169689Skan lambda_loop loop; 491169689Skan lambda_loopnest auxillary_nest; 492169689Skan lambda_matrix swapmatrix, A1, B1; 493169689Skan lambda_vector swapvector, a1; 494169689Skan int newsize; 495169689Skan 496169689Skan A1 = lambda_matrix_new (128, depth); 497169689Skan B1 = lambda_matrix_new (128, invariants); 498169689Skan a1 = lambda_vector_new (128); 499169689Skan 500169689Skan auxillary_nest = lambda_loopnest_new (depth, invariants); 501169689Skan 502169689Skan for (i = depth - 1; i >= 0; i--) 503169689Skan { 504169689Skan loop = lambda_loop_new (); 505169689Skan LN_LOOPS (auxillary_nest)[i] = loop; 506169689Skan LL_STEP (loop) = 1; 507169689Skan 508169689Skan for (j = 0; j < size; j++) 509169689Skan { 510169689Skan if (A[j][i] < 0) 511169689Skan { 512169689Skan /* Any linear expression in the matrix with a coefficient less 513169689Skan than 0 becomes part of the new lower bound. */ 514169689Skan expression = lambda_linear_expression_new (depth, invariants); 515169689Skan 516169689Skan for (k = 0; k < i; k++) 517169689Skan LLE_COEFFICIENTS (expression)[k] = A[j][k]; 518169689Skan 519169689Skan for (k = 0; k < invariants; k++) 520169689Skan LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k]; 521169689Skan 522169689Skan LLE_DENOMINATOR (expression) = -1 * A[j][i]; 523169689Skan LLE_CONSTANT (expression) = -1 * a[j]; 524169689Skan 525169689Skan /* Ignore if identical to the existing lower bound. */ 526169689Skan if (!lle_equal (LL_LOWER_BOUND (loop), 527169689Skan expression, depth, invariants)) 528169689Skan { 529169689Skan LLE_NEXT (expression) = LL_LOWER_BOUND (loop); 530169689Skan LL_LOWER_BOUND (loop) = expression; 531169689Skan } 532169689Skan 533169689Skan } 534169689Skan else if (A[j][i] > 0) 535169689Skan { 536169689Skan /* Any linear expression with a coefficient greater than 0 537169689Skan becomes part of the new upper bound. */ 538169689Skan expression = lambda_linear_expression_new (depth, invariants); 539169689Skan for (k = 0; k < i; k++) 540169689Skan LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k]; 541169689Skan 542169689Skan for (k = 0; k < invariants; k++) 543169689Skan LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k]; 544169689Skan 545169689Skan LLE_DENOMINATOR (expression) = A[j][i]; 546169689Skan LLE_CONSTANT (expression) = a[j]; 547169689Skan 548169689Skan /* Ignore if identical to the existing upper bound. */ 549169689Skan if (!lle_equal (LL_UPPER_BOUND (loop), 550169689Skan expression, depth, invariants)) 551169689Skan { 552169689Skan LLE_NEXT (expression) = LL_UPPER_BOUND (loop); 553169689Skan LL_UPPER_BOUND (loop) = expression; 554169689Skan } 555169689Skan 556169689Skan } 557169689Skan } 558169689Skan 559169689Skan /* This portion creates a new system of linear inequalities by deleting 560169689Skan the i'th variable, reducing the system by one variable. */ 561169689Skan newsize = 0; 562169689Skan for (j = 0; j < size; j++) 563169689Skan { 564169689Skan /* If the coefficient for the i'th variable is 0, then we can just 565169689Skan eliminate the variable straightaway. Otherwise, we have to 566169689Skan multiply through by the coefficients we are eliminating. */ 567169689Skan if (A[j][i] == 0) 568169689Skan { 569169689Skan lambda_vector_copy (A[j], A1[newsize], depth); 570169689Skan lambda_vector_copy (B[j], B1[newsize], invariants); 571169689Skan a1[newsize] = a[j]; 572169689Skan newsize++; 573169689Skan } 574169689Skan else if (A[j][i] > 0) 575169689Skan { 576169689Skan for (k = 0; k < size; k++) 577169689Skan { 578169689Skan if (A[k][i] < 0) 579169689Skan { 580169689Skan multiple = lcm (A[j][i], A[k][i]); 581169689Skan f1 = multiple / A[j][i]; 582169689Skan f2 = -1 * multiple / A[k][i]; 583169689Skan 584169689Skan lambda_vector_add_mc (A[j], f1, A[k], f2, 585169689Skan A1[newsize], depth); 586169689Skan lambda_vector_add_mc (B[j], f1, B[k], f2, 587169689Skan B1[newsize], invariants); 588169689Skan a1[newsize] = f1 * a[j] + f2 * a[k]; 589169689Skan newsize++; 590169689Skan } 591169689Skan } 592169689Skan } 593169689Skan } 594169689Skan 595169689Skan swapmatrix = A; 596169689Skan A = A1; 597169689Skan A1 = swapmatrix; 598169689Skan 599169689Skan swapmatrix = B; 600169689Skan B = B1; 601169689Skan B1 = swapmatrix; 602169689Skan 603169689Skan swapvector = a; 604169689Skan a = a1; 605169689Skan a1 = swapvector; 606169689Skan 607169689Skan size = newsize; 608169689Skan } 609169689Skan 610169689Skan return auxillary_nest; 611169689Skan} 612169689Skan 613169689Skan/* Compute the loop bounds for the auxiliary space NEST. 614169689Skan Input system used is Ax <= b. TRANS is the unimodular transformation. 615169689Skan Given the original nest, this function will 616169689Skan 1. Convert the nest into matrix form, which consists of a matrix for the 617169689Skan coefficients, a matrix for the 618169689Skan invariant coefficients, and a vector for the constants. 619169689Skan 2. Use the matrix form to calculate the lattice base for the nest (which is 620169689Skan a dense space) 621169689Skan 3. Compose the dense space transform with the user specified transform, to 622169689Skan get a transform we can easily calculate transformed bounds for. 623169689Skan 4. Multiply the composed transformation matrix times the matrix form of the 624169689Skan loop. 625169689Skan 5. Transform the newly created matrix (from step 4) back into a loop nest 626169689Skan using Fourier-Motzkin elimination to figure out the bounds. */ 627169689Skan 628169689Skanstatic lambda_loopnest 629169689Skanlambda_compute_auxillary_space (lambda_loopnest nest, 630169689Skan lambda_trans_matrix trans) 631169689Skan{ 632169689Skan lambda_matrix A, B, A1, B1; 633169689Skan lambda_vector a, a1; 634169689Skan lambda_matrix invertedtrans; 635169689Skan int depth, invariants, size; 636169689Skan int i, j; 637169689Skan lambda_loop loop; 638169689Skan lambda_linear_expression expression; 639169689Skan lambda_lattice lattice; 640169689Skan 641169689Skan depth = LN_DEPTH (nest); 642169689Skan invariants = LN_INVARIANTS (nest); 643169689Skan 644169689Skan /* Unfortunately, we can't know the number of constraints we'll have 645169689Skan ahead of time, but this should be enough even in ridiculous loop nest 646169689Skan cases. We must not go over this limit. */ 647169689Skan A = lambda_matrix_new (128, depth); 648169689Skan B = lambda_matrix_new (128, invariants); 649169689Skan a = lambda_vector_new (128); 650169689Skan 651169689Skan A1 = lambda_matrix_new (128, depth); 652169689Skan B1 = lambda_matrix_new (128, invariants); 653169689Skan a1 = lambda_vector_new (128); 654169689Skan 655169689Skan /* Store the bounds in the equation matrix A, constant vector a, and 656169689Skan invariant matrix B, so that we have Ax <= a + B. 657169689Skan This requires a little equation rearranging so that everything is on the 658169689Skan correct side of the inequality. */ 659169689Skan size = 0; 660169689Skan for (i = 0; i < depth; i++) 661169689Skan { 662169689Skan loop = LN_LOOPS (nest)[i]; 663169689Skan 664169689Skan /* First we do the lower bound. */ 665169689Skan if (LL_STEP (loop) > 0) 666169689Skan expression = LL_LOWER_BOUND (loop); 667169689Skan else 668169689Skan expression = LL_UPPER_BOUND (loop); 669169689Skan 670169689Skan for (; expression != NULL; expression = LLE_NEXT (expression)) 671169689Skan { 672169689Skan /* Fill in the coefficient. */ 673169689Skan for (j = 0; j < i; j++) 674169689Skan A[size][j] = LLE_COEFFICIENTS (expression)[j]; 675169689Skan 676169689Skan /* And the invariant coefficient. */ 677169689Skan for (j = 0; j < invariants; j++) 678169689Skan B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j]; 679169689Skan 680169689Skan /* And the constant. */ 681169689Skan a[size] = LLE_CONSTANT (expression); 682169689Skan 683169689Skan /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all 684169689Skan constants and single variables on */ 685169689Skan A[size][i] = -1 * LLE_DENOMINATOR (expression); 686169689Skan a[size] *= -1; 687169689Skan for (j = 0; j < invariants; j++) 688169689Skan B[size][j] *= -1; 689169689Skan 690169689Skan size++; 691169689Skan /* Need to increase matrix sizes above. */ 692169689Skan gcc_assert (size <= 127); 693169689Skan 694169689Skan } 695169689Skan 696169689Skan /* Then do the exact same thing for the upper bounds. */ 697169689Skan if (LL_STEP (loop) > 0) 698169689Skan expression = LL_UPPER_BOUND (loop); 699169689Skan else 700169689Skan expression = LL_LOWER_BOUND (loop); 701169689Skan 702169689Skan for (; expression != NULL; expression = LLE_NEXT (expression)) 703169689Skan { 704169689Skan /* Fill in the coefficient. */ 705169689Skan for (j = 0; j < i; j++) 706169689Skan A[size][j] = LLE_COEFFICIENTS (expression)[j]; 707169689Skan 708169689Skan /* And the invariant coefficient. */ 709169689Skan for (j = 0; j < invariants; j++) 710169689Skan B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j]; 711169689Skan 712169689Skan /* And the constant. */ 713169689Skan a[size] = LLE_CONSTANT (expression); 714169689Skan 715169689Skan /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */ 716169689Skan for (j = 0; j < i; j++) 717169689Skan A[size][j] *= -1; 718169689Skan A[size][i] = LLE_DENOMINATOR (expression); 719169689Skan size++; 720169689Skan /* Need to increase matrix sizes above. */ 721169689Skan gcc_assert (size <= 127); 722169689Skan 723169689Skan } 724169689Skan } 725169689Skan 726169689Skan /* Compute the lattice base x = base * y + origin, where y is the 727169689Skan base space. */ 728169689Skan lattice = lambda_lattice_compute_base (nest); 729169689Skan 730169689Skan /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */ 731169689Skan 732169689Skan /* A1 = A * L */ 733169689Skan lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth); 734169689Skan 735169689Skan /* a1 = a - A * origin constant. */ 736169689Skan lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1); 737169689Skan lambda_vector_add_mc (a, 1, a1, -1, a1, size); 738169689Skan 739169689Skan /* B1 = B - A * origin invariant. */ 740169689Skan lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth, 741169689Skan invariants); 742169689Skan lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants); 743169689Skan 744169689Skan /* Now compute the auxiliary space bounds by first inverting U, multiplying 745169689Skan it by A1, then performing Fourier-Motzkin. */ 746169689Skan 747169689Skan invertedtrans = lambda_matrix_new (depth, depth); 748169689Skan 749169689Skan /* Compute the inverse of U. */ 750169689Skan lambda_matrix_inverse (LTM_MATRIX (trans), 751169689Skan invertedtrans, depth); 752169689Skan 753169689Skan /* A = A1 inv(U). */ 754169689Skan lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth); 755169689Skan 756169689Skan return compute_nest_using_fourier_motzkin (size, depth, invariants, 757169689Skan A, B1, a1); 758169689Skan} 759169689Skan 760169689Skan/* Compute the loop bounds for the target space, using the bounds of 761169689Skan the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. 762169689Skan The target space loop bounds are computed by multiplying the triangular 763169689Skan matrix H by the auxiliary nest, to get the new loop bounds. The sign of 764169689Skan the loop steps (positive or negative) is then used to swap the bounds if 765169689Skan the loop counts downwards. 766169689Skan Return the target loopnest. */ 767169689Skan 768169689Skanstatic lambda_loopnest 769169689Skanlambda_compute_target_space (lambda_loopnest auxillary_nest, 770169689Skan lambda_trans_matrix H, lambda_vector stepsigns) 771169689Skan{ 772169689Skan lambda_matrix inverse, H1; 773169689Skan int determinant, i, j; 774169689Skan int gcd1, gcd2; 775169689Skan int factor; 776169689Skan 777169689Skan lambda_loopnest target_nest; 778169689Skan int depth, invariants; 779169689Skan lambda_matrix target; 780169689Skan 781169689Skan lambda_loop auxillary_loop, target_loop; 782169689Skan lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr; 783169689Skan 784169689Skan depth = LN_DEPTH (auxillary_nest); 785169689Skan invariants = LN_INVARIANTS (auxillary_nest); 786169689Skan 787169689Skan inverse = lambda_matrix_new (depth, depth); 788169689Skan determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth); 789169689Skan 790169689Skan /* H1 is H excluding its diagonal. */ 791169689Skan H1 = lambda_matrix_new (depth, depth); 792169689Skan lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth); 793169689Skan 794169689Skan for (i = 0; i < depth; i++) 795169689Skan H1[i][i] = 0; 796169689Skan 797169689Skan /* Computes the linear offsets of the loop bounds. */ 798169689Skan target = lambda_matrix_new (depth, depth); 799169689Skan lambda_matrix_mult (H1, inverse, target, depth, depth, depth); 800169689Skan 801169689Skan target_nest = lambda_loopnest_new (depth, invariants); 802169689Skan 803169689Skan for (i = 0; i < depth; i++) 804169689Skan { 805169689Skan 806169689Skan /* Get a new loop structure. */ 807169689Skan target_loop = lambda_loop_new (); 808169689Skan LN_LOOPS (target_nest)[i] = target_loop; 809169689Skan 810169689Skan /* Computes the gcd of the coefficients of the linear part. */ 811169689Skan gcd1 = lambda_vector_gcd (target[i], i); 812169689Skan 813169689Skan /* Include the denominator in the GCD. */ 814169689Skan gcd1 = gcd (gcd1, determinant); 815169689Skan 816169689Skan /* Now divide through by the gcd. */ 817169689Skan for (j = 0; j < i; j++) 818169689Skan target[i][j] = target[i][j] / gcd1; 819169689Skan 820169689Skan expression = lambda_linear_expression_new (depth, invariants); 821169689Skan lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth); 822169689Skan LLE_DENOMINATOR (expression) = determinant / gcd1; 823169689Skan LLE_CONSTANT (expression) = 0; 824169689Skan lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression), 825169689Skan invariants); 826169689Skan LL_LINEAR_OFFSET (target_loop) = expression; 827169689Skan } 828169689Skan 829169689Skan /* For each loop, compute the new bounds from H. */ 830169689Skan for (i = 0; i < depth; i++) 831169689Skan { 832169689Skan auxillary_loop = LN_LOOPS (auxillary_nest)[i]; 833169689Skan target_loop = LN_LOOPS (target_nest)[i]; 834169689Skan LL_STEP (target_loop) = LTM_MATRIX (H)[i][i]; 835169689Skan factor = LTM_MATRIX (H)[i][i]; 836169689Skan 837169689Skan /* First we do the lower bound. */ 838169689Skan auxillary_expr = LL_LOWER_BOUND (auxillary_loop); 839169689Skan 840169689Skan for (; auxillary_expr != NULL; 841169689Skan auxillary_expr = LLE_NEXT (auxillary_expr)) 842169689Skan { 843169689Skan target_expr = lambda_linear_expression_new (depth, invariants); 844169689Skan lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr), 845169689Skan depth, inverse, depth, 846169689Skan LLE_COEFFICIENTS (target_expr)); 847169689Skan lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr), 848169689Skan LLE_COEFFICIENTS (target_expr), depth, 849169689Skan factor); 850169689Skan 851169689Skan LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor; 852169689Skan lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr), 853169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr), 854169689Skan invariants); 855169689Skan lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr), 856169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr), 857169689Skan invariants, factor); 858169689Skan LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr); 859169689Skan 860169689Skan if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth)) 861169689Skan { 862169689Skan LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr) 863169689Skan * determinant; 864169689Skan lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS 865169689Skan (target_expr), 866169689Skan LLE_INVARIANT_COEFFICIENTS 867169689Skan (target_expr), invariants, 868169689Skan determinant); 869169689Skan LLE_DENOMINATOR (target_expr) = 870169689Skan LLE_DENOMINATOR (target_expr) * determinant; 871169689Skan } 872169689Skan /* Find the gcd and divide by it here, rather than doing it 873169689Skan at the tree level. */ 874169689Skan gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth); 875169689Skan gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr), 876169689Skan invariants); 877169689Skan gcd1 = gcd (gcd1, gcd2); 878169689Skan gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr)); 879169689Skan gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr)); 880169689Skan for (j = 0; j < depth; j++) 881169689Skan LLE_COEFFICIENTS (target_expr)[j] /= gcd1; 882169689Skan for (j = 0; j < invariants; j++) 883169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1; 884169689Skan LLE_CONSTANT (target_expr) /= gcd1; 885169689Skan LLE_DENOMINATOR (target_expr) /= gcd1; 886169689Skan /* Ignore if identical to existing bound. */ 887169689Skan if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth, 888169689Skan invariants)) 889169689Skan { 890169689Skan LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop); 891169689Skan LL_LOWER_BOUND (target_loop) = target_expr; 892169689Skan } 893169689Skan } 894169689Skan /* Now do the upper bound. */ 895169689Skan auxillary_expr = LL_UPPER_BOUND (auxillary_loop); 896169689Skan 897169689Skan for (; auxillary_expr != NULL; 898169689Skan auxillary_expr = LLE_NEXT (auxillary_expr)) 899169689Skan { 900169689Skan target_expr = lambda_linear_expression_new (depth, invariants); 901169689Skan lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr), 902169689Skan depth, inverse, depth, 903169689Skan LLE_COEFFICIENTS (target_expr)); 904169689Skan lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr), 905169689Skan LLE_COEFFICIENTS (target_expr), depth, 906169689Skan factor); 907169689Skan LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor; 908169689Skan lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr), 909169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr), 910169689Skan invariants); 911169689Skan lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr), 912169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr), 913169689Skan invariants, factor); 914169689Skan LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr); 915169689Skan 916169689Skan if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth)) 917169689Skan { 918169689Skan LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr) 919169689Skan * determinant; 920169689Skan lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS 921169689Skan (target_expr), 922169689Skan LLE_INVARIANT_COEFFICIENTS 923169689Skan (target_expr), invariants, 924169689Skan determinant); 925169689Skan LLE_DENOMINATOR (target_expr) = 926169689Skan LLE_DENOMINATOR (target_expr) * determinant; 927169689Skan } 928169689Skan /* Find the gcd and divide by it here, instead of at the 929169689Skan tree level. */ 930169689Skan gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth); 931169689Skan gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr), 932169689Skan invariants); 933169689Skan gcd1 = gcd (gcd1, gcd2); 934169689Skan gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr)); 935169689Skan gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr)); 936169689Skan for (j = 0; j < depth; j++) 937169689Skan LLE_COEFFICIENTS (target_expr)[j] /= gcd1; 938169689Skan for (j = 0; j < invariants; j++) 939169689Skan LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1; 940169689Skan LLE_CONSTANT (target_expr) /= gcd1; 941169689Skan LLE_DENOMINATOR (target_expr) /= gcd1; 942169689Skan /* Ignore if equal to existing bound. */ 943169689Skan if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth, 944169689Skan invariants)) 945169689Skan { 946169689Skan LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop); 947169689Skan LL_UPPER_BOUND (target_loop) = target_expr; 948169689Skan } 949169689Skan } 950169689Skan } 951169689Skan for (i = 0; i < depth; i++) 952169689Skan { 953169689Skan target_loop = LN_LOOPS (target_nest)[i]; 954169689Skan /* If necessary, exchange the upper and lower bounds and negate 955169689Skan the step size. */ 956169689Skan if (stepsigns[i] < 0) 957169689Skan { 958169689Skan LL_STEP (target_loop) *= -1; 959169689Skan tmp_expr = LL_LOWER_BOUND (target_loop); 960169689Skan LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop); 961169689Skan LL_UPPER_BOUND (target_loop) = tmp_expr; 962169689Skan } 963169689Skan } 964169689Skan return target_nest; 965169689Skan} 966169689Skan 967169689Skan/* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new 968169689Skan result. */ 969169689Skan 970169689Skanstatic lambda_vector 971169689Skanlambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns) 972169689Skan{ 973169689Skan lambda_matrix matrix, H; 974169689Skan int size; 975169689Skan lambda_vector newsteps; 976169689Skan int i, j, factor, minimum_column; 977169689Skan int temp; 978169689Skan 979169689Skan matrix = LTM_MATRIX (trans); 980169689Skan size = LTM_ROWSIZE (trans); 981169689Skan H = lambda_matrix_new (size, size); 982169689Skan 983169689Skan newsteps = lambda_vector_new (size); 984169689Skan lambda_vector_copy (stepsigns, newsteps, size); 985169689Skan 986169689Skan lambda_matrix_copy (matrix, H, size, size); 987169689Skan 988169689Skan for (j = 0; j < size; j++) 989169689Skan { 990169689Skan lambda_vector row; 991169689Skan row = H[j]; 992169689Skan for (i = j; i < size; i++) 993169689Skan if (row[i] < 0) 994169689Skan lambda_matrix_col_negate (H, size, i); 995169689Skan while (lambda_vector_first_nz (row, size, j + 1) < size) 996169689Skan { 997169689Skan minimum_column = lambda_vector_min_nz (row, size, j); 998169689Skan lambda_matrix_col_exchange (H, size, j, minimum_column); 999169689Skan 1000169689Skan temp = newsteps[j]; 1001169689Skan newsteps[j] = newsteps[minimum_column]; 1002169689Skan newsteps[minimum_column] = temp; 1003169689Skan 1004169689Skan for (i = j + 1; i < size; i++) 1005169689Skan { 1006169689Skan factor = row[i] / row[j]; 1007169689Skan lambda_matrix_col_add (H, size, j, i, -1 * factor); 1008169689Skan } 1009169689Skan } 1010169689Skan } 1011169689Skan return newsteps; 1012169689Skan} 1013169689Skan 1014169689Skan/* Transform NEST according to TRANS, and return the new loopnest. 1015169689Skan This involves 1016169689Skan 1. Computing a lattice base for the transformation 1017169689Skan 2. Composing the dense base with the specified transformation (TRANS) 1018169689Skan 3. Decomposing the combined transformation into a lower triangular portion, 1019169689Skan and a unimodular portion. 1020169689Skan 4. Computing the auxiliary nest using the unimodular portion. 1021169689Skan 5. Computing the target nest using the auxiliary nest and the lower 1022169689Skan triangular portion. */ 1023169689Skan 1024169689Skanlambda_loopnest 1025169689Skanlambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans) 1026169689Skan{ 1027169689Skan lambda_loopnest auxillary_nest, target_nest; 1028169689Skan 1029169689Skan int depth, invariants; 1030169689Skan int i, j; 1031169689Skan lambda_lattice lattice; 1032169689Skan lambda_trans_matrix trans1, H, U; 1033169689Skan lambda_loop loop; 1034169689Skan lambda_linear_expression expression; 1035169689Skan lambda_vector origin; 1036169689Skan lambda_matrix origin_invariants; 1037169689Skan lambda_vector stepsigns; 1038169689Skan int f; 1039169689Skan 1040169689Skan depth = LN_DEPTH (nest); 1041169689Skan invariants = LN_INVARIANTS (nest); 1042169689Skan 1043169689Skan /* Keep track of the signs of the loop steps. */ 1044169689Skan stepsigns = lambda_vector_new (depth); 1045169689Skan for (i = 0; i < depth; i++) 1046169689Skan { 1047169689Skan if (LL_STEP (LN_LOOPS (nest)[i]) > 0) 1048169689Skan stepsigns[i] = 1; 1049169689Skan else 1050169689Skan stepsigns[i] = -1; 1051169689Skan } 1052169689Skan 1053169689Skan /* Compute the lattice base. */ 1054169689Skan lattice = lambda_lattice_compute_base (nest); 1055169689Skan trans1 = lambda_trans_matrix_new (depth, depth); 1056169689Skan 1057169689Skan /* Multiply the transformation matrix by the lattice base. */ 1058169689Skan 1059169689Skan lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice), 1060169689Skan LTM_MATRIX (trans1), depth, depth, depth); 1061169689Skan 1062169689Skan /* Compute the Hermite normal form for the new transformation matrix. */ 1063169689Skan H = lambda_trans_matrix_new (depth, depth); 1064169689Skan U = lambda_trans_matrix_new (depth, depth); 1065169689Skan lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H), 1066169689Skan LTM_MATRIX (U)); 1067169689Skan 1068169689Skan /* Compute the auxiliary loop nest's space from the unimodular 1069169689Skan portion. */ 1070169689Skan auxillary_nest = lambda_compute_auxillary_space (nest, U); 1071169689Skan 1072169689Skan /* Compute the loop step signs from the old step signs and the 1073169689Skan transformation matrix. */ 1074169689Skan stepsigns = lambda_compute_step_signs (trans1, stepsigns); 1075169689Skan 1076169689Skan /* Compute the target loop nest space from the auxiliary nest and 1077169689Skan the lower triangular matrix H. */ 1078169689Skan target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns); 1079169689Skan origin = lambda_vector_new (depth); 1080169689Skan origin_invariants = lambda_matrix_new (depth, invariants); 1081169689Skan lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth, 1082169689Skan LATTICE_ORIGIN (lattice), origin); 1083169689Skan lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice), 1084169689Skan origin_invariants, depth, depth, invariants); 1085169689Skan 1086169689Skan for (i = 0; i < depth; i++) 1087169689Skan { 1088169689Skan loop = LN_LOOPS (target_nest)[i]; 1089169689Skan expression = LL_LINEAR_OFFSET (loop); 1090169689Skan if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth)) 1091169689Skan f = 1; 1092169689Skan else 1093169689Skan f = LLE_DENOMINATOR (expression); 1094169689Skan 1095169689Skan LLE_CONSTANT (expression) += f * origin[i]; 1096169689Skan 1097169689Skan for (j = 0; j < invariants; j++) 1098169689Skan LLE_INVARIANT_COEFFICIENTS (expression)[j] += 1099169689Skan f * origin_invariants[i][j]; 1100169689Skan } 1101169689Skan 1102169689Skan return target_nest; 1103169689Skan 1104169689Skan} 1105169689Skan 1106169689Skan/* Convert a gcc tree expression EXPR to a lambda linear expression, and 1107169689Skan return the new expression. DEPTH is the depth of the loopnest. 1108169689Skan OUTERINDUCTIONVARS is an array of the induction variables for outer loops 1109169689Skan in this nest. INVARIANTS is the array of invariants for the loop. EXTRA 1110169689Skan is the amount we have to add/subtract from the expression because of the 1111169689Skan type of comparison it is used in. */ 1112169689Skan 1113169689Skanstatic lambda_linear_expression 1114169689Skangcc_tree_to_linear_expression (int depth, tree expr, 1115169689Skan VEC(tree,heap) *outerinductionvars, 1116169689Skan VEC(tree,heap) *invariants, int extra) 1117169689Skan{ 1118169689Skan lambda_linear_expression lle = NULL; 1119169689Skan switch (TREE_CODE (expr)) 1120169689Skan { 1121169689Skan case INTEGER_CST: 1122169689Skan { 1123169689Skan lle = lambda_linear_expression_new (depth, 2 * depth); 1124169689Skan LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr); 1125169689Skan if (extra != 0) 1126169689Skan LLE_CONSTANT (lle) += extra; 1127169689Skan 1128169689Skan LLE_DENOMINATOR (lle) = 1; 1129169689Skan } 1130169689Skan break; 1131169689Skan case SSA_NAME: 1132169689Skan { 1133169689Skan tree iv, invar; 1134169689Skan size_t i; 1135169689Skan for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++) 1136169689Skan if (iv != NULL) 1137169689Skan { 1138169689Skan if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr)) 1139169689Skan { 1140169689Skan lle = lambda_linear_expression_new (depth, 2 * depth); 1141169689Skan LLE_COEFFICIENTS (lle)[i] = 1; 1142169689Skan if (extra != 0) 1143169689Skan LLE_CONSTANT (lle) = extra; 1144169689Skan 1145169689Skan LLE_DENOMINATOR (lle) = 1; 1146169689Skan } 1147169689Skan } 1148169689Skan for (i = 0; VEC_iterate (tree, invariants, i, invar); i++) 1149169689Skan if (invar != NULL) 1150169689Skan { 1151169689Skan if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr)) 1152169689Skan { 1153169689Skan lle = lambda_linear_expression_new (depth, 2 * depth); 1154169689Skan LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1; 1155169689Skan if (extra != 0) 1156169689Skan LLE_CONSTANT (lle) = extra; 1157169689Skan LLE_DENOMINATOR (lle) = 1; 1158169689Skan } 1159169689Skan } 1160169689Skan } 1161169689Skan break; 1162169689Skan default: 1163169689Skan return NULL; 1164169689Skan } 1165169689Skan 1166169689Skan return lle; 1167169689Skan} 1168169689Skan 1169169689Skan/* Return the depth of the loopnest NEST */ 1170169689Skan 1171169689Skanstatic int 1172169689Skandepth_of_nest (struct loop *nest) 1173169689Skan{ 1174169689Skan size_t depth = 0; 1175169689Skan while (nest) 1176169689Skan { 1177169689Skan depth++; 1178169689Skan nest = nest->inner; 1179169689Skan } 1180169689Skan return depth; 1181169689Skan} 1182169689Skan 1183169689Skan 1184169689Skan/* Return true if OP is invariant in LOOP and all outer loops. */ 1185169689Skan 1186169689Skanstatic bool 1187169689Skaninvariant_in_loop_and_outer_loops (struct loop *loop, tree op) 1188169689Skan{ 1189169689Skan if (is_gimple_min_invariant (op)) 1190169689Skan return true; 1191169689Skan if (loop->depth == 0) 1192169689Skan return true; 1193169689Skan if (!expr_invariant_in_loop_p (loop, op)) 1194169689Skan return false; 1195169689Skan if (loop->outer 1196169689Skan && !invariant_in_loop_and_outer_loops (loop->outer, op)) 1197169689Skan return false; 1198169689Skan return true; 1199169689Skan} 1200169689Skan 1201169689Skan/* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop, 1202169689Skan or NULL if it could not be converted. 1203169689Skan DEPTH is the depth of the loop. 1204169689Skan INVARIANTS is a pointer to the array of loop invariants. 1205169689Skan The induction variable for this loop should be stored in the parameter 1206169689Skan OURINDUCTIONVAR. 1207169689Skan OUTERINDUCTIONVARS is an array of induction variables for outer loops. */ 1208169689Skan 1209169689Skanstatic lambda_loop 1210169689Skangcc_loop_to_lambda_loop (struct loop *loop, int depth, 1211169689Skan VEC(tree,heap) ** invariants, 1212169689Skan tree * ourinductionvar, 1213169689Skan VEC(tree,heap) * outerinductionvars, 1214169689Skan VEC(tree,heap) ** lboundvars, 1215169689Skan VEC(tree,heap) ** uboundvars, 1216169689Skan VEC(int,heap) ** steps) 1217169689Skan{ 1218169689Skan tree phi; 1219169689Skan tree exit_cond; 1220169689Skan tree access_fn, inductionvar; 1221169689Skan tree step; 1222169689Skan lambda_loop lloop = NULL; 1223169689Skan lambda_linear_expression lbound, ubound; 1224169689Skan tree test; 1225169689Skan int stepint; 1226169689Skan int extra = 0; 1227169689Skan tree lboundvar, uboundvar, uboundresult; 1228169689Skan 1229169689Skan /* Find out induction var and exit condition. */ 1230169689Skan inductionvar = find_induction_var_from_exit_cond (loop); 1231169689Skan exit_cond = get_loop_exit_condition (loop); 1232169689Skan 1233169689Skan if (inductionvar == NULL || exit_cond == NULL) 1234169689Skan { 1235169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1236169689Skan fprintf (dump_file, 1237169689Skan "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n"); 1238169689Skan return NULL; 1239169689Skan } 1240169689Skan 1241169689Skan test = TREE_OPERAND (exit_cond, 0); 1242169689Skan 1243169689Skan if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE) 1244169689Skan { 1245169689Skan 1246169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1247169689Skan fprintf (dump_file, 1248169689Skan "Unable to convert loop: Cannot find PHI node for induction variable\n"); 1249169689Skan 1250169689Skan return NULL; 1251169689Skan } 1252169689Skan 1253169689Skan phi = SSA_NAME_DEF_STMT (inductionvar); 1254169689Skan if (TREE_CODE (phi) != PHI_NODE) 1255169689Skan { 1256169689Skan phi = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE); 1257169689Skan if (!phi) 1258169689Skan { 1259169689Skan 1260169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1261169689Skan fprintf (dump_file, 1262169689Skan "Unable to convert loop: Cannot find PHI node for induction variable\n"); 1263169689Skan 1264169689Skan return NULL; 1265169689Skan } 1266169689Skan 1267169689Skan phi = SSA_NAME_DEF_STMT (phi); 1268169689Skan if (TREE_CODE (phi) != PHI_NODE) 1269169689Skan { 1270169689Skan 1271169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1272169689Skan fprintf (dump_file, 1273169689Skan "Unable to convert loop: Cannot find PHI node for induction variable\n"); 1274169689Skan return NULL; 1275169689Skan } 1276169689Skan 1277169689Skan } 1278169689Skan 1279169689Skan /* The induction variable name/version we want to put in the array is the 1280169689Skan result of the induction variable phi node. */ 1281169689Skan *ourinductionvar = PHI_RESULT (phi); 1282169689Skan access_fn = instantiate_parameters 1283169689Skan (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); 1284169689Skan if (access_fn == chrec_dont_know) 1285169689Skan { 1286169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1287169689Skan fprintf (dump_file, 1288169689Skan "Unable to convert loop: Access function for induction variable phi is unknown\n"); 1289169689Skan 1290169689Skan return NULL; 1291169689Skan } 1292169689Skan 1293169689Skan step = evolution_part_in_loop_num (access_fn, loop->num); 1294169689Skan if (!step || step == chrec_dont_know) 1295169689Skan { 1296169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1297169689Skan fprintf (dump_file, 1298169689Skan "Unable to convert loop: Cannot determine step of loop.\n"); 1299169689Skan 1300169689Skan return NULL; 1301169689Skan } 1302169689Skan if (TREE_CODE (step) != INTEGER_CST) 1303169689Skan { 1304169689Skan 1305169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1306169689Skan fprintf (dump_file, 1307169689Skan "Unable to convert loop: Step of loop is not integer.\n"); 1308169689Skan return NULL; 1309169689Skan } 1310169689Skan 1311169689Skan stepint = TREE_INT_CST_LOW (step); 1312169689Skan 1313169689Skan /* Only want phis for induction vars, which will have two 1314169689Skan arguments. */ 1315169689Skan if (PHI_NUM_ARGS (phi) != 2) 1316169689Skan { 1317169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1318169689Skan fprintf (dump_file, 1319169689Skan "Unable to convert loop: PHI node for induction variable has >2 arguments\n"); 1320169689Skan return NULL; 1321169689Skan } 1322169689Skan 1323169689Skan /* Another induction variable check. One argument's source should be 1324169689Skan in the loop, one outside the loop. */ 1325169689Skan if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src) 1326169689Skan && flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 1)->src)) 1327169689Skan { 1328169689Skan 1329169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1330169689Skan fprintf (dump_file, 1331169689Skan "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n"); 1332169689Skan 1333169689Skan return NULL; 1334169689Skan } 1335169689Skan 1336169689Skan if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)) 1337169689Skan { 1338169689Skan lboundvar = PHI_ARG_DEF (phi, 1); 1339169689Skan lbound = gcc_tree_to_linear_expression (depth, lboundvar, 1340169689Skan outerinductionvars, *invariants, 1341169689Skan 0); 1342169689Skan } 1343169689Skan else 1344169689Skan { 1345169689Skan lboundvar = PHI_ARG_DEF (phi, 0); 1346169689Skan lbound = gcc_tree_to_linear_expression (depth, lboundvar, 1347169689Skan outerinductionvars, *invariants, 1348169689Skan 0); 1349169689Skan } 1350169689Skan 1351169689Skan if (!lbound) 1352169689Skan { 1353169689Skan 1354169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1355169689Skan fprintf (dump_file, 1356169689Skan "Unable to convert loop: Cannot convert lower bound to linear expression\n"); 1357169689Skan 1358169689Skan return NULL; 1359169689Skan } 1360169689Skan /* One part of the test may be a loop invariant tree. */ 1361169689Skan VEC_reserve (tree, heap, *invariants, 1); 1362169689Skan if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME 1363169689Skan && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 1))) 1364169689Skan VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 1)); 1365169689Skan else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME 1366169689Skan && invariant_in_loop_and_outer_loops (loop, TREE_OPERAND (test, 0))) 1367169689Skan VEC_quick_push (tree, *invariants, TREE_OPERAND (test, 0)); 1368169689Skan 1369169689Skan /* The non-induction variable part of the test is the upper bound variable. 1370169689Skan */ 1371169689Skan if (TREE_OPERAND (test, 0) == inductionvar) 1372169689Skan uboundvar = TREE_OPERAND (test, 1); 1373169689Skan else 1374169689Skan uboundvar = TREE_OPERAND (test, 0); 1375169689Skan 1376169689Skan 1377169689Skan /* We only size the vectors assuming we have, at max, 2 times as many 1378169689Skan invariants as we do loops (one for each bound). 1379169689Skan This is just an arbitrary number, but it has to be matched against the 1380169689Skan code below. */ 1381169689Skan gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth)); 1382169689Skan 1383169689Skan 1384169689Skan /* We might have some leftover. */ 1385169689Skan if (TREE_CODE (test) == LT_EXPR) 1386169689Skan extra = -1 * stepint; 1387169689Skan else if (TREE_CODE (test) == NE_EXPR) 1388169689Skan extra = -1 * stepint; 1389169689Skan else if (TREE_CODE (test) == GT_EXPR) 1390169689Skan extra = -1 * stepint; 1391169689Skan else if (TREE_CODE (test) == EQ_EXPR) 1392169689Skan extra = 1 * stepint; 1393169689Skan 1394169689Skan ubound = gcc_tree_to_linear_expression (depth, uboundvar, 1395169689Skan outerinductionvars, 1396169689Skan *invariants, extra); 1397169689Skan uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar, 1398169689Skan build_int_cst (TREE_TYPE (uboundvar), extra)); 1399169689Skan VEC_safe_push (tree, heap, *uboundvars, uboundresult); 1400169689Skan VEC_safe_push (tree, heap, *lboundvars, lboundvar); 1401169689Skan VEC_safe_push (int, heap, *steps, stepint); 1402169689Skan if (!ubound) 1403169689Skan { 1404169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 1405169689Skan fprintf (dump_file, 1406169689Skan "Unable to convert loop: Cannot convert upper bound to linear expression\n"); 1407169689Skan return NULL; 1408169689Skan } 1409169689Skan 1410169689Skan lloop = lambda_loop_new (); 1411169689Skan LL_STEP (lloop) = stepint; 1412169689Skan LL_LOWER_BOUND (lloop) = lbound; 1413169689Skan LL_UPPER_BOUND (lloop) = ubound; 1414169689Skan return lloop; 1415169689Skan} 1416169689Skan 1417169689Skan/* Given a LOOP, find the induction variable it is testing against in the exit 1418169689Skan condition. Return the induction variable if found, NULL otherwise. */ 1419169689Skan 1420169689Skanstatic tree 1421169689Skanfind_induction_var_from_exit_cond (struct loop *loop) 1422169689Skan{ 1423169689Skan tree expr = get_loop_exit_condition (loop); 1424169689Skan tree ivarop; 1425169689Skan tree test; 1426169689Skan if (expr == NULL_TREE) 1427169689Skan return NULL_TREE; 1428169689Skan if (TREE_CODE (expr) != COND_EXPR) 1429169689Skan return NULL_TREE; 1430169689Skan test = TREE_OPERAND (expr, 0); 1431169689Skan if (!COMPARISON_CLASS_P (test)) 1432169689Skan return NULL_TREE; 1433169689Skan 1434169689Skan /* Find the side that is invariant in this loop. The ivar must be the other 1435169689Skan side. */ 1436169689Skan 1437169689Skan if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 0))) 1438169689Skan ivarop = TREE_OPERAND (test, 1); 1439169689Skan else if (expr_invariant_in_loop_p (loop, TREE_OPERAND (test, 1))) 1440169689Skan ivarop = TREE_OPERAND (test, 0); 1441169689Skan else 1442169689Skan return NULL_TREE; 1443169689Skan 1444169689Skan if (TREE_CODE (ivarop) != SSA_NAME) 1445169689Skan return NULL_TREE; 1446169689Skan return ivarop; 1447169689Skan} 1448169689Skan 1449169689SkanDEF_VEC_P(lambda_loop); 1450169689SkanDEF_VEC_ALLOC_P(lambda_loop,heap); 1451169689Skan 1452169689Skan/* Generate a lambda loopnest from a gcc loopnest LOOP_NEST. 1453169689Skan Return the new loop nest. 1454169689Skan INDUCTIONVARS is a pointer to an array of induction variables for the 1455169689Skan loopnest that will be filled in during this process. 1456169689Skan INVARIANTS is a pointer to an array of invariants that will be filled in 1457169689Skan during this process. */ 1458169689Skan 1459169689Skanlambda_loopnest 1460169689Skangcc_loopnest_to_lambda_loopnest (struct loops *currloops, 1461169689Skan struct loop *loop_nest, 1462169689Skan VEC(tree,heap) **inductionvars, 1463169689Skan VEC(tree,heap) **invariants) 1464169689Skan{ 1465169689Skan lambda_loopnest ret = NULL; 1466169689Skan struct loop *temp = loop_nest; 1467169689Skan int depth = depth_of_nest (loop_nest); 1468169689Skan size_t i; 1469169689Skan VEC(lambda_loop,heap) *loops = NULL; 1470169689Skan VEC(tree,heap) *uboundvars = NULL; 1471169689Skan VEC(tree,heap) *lboundvars = NULL; 1472169689Skan VEC(int,heap) *steps = NULL; 1473169689Skan lambda_loop newloop; 1474169689Skan tree inductionvar = NULL; 1475169689Skan bool perfect_nest = perfect_nest_p (loop_nest); 1476169689Skan 1477169689Skan if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest)) 1478169689Skan goto fail; 1479169689Skan 1480169689Skan while (temp) 1481169689Skan { 1482169689Skan newloop = gcc_loop_to_lambda_loop (temp, depth, invariants, 1483169689Skan &inductionvar, *inductionvars, 1484169689Skan &lboundvars, &uboundvars, 1485169689Skan &steps); 1486169689Skan if (!newloop) 1487169689Skan goto fail; 1488169689Skan 1489169689Skan VEC_safe_push (tree, heap, *inductionvars, inductionvar); 1490169689Skan VEC_safe_push (lambda_loop, heap, loops, newloop); 1491169689Skan temp = temp->inner; 1492169689Skan } 1493169689Skan 1494169689Skan if (!perfect_nest) 1495169689Skan { 1496169689Skan if (!perfect_nestify (currloops, loop_nest, 1497169689Skan lboundvars, uboundvars, steps, *inductionvars)) 1498169689Skan { 1499169689Skan if (dump_file) 1500169689Skan fprintf (dump_file, 1501169689Skan "Not a perfect loop nest and couldn't convert to one.\n"); 1502169689Skan goto fail; 1503169689Skan } 1504169689Skan else if (dump_file) 1505169689Skan fprintf (dump_file, 1506169689Skan "Successfully converted loop nest to perfect loop nest.\n"); 1507169689Skan } 1508169689Skan 1509169689Skan ret = lambda_loopnest_new (depth, 2 * depth); 1510169689Skan 1511169689Skan for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++) 1512169689Skan LN_LOOPS (ret)[i] = newloop; 1513169689Skan 1514169689Skan fail: 1515169689Skan VEC_free (lambda_loop, heap, loops); 1516169689Skan VEC_free (tree, heap, uboundvars); 1517169689Skan VEC_free (tree, heap, lboundvars); 1518169689Skan VEC_free (int, heap, steps); 1519169689Skan 1520169689Skan return ret; 1521169689Skan} 1522169689Skan 1523169689Skan/* Convert a lambda body vector LBV to a gcc tree, and return the new tree. 1524169689Skan STMTS_TO_INSERT is a pointer to a tree where the statements we need to be 1525169689Skan inserted for us are stored. INDUCTION_VARS is the array of induction 1526169689Skan variables for the loop this LBV is from. TYPE is the tree type to use for 1527169689Skan the variables and trees involved. */ 1528169689Skan 1529169689Skanstatic tree 1530169689Skanlbv_to_gcc_expression (lambda_body_vector lbv, 1531169689Skan tree type, VEC(tree,heap) *induction_vars, 1532169689Skan tree *stmts_to_insert) 1533169689Skan{ 1534169689Skan tree stmts, stmt, resvar, name; 1535169689Skan tree iv; 1536169689Skan size_t i; 1537169689Skan tree_stmt_iterator tsi; 1538169689Skan 1539169689Skan /* Create a statement list and a linear expression temporary. */ 1540169689Skan stmts = alloc_stmt_list (); 1541169689Skan resvar = create_tmp_var (type, "lbvtmp"); 1542169689Skan add_referenced_var (resvar); 1543169689Skan 1544169689Skan /* Start at 0. */ 1545169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node); 1546169689Skan name = make_ssa_name (resvar, stmt); 1547169689Skan TREE_OPERAND (stmt, 0) = name; 1548169689Skan tsi = tsi_last (stmts); 1549169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1550169689Skan 1551169689Skan for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++) 1552169689Skan { 1553169689Skan if (LBV_COEFFICIENTS (lbv)[i] != 0) 1554169689Skan { 1555169689Skan tree newname; 1556169689Skan tree coeffmult; 1557169689Skan 1558169689Skan /* newname = coefficient * induction_variable */ 1559169689Skan coeffmult = build_int_cst (type, LBV_COEFFICIENTS (lbv)[i]); 1560169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1561169689Skan fold_build2 (MULT_EXPR, type, iv, coeffmult)); 1562169689Skan 1563169689Skan newname = make_ssa_name (resvar, stmt); 1564169689Skan TREE_OPERAND (stmt, 0) = newname; 1565169689Skan fold_stmt (&stmt); 1566169689Skan tsi = tsi_last (stmts); 1567169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1568169689Skan 1569169689Skan /* name = name + newname */ 1570169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1571169689Skan build2 (PLUS_EXPR, type, name, newname)); 1572169689Skan name = make_ssa_name (resvar, stmt); 1573169689Skan TREE_OPERAND (stmt, 0) = name; 1574169689Skan fold_stmt (&stmt); 1575169689Skan tsi = tsi_last (stmts); 1576169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1577169689Skan 1578169689Skan } 1579169689Skan } 1580169689Skan 1581169689Skan /* Handle any denominator that occurs. */ 1582169689Skan if (LBV_DENOMINATOR (lbv) != 1) 1583169689Skan { 1584169689Skan tree denominator = build_int_cst (type, LBV_DENOMINATOR (lbv)); 1585169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1586169689Skan build2 (CEIL_DIV_EXPR, type, name, denominator)); 1587169689Skan name = make_ssa_name (resvar, stmt); 1588169689Skan TREE_OPERAND (stmt, 0) = name; 1589169689Skan fold_stmt (&stmt); 1590169689Skan tsi = tsi_last (stmts); 1591169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1592169689Skan } 1593169689Skan *stmts_to_insert = stmts; 1594169689Skan return name; 1595169689Skan} 1596169689Skan 1597169689Skan/* Convert a linear expression from coefficient and constant form to a 1598169689Skan gcc tree. 1599169689Skan Return the tree that represents the final value of the expression. 1600169689Skan LLE is the linear expression to convert. 1601169689Skan OFFSET is the linear offset to apply to the expression. 1602169689Skan TYPE is the tree type to use for the variables and math. 1603169689Skan INDUCTION_VARS is a vector of induction variables for the loops. 1604169689Skan INVARIANTS is a vector of the loop nest invariants. 1605169689Skan WRAP specifies what tree code to wrap the results in, if there is more than 1606169689Skan one (it is either MAX_EXPR, or MIN_EXPR). 1607169689Skan STMTS_TO_INSERT Is a pointer to the statement list we fill in with 1608169689Skan statements that need to be inserted for the linear expression. */ 1609169689Skan 1610169689Skanstatic tree 1611169689Skanlle_to_gcc_expression (lambda_linear_expression lle, 1612169689Skan lambda_linear_expression offset, 1613169689Skan tree type, 1614169689Skan VEC(tree,heap) *induction_vars, 1615169689Skan VEC(tree,heap) *invariants, 1616169689Skan enum tree_code wrap, tree *stmts_to_insert) 1617169689Skan{ 1618169689Skan tree stmts, stmt, resvar, name; 1619169689Skan size_t i; 1620169689Skan tree_stmt_iterator tsi; 1621169689Skan tree iv, invar; 1622169689Skan VEC(tree,heap) *results = NULL; 1623169689Skan 1624169689Skan gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR); 1625169689Skan name = NULL_TREE; 1626169689Skan /* Create a statement list and a linear expression temporary. */ 1627169689Skan stmts = alloc_stmt_list (); 1628169689Skan resvar = create_tmp_var (type, "lletmp"); 1629169689Skan add_referenced_var (resvar); 1630169689Skan 1631169689Skan /* Build up the linear expressions, and put the variable representing the 1632169689Skan result in the results array. */ 1633169689Skan for (; lle != NULL; lle = LLE_NEXT (lle)) 1634169689Skan { 1635169689Skan /* Start at name = 0. */ 1636169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, integer_zero_node); 1637169689Skan name = make_ssa_name (resvar, stmt); 1638169689Skan TREE_OPERAND (stmt, 0) = name; 1639169689Skan fold_stmt (&stmt); 1640169689Skan tsi = tsi_last (stmts); 1641169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1642169689Skan 1643169689Skan /* First do the induction variables. 1644169689Skan at the end, name = name + all the induction variables added 1645169689Skan together. */ 1646169689Skan for (i = 0; VEC_iterate (tree, induction_vars, i, iv); i++) 1647169689Skan { 1648169689Skan if (LLE_COEFFICIENTS (lle)[i] != 0) 1649169689Skan { 1650169689Skan tree newname; 1651169689Skan tree mult; 1652169689Skan tree coeff; 1653169689Skan 1654169689Skan /* mult = induction variable * coefficient. */ 1655169689Skan if (LLE_COEFFICIENTS (lle)[i] == 1) 1656169689Skan { 1657169689Skan mult = VEC_index (tree, induction_vars, i); 1658169689Skan } 1659169689Skan else 1660169689Skan { 1661169689Skan coeff = build_int_cst (type, 1662169689Skan LLE_COEFFICIENTS (lle)[i]); 1663169689Skan mult = fold_build2 (MULT_EXPR, type, iv, coeff); 1664169689Skan } 1665169689Skan 1666169689Skan /* newname = mult */ 1667169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult); 1668169689Skan newname = make_ssa_name (resvar, stmt); 1669169689Skan TREE_OPERAND (stmt, 0) = newname; 1670169689Skan fold_stmt (&stmt); 1671169689Skan tsi = tsi_last (stmts); 1672169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1673169689Skan 1674169689Skan /* name = name + newname */ 1675169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1676169689Skan build2 (PLUS_EXPR, type, name, newname)); 1677169689Skan name = make_ssa_name (resvar, stmt); 1678169689Skan TREE_OPERAND (stmt, 0) = name; 1679169689Skan fold_stmt (&stmt); 1680169689Skan tsi = tsi_last (stmts); 1681169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1682169689Skan } 1683169689Skan } 1684169689Skan 1685169689Skan /* Handle our invariants. 1686169689Skan At the end, we have name = name + result of adding all multiplied 1687169689Skan invariants. */ 1688169689Skan for (i = 0; VEC_iterate (tree, invariants, i, invar); i++) 1689169689Skan { 1690169689Skan if (LLE_INVARIANT_COEFFICIENTS (lle)[i] != 0) 1691169689Skan { 1692169689Skan tree newname; 1693169689Skan tree mult; 1694169689Skan tree coeff; 1695169689Skan int invcoeff = LLE_INVARIANT_COEFFICIENTS (lle)[i]; 1696169689Skan /* mult = invariant * coefficient */ 1697169689Skan if (invcoeff == 1) 1698169689Skan { 1699169689Skan mult = invar; 1700169689Skan } 1701169689Skan else 1702169689Skan { 1703169689Skan coeff = build_int_cst (type, invcoeff); 1704169689Skan mult = fold_build2 (MULT_EXPR, type, invar, coeff); 1705169689Skan } 1706169689Skan 1707169689Skan /* newname = mult */ 1708169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, mult); 1709169689Skan newname = make_ssa_name (resvar, stmt); 1710169689Skan TREE_OPERAND (stmt, 0) = newname; 1711169689Skan fold_stmt (&stmt); 1712169689Skan tsi = tsi_last (stmts); 1713169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1714169689Skan 1715169689Skan /* name = name + newname */ 1716169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1717169689Skan build2 (PLUS_EXPR, type, name, newname)); 1718169689Skan name = make_ssa_name (resvar, stmt); 1719169689Skan TREE_OPERAND (stmt, 0) = name; 1720169689Skan fold_stmt (&stmt); 1721169689Skan tsi = tsi_last (stmts); 1722169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1723169689Skan } 1724169689Skan } 1725169689Skan 1726169689Skan /* Now handle the constant. 1727169689Skan name = name + constant. */ 1728169689Skan if (LLE_CONSTANT (lle) != 0) 1729169689Skan { 1730169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1731169689Skan build2 (PLUS_EXPR, type, name, 1732169689Skan build_int_cst (type, LLE_CONSTANT (lle)))); 1733169689Skan name = make_ssa_name (resvar, stmt); 1734169689Skan TREE_OPERAND (stmt, 0) = name; 1735169689Skan fold_stmt (&stmt); 1736169689Skan tsi = tsi_last (stmts); 1737169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1738169689Skan } 1739169689Skan 1740169689Skan /* Now handle the offset. 1741169689Skan name = name + linear offset. */ 1742169689Skan if (LLE_CONSTANT (offset) != 0) 1743169689Skan { 1744169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1745169689Skan build2 (PLUS_EXPR, type, name, 1746169689Skan build_int_cst (type, LLE_CONSTANT (offset)))); 1747169689Skan name = make_ssa_name (resvar, stmt); 1748169689Skan TREE_OPERAND (stmt, 0) = name; 1749169689Skan fold_stmt (&stmt); 1750169689Skan tsi = tsi_last (stmts); 1751169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1752169689Skan } 1753169689Skan 1754169689Skan /* Handle any denominator that occurs. */ 1755169689Skan if (LLE_DENOMINATOR (lle) != 1) 1756169689Skan { 1757169689Skan stmt = build_int_cst (type, LLE_DENOMINATOR (lle)); 1758169689Skan stmt = build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR, 1759169689Skan type, name, stmt); 1760169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, stmt); 1761169689Skan 1762169689Skan /* name = {ceil, floor}(name/denominator) */ 1763169689Skan name = make_ssa_name (resvar, stmt); 1764169689Skan TREE_OPERAND (stmt, 0) = name; 1765169689Skan tsi = tsi_last (stmts); 1766169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1767169689Skan } 1768169689Skan VEC_safe_push (tree, heap, results, name); 1769169689Skan } 1770169689Skan 1771169689Skan /* Again, out of laziness, we don't handle this case yet. It's not 1772169689Skan hard, it just hasn't occurred. */ 1773169689Skan gcc_assert (VEC_length (tree, results) <= 2); 1774169689Skan 1775169689Skan /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */ 1776169689Skan if (VEC_length (tree, results) > 1) 1777169689Skan { 1778169689Skan tree op1 = VEC_index (tree, results, 0); 1779169689Skan tree op2 = VEC_index (tree, results, 1); 1780169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, resvar, 1781169689Skan build2 (wrap, type, op1, op2)); 1782169689Skan name = make_ssa_name (resvar, stmt); 1783169689Skan TREE_OPERAND (stmt, 0) = name; 1784169689Skan tsi = tsi_last (stmts); 1785169689Skan tsi_link_after (&tsi, stmt, TSI_CONTINUE_LINKING); 1786169689Skan } 1787169689Skan 1788169689Skan VEC_free (tree, heap, results); 1789169689Skan 1790169689Skan *stmts_to_insert = stmts; 1791169689Skan return name; 1792169689Skan} 1793169689Skan 1794169689Skan/* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to 1795169689Skan it, back into gcc code. This changes the 1796169689Skan loops, their induction variables, and their bodies, so that they 1797169689Skan match the transformed loopnest. 1798169689Skan OLD_LOOPNEST is the loopnest before we've replaced it with the new 1799169689Skan loopnest. 1800169689Skan OLD_IVS is a vector of induction variables from the old loopnest. 1801169689Skan INVARIANTS is a vector of loop invariants from the old loopnest. 1802169689Skan NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with. 1803169689Skan TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get 1804169689Skan NEW_LOOPNEST. */ 1805169689Skan 1806169689Skanvoid 1807169689Skanlambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, 1808169689Skan VEC(tree,heap) *old_ivs, 1809169689Skan VEC(tree,heap) *invariants, 1810169689Skan lambda_loopnest new_loopnest, 1811169689Skan lambda_trans_matrix transform) 1812169689Skan{ 1813169689Skan struct loop *temp; 1814169689Skan size_t i = 0; 1815169689Skan size_t depth = 0; 1816169689Skan VEC(tree,heap) *new_ivs = NULL; 1817169689Skan tree oldiv; 1818169689Skan 1819169689Skan block_stmt_iterator bsi; 1820169689Skan 1821169689Skan if (dump_file) 1822169689Skan { 1823169689Skan transform = lambda_trans_matrix_inverse (transform); 1824169689Skan fprintf (dump_file, "Inverse of transformation matrix:\n"); 1825169689Skan print_lambda_trans_matrix (dump_file, transform); 1826169689Skan } 1827169689Skan depth = depth_of_nest (old_loopnest); 1828169689Skan temp = old_loopnest; 1829169689Skan 1830169689Skan while (temp) 1831169689Skan { 1832169689Skan lambda_loop newloop; 1833169689Skan basic_block bb; 1834169689Skan edge exit; 1835169689Skan tree ivvar, ivvarinced, exitcond, stmts; 1836169689Skan enum tree_code testtype; 1837169689Skan tree newupperbound, newlowerbound; 1838169689Skan lambda_linear_expression offset; 1839169689Skan tree type; 1840169689Skan bool insert_after; 1841169689Skan tree inc_stmt; 1842169689Skan 1843169689Skan oldiv = VEC_index (tree, old_ivs, i); 1844169689Skan type = TREE_TYPE (oldiv); 1845169689Skan 1846169689Skan /* First, build the new induction variable temporary */ 1847169689Skan 1848169689Skan ivvar = create_tmp_var (type, "lnivtmp"); 1849169689Skan add_referenced_var (ivvar); 1850169689Skan 1851169689Skan VEC_safe_push (tree, heap, new_ivs, ivvar); 1852169689Skan 1853169689Skan newloop = LN_LOOPS (new_loopnest)[i]; 1854169689Skan 1855169689Skan /* Linear offset is a bit tricky to handle. Punt on the unhandled 1856169689Skan cases for now. */ 1857169689Skan offset = LL_LINEAR_OFFSET (newloop); 1858169689Skan 1859169689Skan gcc_assert (LLE_DENOMINATOR (offset) == 1 && 1860169689Skan lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)); 1861169689Skan 1862169689Skan /* Now build the new lower bounds, and insert the statements 1863169689Skan necessary to generate it on the loop preheader. */ 1864169689Skan newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), 1865169689Skan LL_LINEAR_OFFSET (newloop), 1866169689Skan type, 1867169689Skan new_ivs, 1868169689Skan invariants, MAX_EXPR, &stmts); 1869169689Skan bsi_insert_on_edge (loop_preheader_edge (temp), stmts); 1870169689Skan bsi_commit_edge_inserts (); 1871169689Skan /* Build the new upper bound and insert its statements in the 1872169689Skan basic block of the exit condition */ 1873169689Skan newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop), 1874169689Skan LL_LINEAR_OFFSET (newloop), 1875169689Skan type, 1876169689Skan new_ivs, 1877169689Skan invariants, MIN_EXPR, &stmts); 1878169689Skan exit = temp->single_exit; 1879169689Skan exitcond = get_loop_exit_condition (temp); 1880169689Skan bb = bb_for_stmt (exitcond); 1881169689Skan bsi = bsi_start (bb); 1882169689Skan bsi_insert_after (&bsi, stmts, BSI_NEW_STMT); 1883169689Skan 1884169689Skan /* Create the new iv. */ 1885169689Skan 1886169689Skan standard_iv_increment_position (temp, &bsi, &insert_after); 1887169689Skan create_iv (newlowerbound, 1888169689Skan build_int_cst (type, LL_STEP (newloop)), 1889169689Skan ivvar, temp, &bsi, insert_after, &ivvar, 1890169689Skan NULL); 1891169689Skan 1892169689Skan /* Unfortunately, the incremented ivvar that create_iv inserted may not 1893169689Skan dominate the block containing the exit condition. 1894169689Skan So we simply create our own incremented iv to use in the new exit 1895169689Skan test, and let redundancy elimination sort it out. */ 1896169689Skan inc_stmt = build2 (PLUS_EXPR, type, 1897169689Skan ivvar, build_int_cst (type, LL_STEP (newloop))); 1898169689Skan inc_stmt = build2 (MODIFY_EXPR, void_type_node, SSA_NAME_VAR (ivvar), 1899169689Skan inc_stmt); 1900169689Skan ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt); 1901169689Skan TREE_OPERAND (inc_stmt, 0) = ivvarinced; 1902169689Skan bsi = bsi_for_stmt (exitcond); 1903169689Skan bsi_insert_before (&bsi, inc_stmt, BSI_SAME_STMT); 1904169689Skan 1905169689Skan /* Replace the exit condition with the new upper bound 1906169689Skan comparison. */ 1907169689Skan 1908169689Skan testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR; 1909169689Skan 1910169689Skan /* We want to build a conditional where true means exit the loop, and 1911169689Skan false means continue the loop. 1912169689Skan So swap the testtype if this isn't the way things are.*/ 1913169689Skan 1914169689Skan if (exit->flags & EDGE_FALSE_VALUE) 1915169689Skan testtype = swap_tree_comparison (testtype); 1916169689Skan 1917169689Skan COND_EXPR_COND (exitcond) = build2 (testtype, 1918169689Skan boolean_type_node, 1919169689Skan newupperbound, ivvarinced); 1920169689Skan update_stmt (exitcond); 1921169689Skan VEC_replace (tree, new_ivs, i, ivvar); 1922169689Skan 1923169689Skan i++; 1924169689Skan temp = temp->inner; 1925169689Skan } 1926169689Skan 1927169689Skan /* Rewrite uses of the old ivs so that they are now specified in terms of 1928169689Skan the new ivs. */ 1929169689Skan 1930169689Skan for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++) 1931169689Skan { 1932169689Skan imm_use_iterator imm_iter; 1933169689Skan use_operand_p use_p; 1934169689Skan tree oldiv_def; 1935169689Skan tree oldiv_stmt = SSA_NAME_DEF_STMT (oldiv); 1936169689Skan tree stmt; 1937169689Skan 1938169689Skan if (TREE_CODE (oldiv_stmt) == PHI_NODE) 1939169689Skan oldiv_def = PHI_RESULT (oldiv_stmt); 1940169689Skan else 1941169689Skan oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF); 1942169689Skan gcc_assert (oldiv_def != NULL_TREE); 1943169689Skan 1944169689Skan FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def) 1945169689Skan { 1946169689Skan tree newiv, stmts; 1947169689Skan lambda_body_vector lbv, newlbv; 1948169689Skan 1949169689Skan gcc_assert (TREE_CODE (stmt) != PHI_NODE); 1950169689Skan 1951169689Skan /* Compute the new expression for the induction 1952169689Skan variable. */ 1953169689Skan depth = VEC_length (tree, new_ivs); 1954169689Skan lbv = lambda_body_vector_new (depth); 1955169689Skan LBV_COEFFICIENTS (lbv)[i] = 1; 1956169689Skan 1957169689Skan newlbv = lambda_body_vector_compute_new (transform, lbv); 1958169689Skan 1959169689Skan newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv), 1960169689Skan new_ivs, &stmts); 1961169689Skan bsi = bsi_for_stmt (stmt); 1962169689Skan /* Insert the statements to build that 1963169689Skan expression. */ 1964169689Skan bsi_insert_before (&bsi, stmts, BSI_SAME_STMT); 1965169689Skan 1966169689Skan FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) 1967169689Skan propagate_value (use_p, newiv); 1968169689Skan update_stmt (stmt); 1969169689Skan } 1970169689Skan } 1971169689Skan VEC_free (tree, heap, new_ivs); 1972169689Skan} 1973169689Skan 1974169689Skan/* Return TRUE if this is not interesting statement from the perspective of 1975169689Skan determining if we have a perfect loop nest. */ 1976169689Skan 1977169689Skanstatic bool 1978169689Skannot_interesting_stmt (tree stmt) 1979169689Skan{ 1980169689Skan /* Note that COND_EXPR's aren't interesting because if they were exiting the 1981169689Skan loop, we would have already failed the number of exits tests. */ 1982169689Skan if (TREE_CODE (stmt) == LABEL_EXPR 1983169689Skan || TREE_CODE (stmt) == GOTO_EXPR 1984169689Skan || TREE_CODE (stmt) == COND_EXPR) 1985169689Skan return true; 1986169689Skan return false; 1987169689Skan} 1988169689Skan 1989169689Skan/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */ 1990169689Skan 1991169689Skanstatic bool 1992169689Skanphi_loop_edge_uses_def (struct loop *loop, tree phi, tree def) 1993169689Skan{ 1994169689Skan int i; 1995169689Skan for (i = 0; i < PHI_NUM_ARGS (phi); i++) 1996169689Skan if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src)) 1997169689Skan if (PHI_ARG_DEF (phi, i) == def) 1998169689Skan return true; 1999169689Skan return false; 2000169689Skan} 2001169689Skan 2002169689Skan/* Return TRUE if STMT is a use of PHI_RESULT. */ 2003169689Skan 2004169689Skanstatic bool 2005169689Skanstmt_uses_phi_result (tree stmt, tree phi_result) 2006169689Skan{ 2007169689Skan tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); 2008169689Skan 2009169689Skan /* This is conservatively true, because we only want SIMPLE bumpers 2010169689Skan of the form x +- constant for our pass. */ 2011169689Skan return (use == phi_result); 2012169689Skan} 2013169689Skan 2014169689Skan/* STMT is a bumper stmt for LOOP if the version it defines is used in the 2015169689Skan in-loop-edge in a phi node, and the operand it uses is the result of that 2016169689Skan phi node. 2017169689Skan I.E. i_29 = i_3 + 1 2018169689Skan i_3 = PHI (0, i_29); */ 2019169689Skan 2020169689Skanstatic bool 2021169689Skanstmt_is_bumper_for_loop (struct loop *loop, tree stmt) 2022169689Skan{ 2023169689Skan tree use; 2024169689Skan tree def; 2025169689Skan imm_use_iterator iter; 2026169689Skan use_operand_p use_p; 2027169689Skan 2028169689Skan def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); 2029169689Skan if (!def) 2030169689Skan return false; 2031169689Skan 2032169689Skan FOR_EACH_IMM_USE_FAST (use_p, iter, def) 2033169689Skan { 2034169689Skan use = USE_STMT (use_p); 2035169689Skan if (TREE_CODE (use) == PHI_NODE) 2036169689Skan { 2037169689Skan if (phi_loop_edge_uses_def (loop, use, def)) 2038169689Skan if (stmt_uses_phi_result (stmt, PHI_RESULT (use))) 2039169689Skan return true; 2040169689Skan } 2041169689Skan } 2042169689Skan return false; 2043169689Skan} 2044169689Skan 2045169689Skan 2046169689Skan/* Return true if LOOP is a perfect loop nest. 2047169689Skan Perfect loop nests are those loop nests where all code occurs in the 2048169689Skan innermost loop body. 2049169689Skan If S is a program statement, then 2050169689Skan 2051169689Skan i.e. 2052169689Skan DO I = 1, 20 2053169689Skan S1 2054169689Skan DO J = 1, 20 2055169689Skan ... 2056169689Skan END DO 2057169689Skan END DO 2058169689Skan is not a perfect loop nest because of S1. 2059169689Skan 2060169689Skan DO I = 1, 20 2061169689Skan DO J = 1, 20 2062169689Skan S1 2063169689Skan ... 2064169689Skan END DO 2065169689Skan END DO 2066169689Skan is a perfect loop nest. 2067169689Skan 2068169689Skan Since we don't have high level loops anymore, we basically have to walk our 2069169689Skan statements and ignore those that are there because the loop needs them (IE 2070169689Skan the induction variable increment, and jump back to the top of the loop). */ 2071169689Skan 2072169689Skanbool 2073169689Skanperfect_nest_p (struct loop *loop) 2074169689Skan{ 2075169689Skan basic_block *bbs; 2076169689Skan size_t i; 2077169689Skan tree exit_cond; 2078169689Skan 2079169689Skan if (!loop->inner) 2080169689Skan return true; 2081169689Skan bbs = get_loop_body (loop); 2082169689Skan exit_cond = get_loop_exit_condition (loop); 2083169689Skan for (i = 0; i < loop->num_nodes; i++) 2084169689Skan { 2085169689Skan if (bbs[i]->loop_father == loop) 2086169689Skan { 2087169689Skan block_stmt_iterator bsi; 2088169689Skan for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) 2089169689Skan { 2090169689Skan tree stmt = bsi_stmt (bsi); 2091169689Skan if (stmt == exit_cond 2092169689Skan || not_interesting_stmt (stmt) 2093169689Skan || stmt_is_bumper_for_loop (loop, stmt)) 2094169689Skan continue; 2095169689Skan free (bbs); 2096169689Skan return false; 2097169689Skan } 2098169689Skan } 2099169689Skan } 2100169689Skan free (bbs); 2101169689Skan /* See if the inner loops are perfectly nested as well. */ 2102169689Skan if (loop->inner) 2103169689Skan return perfect_nest_p (loop->inner); 2104169689Skan return true; 2105169689Skan} 2106169689Skan 2107169689Skan/* Replace the USES of X in STMT, or uses with the same step as X with Y. 2108169689Skan YINIT is the initial value of Y, REPLACEMENTS is a hash table to 2109169689Skan avoid creating duplicate temporaries and FIRSTBSI is statement 2110169689Skan iterator where new temporaries should be inserted at the beginning 2111169689Skan of body basic block. */ 2112169689Skan 2113169689Skanstatic void 2114169689Skanreplace_uses_equiv_to_x_with_y (struct loop *loop, tree stmt, tree x, 2115169689Skan int xstep, tree y, tree yinit, 2116169689Skan htab_t replacements, 2117169689Skan block_stmt_iterator *firstbsi) 2118169689Skan{ 2119169689Skan ssa_op_iter iter; 2120169689Skan use_operand_p use_p; 2121169689Skan 2122169689Skan FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) 2123169689Skan { 2124169689Skan tree use = USE_FROM_PTR (use_p); 2125169689Skan tree step = NULL_TREE; 2126169689Skan tree scev, init, val, var, setstmt; 2127169689Skan struct tree_map *h, in; 2128169689Skan void **loc; 2129169689Skan 2130169689Skan /* Replace uses of X with Y right away. */ 2131169689Skan if (use == x) 2132169689Skan { 2133169689Skan SET_USE (use_p, y); 2134169689Skan continue; 2135169689Skan } 2136169689Skan 2137169689Skan scev = instantiate_parameters (loop, 2138169689Skan analyze_scalar_evolution (loop, use)); 2139169689Skan 2140169689Skan if (scev == NULL || scev == chrec_dont_know) 2141169689Skan continue; 2142169689Skan 2143169689Skan step = evolution_part_in_loop_num (scev, loop->num); 2144169689Skan if (step == NULL 2145169689Skan || step == chrec_dont_know 2146169689Skan || TREE_CODE (step) != INTEGER_CST 2147169689Skan || int_cst_value (step) != xstep) 2148169689Skan continue; 2149169689Skan 2150169689Skan /* Use REPLACEMENTS hash table to cache already created 2151169689Skan temporaries. */ 2152169689Skan in.hash = htab_hash_pointer (use); 2153169689Skan in.from = use; 2154169689Skan h = htab_find_with_hash (replacements, &in, in.hash); 2155169689Skan if (h != NULL) 2156169689Skan { 2157169689Skan SET_USE (use_p, h->to); 2158169689Skan continue; 2159169689Skan } 2160169689Skan 2161169689Skan /* USE which has the same step as X should be replaced 2162169689Skan with a temporary set to Y + YINIT - INIT. */ 2163169689Skan init = initial_condition_in_loop_num (scev, loop->num); 2164169689Skan gcc_assert (init != NULL && init != chrec_dont_know); 2165169689Skan if (TREE_TYPE (use) == TREE_TYPE (y)) 2166169689Skan { 2167169689Skan val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit); 2168169689Skan val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val); 2169169689Skan if (val == y) 2170169689Skan { 2171169689Skan /* If X has the same type as USE, the same step 2172169689Skan and same initial value, it can be replaced by Y. */ 2173169689Skan SET_USE (use_p, y); 2174169689Skan continue; 2175169689Skan } 2176169689Skan } 2177169689Skan else 2178169689Skan { 2179169689Skan val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit); 2180169689Skan val = fold_convert (TREE_TYPE (use), val); 2181169689Skan val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init); 2182169689Skan } 2183169689Skan 2184169689Skan /* Create a temporary variable and insert it at the beginning 2185169689Skan of the loop body basic block, right after the PHI node 2186169689Skan which sets Y. */ 2187169689Skan var = create_tmp_var (TREE_TYPE (use), "perfecttmp"); 2188169689Skan add_referenced_var (var); 2189169689Skan val = force_gimple_operand_bsi (firstbsi, val, false, NULL); 2190169689Skan setstmt = build2 (MODIFY_EXPR, void_type_node, var, val); 2191169689Skan var = make_ssa_name (var, setstmt); 2192169689Skan TREE_OPERAND (setstmt, 0) = var; 2193169689Skan bsi_insert_before (firstbsi, setstmt, BSI_SAME_STMT); 2194169689Skan update_stmt (setstmt); 2195169689Skan SET_USE (use_p, var); 2196169689Skan h = ggc_alloc (sizeof (struct tree_map)); 2197169689Skan h->hash = in.hash; 2198169689Skan h->from = use; 2199169689Skan h->to = var; 2200169689Skan loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT); 2201169689Skan gcc_assert ((*(struct tree_map **)loc) == NULL); 2202169689Skan *(struct tree_map **) loc = h; 2203169689Skan } 2204169689Skan} 2205169689Skan 2206169689Skan/* Return true if STMT is an exit PHI for LOOP */ 2207169689Skan 2208169689Skanstatic bool 2209169689Skanexit_phi_for_loop_p (struct loop *loop, tree stmt) 2210169689Skan{ 2211169689Skan 2212169689Skan if (TREE_CODE (stmt) != PHI_NODE 2213169689Skan || PHI_NUM_ARGS (stmt) != 1 2214169689Skan || bb_for_stmt (stmt) != loop->single_exit->dest) 2215169689Skan return false; 2216169689Skan 2217169689Skan return true; 2218169689Skan} 2219169689Skan 2220169689Skan/* Return true if STMT can be put back into the loop INNER, by 2221169689Skan copying it to the beginning of that loop and changing the uses. */ 2222169689Skan 2223169689Skanstatic bool 2224169689Skancan_put_in_inner_loop (struct loop *inner, tree stmt) 2225169689Skan{ 2226169689Skan imm_use_iterator imm_iter; 2227169689Skan use_operand_p use_p; 2228169689Skan 2229169689Skan gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR); 2230169689Skan if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS) 2231169689Skan || !expr_invariant_in_loop_p (inner, TREE_OPERAND (stmt, 1))) 2232169689Skan return false; 2233169689Skan 2234169689Skan FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0)) 2235169689Skan { 2236169689Skan if (!exit_phi_for_loop_p (inner, USE_STMT (use_p))) 2237169689Skan { 2238169689Skan basic_block immbb = bb_for_stmt (USE_STMT (use_p)); 2239169689Skan 2240169689Skan if (!flow_bb_inside_loop_p (inner, immbb)) 2241169689Skan return false; 2242169689Skan } 2243169689Skan } 2244169689Skan return true; 2245169689Skan} 2246169689Skan 2247169689Skan/* Return true if STMT can be put *after* the inner loop of LOOP. */ 2248169689Skanstatic bool 2249169689Skancan_put_after_inner_loop (struct loop *loop, tree stmt) 2250169689Skan{ 2251169689Skan imm_use_iterator imm_iter; 2252169689Skan use_operand_p use_p; 2253169689Skan 2254169689Skan if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS)) 2255169689Skan return false; 2256169689Skan 2257169689Skan FOR_EACH_IMM_USE_FAST (use_p, imm_iter, TREE_OPERAND (stmt, 0)) 2258169689Skan { 2259169689Skan if (!exit_phi_for_loop_p (loop, USE_STMT (use_p))) 2260169689Skan { 2261169689Skan basic_block immbb = bb_for_stmt (USE_STMT (use_p)); 2262169689Skan 2263169689Skan if (!dominated_by_p (CDI_DOMINATORS, 2264169689Skan immbb, 2265169689Skan loop->inner->header) 2266169689Skan && !can_put_in_inner_loop (loop->inner, stmt)) 2267169689Skan return false; 2268169689Skan } 2269169689Skan } 2270169689Skan return true; 2271169689Skan} 2272169689Skan 2273169689Skan 2274169689Skan 2275169689Skan/* Return TRUE if LOOP is an imperfect nest that we can convert to a 2276169689Skan perfect one. At the moment, we only handle imperfect nests of 2277169689Skan depth 2, where all of the statements occur after the inner loop. */ 2278169689Skan 2279169689Skanstatic bool 2280169689Skancan_convert_to_perfect_nest (struct loop *loop) 2281169689Skan{ 2282169689Skan basic_block *bbs; 2283169689Skan tree exit_condition, phi; 2284169689Skan size_t i; 2285169689Skan block_stmt_iterator bsi; 2286169689Skan basic_block exitdest; 2287169689Skan 2288169689Skan /* Can't handle triply nested+ loops yet. */ 2289169689Skan if (!loop->inner || loop->inner->inner) 2290169689Skan return false; 2291169689Skan 2292169689Skan bbs = get_loop_body (loop); 2293169689Skan exit_condition = get_loop_exit_condition (loop); 2294169689Skan for (i = 0; i < loop->num_nodes; i++) 2295169689Skan { 2296169689Skan if (bbs[i]->loop_father == loop) 2297169689Skan { 2298169689Skan for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) 2299169689Skan { 2300169689Skan tree stmt = bsi_stmt (bsi); 2301169689Skan 2302169689Skan if (stmt == exit_condition 2303169689Skan || not_interesting_stmt (stmt) 2304169689Skan || stmt_is_bumper_for_loop (loop, stmt)) 2305169689Skan continue; 2306169689Skan 2307169689Skan /* If this is a scalar operation that can be put back 2308169689Skan into the inner loop, or after the inner loop, through 2309169689Skan copying, then do so. This works on the theory that 2310169689Skan any amount of scalar code we have to reduplicate 2311169689Skan into or after the loops is less expensive that the 2312169689Skan win we get from rearranging the memory walk 2313169689Skan the loop is doing so that it has better 2314169689Skan cache behavior. */ 2315169689Skan if (TREE_CODE (stmt) == MODIFY_EXPR) 2316169689Skan { 2317169689Skan use_operand_p use_a, use_b; 2318169689Skan imm_use_iterator imm_iter; 2319169689Skan ssa_op_iter op_iter, op_iter1; 2320169689Skan tree op0 = TREE_OPERAND (stmt, 0); 2321169689Skan tree scev = instantiate_parameters 2322169689Skan (loop, analyze_scalar_evolution (loop, op0)); 2323169689Skan 2324169689Skan /* If the IV is simple, it can be duplicated. */ 2325169689Skan if (!automatically_generated_chrec_p (scev)) 2326169689Skan { 2327169689Skan tree step = evolution_part_in_loop_num (scev, loop->num); 2328169689Skan if (step && step != chrec_dont_know 2329169689Skan && TREE_CODE (step) == INTEGER_CST) 2330169689Skan continue; 2331169689Skan } 2332169689Skan 2333169689Skan /* The statement should not define a variable used 2334169689Skan in the inner loop. */ 2335169689Skan if (TREE_CODE (op0) == SSA_NAME) 2336169689Skan FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0) 2337169689Skan if (bb_for_stmt (USE_STMT (use_a))->loop_father 2338169689Skan == loop->inner) 2339169689Skan goto fail; 2340169689Skan 2341169689Skan FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE) 2342169689Skan { 2343169689Skan tree node, op = USE_FROM_PTR (use_a); 2344169689Skan 2345169689Skan /* The variables should not be used in both loops. */ 2346169689Skan FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op) 2347169689Skan if (bb_for_stmt (USE_STMT (use_b))->loop_father 2348169689Skan == loop->inner) 2349169689Skan goto fail; 2350169689Skan 2351169689Skan /* The statement should not use the value of a 2352169689Skan scalar that was modified in the loop. */ 2353169689Skan node = SSA_NAME_DEF_STMT (op); 2354169689Skan if (TREE_CODE (node) == PHI_NODE) 2355169689Skan FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE) 2356169689Skan { 2357169689Skan tree arg = USE_FROM_PTR (use_b); 2358169689Skan 2359169689Skan if (TREE_CODE (arg) == SSA_NAME) 2360169689Skan { 2361169689Skan tree arg_stmt = SSA_NAME_DEF_STMT (arg); 2362169689Skan 2363169689Skan if (bb_for_stmt (arg_stmt)->loop_father 2364169689Skan == loop->inner) 2365169689Skan goto fail; 2366169689Skan } 2367169689Skan } 2368169689Skan } 2369169689Skan 2370169689Skan if (can_put_in_inner_loop (loop->inner, stmt) 2371169689Skan || can_put_after_inner_loop (loop, stmt)) 2372169689Skan continue; 2373169689Skan } 2374169689Skan 2375169689Skan /* Otherwise, if the bb of a statement we care about isn't 2376169689Skan dominated by the header of the inner loop, then we can't 2377169689Skan handle this case right now. This test ensures that the 2378169689Skan statement comes completely *after* the inner loop. */ 2379169689Skan if (!dominated_by_p (CDI_DOMINATORS, 2380169689Skan bb_for_stmt (stmt), 2381169689Skan loop->inner->header)) 2382169689Skan goto fail; 2383169689Skan } 2384169689Skan } 2385169689Skan } 2386169689Skan 2387169689Skan /* We also need to make sure the loop exit only has simple copy phis in it, 2388169689Skan otherwise we don't know how to transform it into a perfect nest right 2389169689Skan now. */ 2390169689Skan exitdest = loop->single_exit->dest; 2391169689Skan 2392169689Skan for (phi = phi_nodes (exitdest); phi; phi = PHI_CHAIN (phi)) 2393169689Skan if (PHI_NUM_ARGS (phi) != 1) 2394169689Skan goto fail; 2395169689Skan 2396169689Skan free (bbs); 2397169689Skan return true; 2398169689Skan 2399169689Skan fail: 2400169689Skan free (bbs); 2401169689Skan return false; 2402169689Skan} 2403169689Skan 2404169689Skan/* Transform the loop nest into a perfect nest, if possible. 2405169689Skan LOOPS is the current struct loops * 2406169689Skan LOOP is the loop nest to transform into a perfect nest 2407169689Skan LBOUNDS are the lower bounds for the loops to transform 2408169689Skan UBOUNDS are the upper bounds for the loops to transform 2409169689Skan STEPS is the STEPS for the loops to transform. 2410169689Skan LOOPIVS is the induction variables for the loops to transform. 2411169689Skan 2412169689Skan Basically, for the case of 2413169689Skan 2414169689Skan FOR (i = 0; i < 50; i++) 2415169689Skan { 2416169689Skan FOR (j =0; j < 50; j++) 2417169689Skan { 2418169689Skan <whatever> 2419169689Skan } 2420169689Skan <some code> 2421169689Skan } 2422169689Skan 2423169689Skan This function will transform it into a perfect loop nest by splitting the 2424169689Skan outer loop into two loops, like so: 2425169689Skan 2426169689Skan FOR (i = 0; i < 50; i++) 2427169689Skan { 2428169689Skan FOR (j = 0; j < 50; j++) 2429169689Skan { 2430169689Skan <whatever> 2431169689Skan } 2432169689Skan } 2433169689Skan 2434169689Skan FOR (i = 0; i < 50; i ++) 2435169689Skan { 2436169689Skan <some code> 2437169689Skan } 2438169689Skan 2439169689Skan Return FALSE if we can't make this loop into a perfect nest. */ 2440169689Skan 2441169689Skanstatic bool 2442169689Skanperfect_nestify (struct loops *loops, 2443169689Skan struct loop *loop, 2444169689Skan VEC(tree,heap) *lbounds, 2445169689Skan VEC(tree,heap) *ubounds, 2446169689Skan VEC(int,heap) *steps, 2447169689Skan VEC(tree,heap) *loopivs) 2448169689Skan{ 2449169689Skan basic_block *bbs; 2450169689Skan tree exit_condition; 2451169689Skan tree then_label, else_label, cond_stmt; 2452169689Skan basic_block preheaderbb, headerbb, bodybb, latchbb, olddest; 2453169689Skan int i; 2454169689Skan block_stmt_iterator bsi, firstbsi; 2455169689Skan bool insert_after; 2456169689Skan edge e; 2457169689Skan struct loop *newloop; 2458169689Skan tree phi; 2459169689Skan tree uboundvar; 2460169689Skan tree stmt; 2461169689Skan tree oldivvar, ivvar, ivvarinced; 2462169689Skan VEC(tree,heap) *phis = NULL; 2463169689Skan htab_t replacements = NULL; 2464169689Skan 2465169689Skan /* Create the new loop. */ 2466169689Skan olddest = loop->single_exit->dest; 2467169689Skan preheaderbb = loop_split_edge_with (loop->single_exit, NULL); 2468169689Skan headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2469169689Skan 2470169689Skan /* Push the exit phi nodes that we are moving. */ 2471169689Skan for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi)) 2472169689Skan { 2473169689Skan VEC_reserve (tree, heap, phis, 2); 2474169689Skan VEC_quick_push (tree, phis, PHI_RESULT (phi)); 2475169689Skan VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0)); 2476169689Skan } 2477169689Skan e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb); 2478169689Skan 2479169689Skan /* Remove the exit phis from the old basic block. Make sure to set 2480169689Skan PHI_RESULT to null so it doesn't get released. */ 2481169689Skan while (phi_nodes (olddest) != NULL) 2482169689Skan { 2483169689Skan SET_PHI_RESULT (phi_nodes (olddest), NULL); 2484169689Skan remove_phi_node (phi_nodes (olddest), NULL); 2485169689Skan } 2486169689Skan 2487169689Skan /* and add them back to the new basic block. */ 2488169689Skan while (VEC_length (tree, phis) != 0) 2489169689Skan { 2490169689Skan tree def; 2491169689Skan tree phiname; 2492169689Skan def = VEC_pop (tree, phis); 2493169689Skan phiname = VEC_pop (tree, phis); 2494169689Skan phi = create_phi_node (phiname, preheaderbb); 2495169689Skan add_phi_arg (phi, def, single_pred_edge (preheaderbb)); 2496169689Skan } 2497169689Skan flush_pending_stmts (e); 2498169689Skan VEC_free (tree, heap, phis); 2499169689Skan 2500169689Skan bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2501169689Skan latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); 2502169689Skan make_edge (headerbb, bodybb, EDGE_FALLTHRU); 2503169689Skan then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb)); 2504169689Skan else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest)); 2505169689Skan cond_stmt = build3 (COND_EXPR, void_type_node, 2506169689Skan build2 (NE_EXPR, boolean_type_node, 2507169689Skan integer_one_node, 2508169689Skan integer_zero_node), 2509169689Skan then_label, else_label); 2510169689Skan bsi = bsi_start (bodybb); 2511169689Skan bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); 2512169689Skan e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE); 2513169689Skan make_edge (bodybb, latchbb, EDGE_TRUE_VALUE); 2514169689Skan make_edge (latchbb, headerbb, EDGE_FALLTHRU); 2515169689Skan 2516169689Skan /* Update the loop structures. */ 2517169689Skan newloop = duplicate_loop (loops, loop, olddest->loop_father); 2518169689Skan newloop->header = headerbb; 2519169689Skan newloop->latch = latchbb; 2520169689Skan newloop->single_exit = e; 2521169689Skan add_bb_to_loop (latchbb, newloop); 2522169689Skan add_bb_to_loop (bodybb, newloop); 2523169689Skan add_bb_to_loop (headerbb, newloop); 2524169689Skan set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb); 2525169689Skan set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb); 2526169689Skan set_immediate_dominator (CDI_DOMINATORS, preheaderbb, 2527169689Skan loop->single_exit->src); 2528169689Skan set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb); 2529169689Skan set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb); 2530169689Skan /* Create the new iv. */ 2531169689Skan oldivvar = VEC_index (tree, loopivs, 0); 2532169689Skan ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv"); 2533169689Skan add_referenced_var (ivvar); 2534169689Skan standard_iv_increment_position (newloop, &bsi, &insert_after); 2535169689Skan create_iv (VEC_index (tree, lbounds, 0), 2536169689Skan build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)), 2537169689Skan ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced); 2538169689Skan 2539169689Skan /* Create the new upper bound. This may be not just a variable, so we copy 2540169689Skan it to one just in case. */ 2541169689Skan 2542169689Skan exit_condition = get_loop_exit_condition (newloop); 2543169689Skan uboundvar = create_tmp_var (integer_type_node, "uboundvar"); 2544169689Skan add_referenced_var (uboundvar); 2545169689Skan stmt = build2 (MODIFY_EXPR, void_type_node, uboundvar, 2546169689Skan VEC_index (tree, ubounds, 0)); 2547169689Skan uboundvar = make_ssa_name (uboundvar, stmt); 2548169689Skan TREE_OPERAND (stmt, 0) = uboundvar; 2549169689Skan 2550169689Skan if (insert_after) 2551169689Skan bsi_insert_after (&bsi, stmt, BSI_SAME_STMT); 2552169689Skan else 2553169689Skan bsi_insert_before (&bsi, stmt, BSI_SAME_STMT); 2554169689Skan update_stmt (stmt); 2555169689Skan COND_EXPR_COND (exit_condition) = build2 (GE_EXPR, 2556169689Skan boolean_type_node, 2557169689Skan uboundvar, 2558169689Skan ivvarinced); 2559169689Skan update_stmt (exit_condition); 2560169689Skan replacements = htab_create_ggc (20, tree_map_hash, 2561169689Skan tree_map_eq, NULL); 2562169689Skan bbs = get_loop_body_in_dom_order (loop); 2563169689Skan /* Now move the statements, and replace the induction variable in the moved 2564169689Skan statements with the correct loop induction variable. */ 2565169689Skan oldivvar = VEC_index (tree, loopivs, 0); 2566169689Skan firstbsi = bsi_start (bodybb); 2567169689Skan for (i = loop->num_nodes - 1; i >= 0 ; i--) 2568169689Skan { 2569169689Skan block_stmt_iterator tobsi = bsi_last (bodybb); 2570169689Skan if (bbs[i]->loop_father == loop) 2571169689Skan { 2572169689Skan /* If this is true, we are *before* the inner loop. 2573169689Skan If this isn't true, we are *after* it. 2574169689Skan 2575169689Skan The only time can_convert_to_perfect_nest returns true when we 2576169689Skan have statements before the inner loop is if they can be moved 2577169689Skan into the inner loop. 2578169689Skan 2579169689Skan The only time can_convert_to_perfect_nest returns true when we 2580169689Skan have statements after the inner loop is if they can be moved into 2581169689Skan the new split loop. */ 2582169689Skan 2583169689Skan if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i])) 2584169689Skan { 2585169689Skan block_stmt_iterator header_bsi 2586169689Skan = bsi_after_labels (loop->inner->header); 2587169689Skan 2588169689Skan for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);) 2589169689Skan { 2590169689Skan tree stmt = bsi_stmt (bsi); 2591169689Skan 2592169689Skan if (stmt == exit_condition 2593169689Skan || not_interesting_stmt (stmt) 2594169689Skan || stmt_is_bumper_for_loop (loop, stmt)) 2595169689Skan { 2596169689Skan bsi_next (&bsi); 2597169689Skan continue; 2598169689Skan } 2599169689Skan 2600169689Skan bsi_move_before (&bsi, &header_bsi); 2601169689Skan } 2602169689Skan } 2603169689Skan else 2604169689Skan { 2605169689Skan /* Note that the bsi only needs to be explicitly incremented 2606169689Skan when we don't move something, since it is automatically 2607169689Skan incremented when we do. */ 2608169689Skan for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);) 2609169689Skan { 2610169689Skan ssa_op_iter i; 2611169689Skan tree n, stmt = bsi_stmt (bsi); 2612169689Skan 2613169689Skan if (stmt == exit_condition 2614169689Skan || not_interesting_stmt (stmt) 2615169689Skan || stmt_is_bumper_for_loop (loop, stmt)) 2616169689Skan { 2617169689Skan bsi_next (&bsi); 2618169689Skan continue; 2619169689Skan } 2620169689Skan 2621169689Skan replace_uses_equiv_to_x_with_y 2622169689Skan (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar, 2623169689Skan VEC_index (tree, lbounds, 0), replacements, &firstbsi); 2624169689Skan 2625169689Skan bsi_move_before (&bsi, &tobsi); 2626169689Skan 2627169689Skan /* If the statement has any virtual operands, they may 2628169689Skan need to be rewired because the original loop may 2629169689Skan still reference them. */ 2630169689Skan FOR_EACH_SSA_TREE_OPERAND (n, stmt, i, SSA_OP_ALL_VIRTUALS) 2631169689Skan mark_sym_for_renaming (SSA_NAME_VAR (n)); 2632169689Skan } 2633169689Skan } 2634169689Skan 2635169689Skan } 2636169689Skan } 2637169689Skan 2638169689Skan free (bbs); 2639169689Skan htab_delete (replacements); 2640169689Skan return perfect_nest_p (loop); 2641169689Skan} 2642169689Skan 2643169689Skan/* Return true if TRANS is a legal transformation matrix that respects 2644169689Skan the dependence vectors in DISTS and DIRS. The conservative answer 2645169689Skan is false. 2646169689Skan 2647169689Skan "Wolfe proves that a unimodular transformation represented by the 2648169689Skan matrix T is legal when applied to a loop nest with a set of 2649169689Skan lexicographically non-negative distance vectors RDG if and only if 2650169689Skan for each vector d in RDG, (T.d >= 0) is lexicographically positive. 2651169689Skan i.e.: if and only if it transforms the lexicographically positive 2652169689Skan distance vectors to lexicographically positive vectors. Note that 2653169689Skan a unimodular matrix must transform the zero vector (and only it) to 2654169689Skan the zero vector." S.Muchnick. */ 2655169689Skan 2656169689Skanbool 2657169689Skanlambda_transform_legal_p (lambda_trans_matrix trans, 2658169689Skan int nb_loops, 2659169689Skan VEC (ddr_p, heap) *dependence_relations) 2660169689Skan{ 2661169689Skan unsigned int i, j; 2662169689Skan lambda_vector distres; 2663169689Skan struct data_dependence_relation *ddr; 2664169689Skan 2665169689Skan gcc_assert (LTM_COLSIZE (trans) == nb_loops 2666169689Skan && LTM_ROWSIZE (trans) == nb_loops); 2667169689Skan 2668169689Skan /* When there is an unknown relation in the dependence_relations, we 2669169689Skan know that it is no worth looking at this loop nest: give up. */ 2670169689Skan ddr = VEC_index (ddr_p, dependence_relations, 0); 2671169689Skan if (ddr == NULL) 2672169689Skan return true; 2673169689Skan if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 2674169689Skan return false; 2675169689Skan 2676169689Skan distres = lambda_vector_new (nb_loops); 2677169689Skan 2678169689Skan /* For each distance vector in the dependence graph. */ 2679169689Skan for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++) 2680169689Skan { 2681169689Skan /* Don't care about relations for which we know that there is no 2682169689Skan dependence, nor about read-read (aka. output-dependences): 2683169689Skan these data accesses can happen in any order. */ 2684169689Skan if (DDR_ARE_DEPENDENT (ddr) == chrec_known 2685169689Skan || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))) 2686169689Skan continue; 2687169689Skan 2688169689Skan /* Conservatively answer: "this transformation is not valid". */ 2689169689Skan if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 2690169689Skan return false; 2691169689Skan 2692169689Skan /* If the dependence could not be captured by a distance vector, 2693169689Skan conservatively answer that the transform is not valid. */ 2694169689Skan if (DDR_NUM_DIST_VECTS (ddr) == 0) 2695169689Skan return false; 2696169689Skan 2697169689Skan /* Compute trans.dist_vect */ 2698169689Skan for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) 2699169689Skan { 2700169689Skan lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, 2701169689Skan DDR_DIST_VECT (ddr, j), distres); 2702169689Skan 2703169689Skan if (!lambda_vector_lexico_pos (distres, nb_loops)) 2704169689Skan return false; 2705169689Skan } 2706169689Skan } 2707169689Skan return true; 2708169689Skan} 2709