1169689Skan/* Linear Loop transforms 2169689Skan Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. 3169689Skan Contributed by Daniel Berlin <dberlin@dberlin.org>. 4169689Skan 5169689SkanThis file is part of GCC. 6169689Skan 7169689SkanGCC is free software; you can redistribute it and/or modify it under 8169689Skanthe terms of the GNU General Public License as published by the Free 9169689SkanSoftware Foundation; either version 2, or (at your option) any later 10169689Skanversion. 11169689Skan 12169689SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 13169689SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 14169689SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15169689Skanfor more details. 16169689Skan 17169689SkanYou should have received a copy of the GNU General Public License 18169689Skanalong with GCC; see the file COPYING. If not, write to the Free 19169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20169689Skan02110-1301, USA. */ 21169689Skan 22169689Skan 23169689Skan#include "config.h" 24169689Skan#include "system.h" 25169689Skan#include "coretypes.h" 26169689Skan#include "tm.h" 27169689Skan#include "ggc.h" 28169689Skan#include "tree.h" 29169689Skan#include "target.h" 30169689Skan 31169689Skan#include "rtl.h" 32169689Skan#include "basic-block.h" 33169689Skan#include "diagnostic.h" 34169689Skan#include "tree-flow.h" 35169689Skan#include "tree-dump.h" 36169689Skan#include "timevar.h" 37169689Skan#include "cfgloop.h" 38169689Skan#include "expr.h" 39169689Skan#include "optabs.h" 40169689Skan#include "tree-chrec.h" 41169689Skan#include "tree-data-ref.h" 42169689Skan#include "tree-scalar-evolution.h" 43169689Skan#include "tree-pass.h" 44169689Skan#include "lambda.h" 45169689Skan 46169689Skan/* Linear loop transforms include any composition of interchange, 47169689Skan scaling, skewing, and reversal. They are used to change the 48169689Skan iteration order of loop nests in order to optimize data locality of 49169689Skan traversals, or remove dependences that prevent 50169689Skan parallelization/vectorization/etc. 51169689Skan 52169689Skan TODO: Determine reuse vectors/matrix and use it to determine optimal 53169689Skan transform matrix for locality purposes. 54169689Skan TODO: Completion of partial transforms. */ 55169689Skan 56169689Skan/* Gather statistics for loop interchange. LOOP is the loop being 57169689Skan considered. The first loop in the considered loop nest is 58169689Skan FIRST_LOOP, and consequently, the index of the considered loop is 59169689Skan obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH 60169689Skan 61169689Skan Initializes: 62169689Skan - DEPENDENCE_STEPS the sum of all the data dependence distances 63169689Skan carried by loop LOOP, 64169689Skan 65169689Skan - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations 66169689Skan for which the loop LOOP is not carrying any dependence, 67169689Skan 68169689Skan - ACCESS_STRIDES the sum of all the strides in LOOP. 69169689Skan 70169689Skan Example: for the following loop, 71169689Skan 72169689Skan | loop_1 runs 1335 times 73169689Skan | loop_2 runs 1335 times 74169689Skan | A[{{0, +, 1}_1, +, 1335}_2] 75169689Skan | B[{{0, +, 1}_1, +, 1335}_2] 76169689Skan | endloop_2 77169689Skan | A[{0, +, 1336}_1] 78169689Skan | endloop_1 79169689Skan 80169689Skan gather_interchange_stats (in loop_1) will return 81169689Skan DEPENDENCE_STEPS = 3002 82169689Skan NB_DEPS_NOT_CARRIED_BY_LOOP = 5 83169689Skan ACCESS_STRIDES = 10694 84169689Skan 85169689Skan gather_interchange_stats (in loop_2) will return 86169689Skan DEPENDENCE_STEPS = 3000 87169689Skan NB_DEPS_NOT_CARRIED_BY_LOOP = 7 88169689Skan ACCESS_STRIDES = 8010 89169689Skan*/ 90169689Skan 91169689Skanstatic void 92169689Skangather_interchange_stats (VEC (ddr_p, heap) *dependence_relations, 93169689Skan VEC (data_reference_p, heap) *datarefs, 94169689Skan struct loop *loop, 95169689Skan struct loop *first_loop, 96169689Skan unsigned int *dependence_steps, 97169689Skan unsigned int *nb_deps_not_carried_by_loop, 98169689Skan unsigned int *access_strides) 99169689Skan{ 100169689Skan unsigned int i, j; 101169689Skan struct data_dependence_relation *ddr; 102169689Skan struct data_reference *dr; 103169689Skan 104169689Skan *dependence_steps = 0; 105169689Skan *nb_deps_not_carried_by_loop = 0; 106169689Skan *access_strides = 0; 107169689Skan 108169689Skan for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++) 109169689Skan { 110169689Skan /* If we don't know anything about this dependence, or the distance 111169689Skan vector is NULL, or there is no dependence, then there is no reuse of 112169689Skan data. */ 113169689Skan if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know 114169689Skan || DDR_ARE_DEPENDENT (ddr) == chrec_known 115169689Skan || DDR_NUM_DIST_VECTS (ddr) == 0) 116169689Skan continue; 117169689Skan 118169689Skan for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) 119169689Skan { 120169689Skan int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth]; 121169689Skan 122169689Skan if (dist == 0) 123169689Skan (*nb_deps_not_carried_by_loop) += 1; 124169689Skan 125169689Skan else if (dist < 0) 126169689Skan (*dependence_steps) += -dist; 127169689Skan 128169689Skan else 129169689Skan (*dependence_steps) += dist; 130169689Skan } 131169689Skan } 132169689Skan 133169689Skan /* Compute the access strides. */ 134169689Skan for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) 135169689Skan { 136169689Skan unsigned int it; 137169689Skan tree stmt = DR_STMT (dr); 138169689Skan struct loop *stmt_loop = loop_containing_stmt (stmt); 139169689Skan struct loop *inner_loop = first_loop->inner; 140169689Skan 141169689Skan if (inner_loop != stmt_loop 142169689Skan && !flow_loop_nested_p (inner_loop, stmt_loop)) 143169689Skan continue; 144169689Skan for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++) 145169689Skan { 146169689Skan tree chrec = DR_ACCESS_FN (dr, it); 147169689Skan tree tstride = evolution_part_in_loop_num 148169689Skan (chrec, loop->num); 149169689Skan 150169689Skan if (tstride == NULL_TREE 151169689Skan || TREE_CODE (tstride) != INTEGER_CST) 152169689Skan continue; 153169689Skan 154169689Skan (*access_strides) += int_cst_value (tstride); 155169689Skan } 156169689Skan } 157169689Skan} 158169689Skan 159169689Skan/* Attempt to apply interchange transformations to TRANS to maximize the 160169689Skan spatial and temporal locality of the loop. 161169689Skan Returns the new transform matrix. The smaller the reuse vector 162169689Skan distances in the inner loops, the fewer the cache misses. 163169689Skan FIRST_LOOP is the loop->num of the first loop in the analyzed loop 164169689Skan nest. */ 165169689Skan 166169689Skan 167169689Skanstatic lambda_trans_matrix 168169689Skantry_interchange_loops (lambda_trans_matrix trans, 169169689Skan unsigned int depth, 170169689Skan VEC (ddr_p, heap) *dependence_relations, 171169689Skan VEC (data_reference_p, heap) *datarefs, 172169689Skan struct loop *first_loop) 173169689Skan{ 174169689Skan struct loop *loop_i; 175169689Skan struct loop *loop_j; 176169689Skan unsigned int dependence_steps_i, dependence_steps_j; 177169689Skan unsigned int access_strides_i, access_strides_j; 178169689Skan unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; 179169689Skan struct data_dependence_relation *ddr; 180169689Skan 181169689Skan if (VEC_length (ddr_p, dependence_relations) == 0) 182169689Skan return trans; 183169689Skan 184169689Skan /* When there is an unknown relation in the dependence_relations, we 185169689Skan know that it is no worth looking at this loop nest: give up. */ 186169689Skan ddr = VEC_index (ddr_p, dependence_relations, 0); 187169689Skan if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 188169689Skan return trans; 189169689Skan 190169689Skan /* LOOP_I is always the outer loop. */ 191169689Skan for (loop_j = first_loop->inner; 192169689Skan loop_j; 193169689Skan loop_j = loop_j->inner) 194169689Skan for (loop_i = first_loop; 195169689Skan loop_i->depth < loop_j->depth; 196169689Skan loop_i = loop_i->inner) 197169689Skan { 198169689Skan gather_interchange_stats (dependence_relations, datarefs, 199169689Skan loop_i, first_loop, 200169689Skan &dependence_steps_i, 201169689Skan &nb_deps_not_carried_by_i, 202169689Skan &access_strides_i); 203169689Skan gather_interchange_stats (dependence_relations, datarefs, 204169689Skan loop_j, first_loop, 205169689Skan &dependence_steps_j, 206169689Skan &nb_deps_not_carried_by_j, 207169689Skan &access_strides_j); 208169689Skan 209169689Skan /* Heuristics for loop interchange profitability: 210169689Skan 211169689Skan 1. (spatial locality) Inner loops should have smallest 212169689Skan dependence steps. 213169689Skan 214169689Skan 2. (spatial locality) Inner loops should contain more 215169689Skan dependence relations not carried by the loop. 216169689Skan 217169689Skan 3. (temporal locality) Inner loops should have smallest 218169689Skan array access strides. 219169689Skan */ 220169689Skan if (dependence_steps_i < dependence_steps_j 221169689Skan || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j 222169689Skan || access_strides_i < access_strides_j) 223169689Skan { 224169689Skan lambda_matrix_row_exchange (LTM_MATRIX (trans), 225169689Skan loop_i->depth - first_loop->depth, 226169689Skan loop_j->depth - first_loop->depth); 227169689Skan /* Validate the resulting matrix. When the transformation 228169689Skan is not valid, reverse to the previous transformation. */ 229169689Skan if (!lambda_transform_legal_p (trans, depth, dependence_relations)) 230169689Skan lambda_matrix_row_exchange (LTM_MATRIX (trans), 231169689Skan loop_i->depth - first_loop->depth, 232169689Skan loop_j->depth - first_loop->depth); 233169689Skan } 234169689Skan } 235169689Skan 236169689Skan return trans; 237169689Skan} 238169689Skan 239169689Skan/* Perform a set of linear transforms on LOOPS. */ 240169689Skan 241169689Skanvoid 242169689Skanlinear_transform_loops (struct loops *loops) 243169689Skan{ 244169689Skan bool modified = false; 245169689Skan unsigned int i; 246169689Skan VEC(tree,heap) *oldivs = NULL; 247169689Skan VEC(tree,heap) *invariants = NULL; 248169689Skan 249169689Skan for (i = 1; i < loops->num; i++) 250169689Skan { 251169689Skan unsigned int depth = 0; 252169689Skan VEC (ddr_p, heap) *dependence_relations; 253169689Skan VEC (data_reference_p, heap) *datarefs; 254169689Skan struct loop *loop_nest = loops->parray[i]; 255169689Skan struct loop *temp; 256169689Skan lambda_loopnest before, after; 257169689Skan lambda_trans_matrix trans; 258169689Skan bool problem = false; 259169689Skan /* If it's not a loop nest, we don't want it. 260169689Skan We also don't handle sibling loops properly, 261169689Skan which are loops of the following form: 262169689Skan for (i = 0; i < 50; i++) 263169689Skan { 264169689Skan for (j = 0; j < 50; j++) 265169689Skan { 266169689Skan ... 267169689Skan } 268169689Skan for (j = 0; j < 50; j++) 269169689Skan { 270169689Skan ... 271169689Skan } 272169689Skan } */ 273169689Skan if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit) 274169689Skan continue; 275169689Skan VEC_truncate (tree, oldivs, 0); 276169689Skan VEC_truncate (tree, invariants, 0); 277169689Skan depth = 1; 278169689Skan for (temp = loop_nest->inner; temp; temp = temp->inner) 279169689Skan { 280169689Skan /* If we have a sibling loop or multiple exit edges, jump ship. */ 281169689Skan if (temp->next || !temp->single_exit) 282169689Skan { 283169689Skan problem = true; 284169689Skan break; 285169689Skan } 286169689Skan depth ++; 287169689Skan } 288169689Skan if (problem) 289169689Skan continue; 290169689Skan 291169689Skan /* Analyze data references and dependence relations using scev. */ 292169689Skan 293169689Skan datarefs = VEC_alloc (data_reference_p, heap, 10); 294169689Skan dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); 295169689Skan compute_data_dependences_for_loop (loop_nest, true, &datarefs, 296169689Skan &dependence_relations); 297169689Skan 298169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 299169689Skan dump_ddrs (dump_file, dependence_relations); 300169689Skan 301169689Skan /* Build the transformation matrix. */ 302169689Skan trans = lambda_trans_matrix_new (depth, depth); 303169689Skan lambda_matrix_id (LTM_MATRIX (trans), depth); 304169689Skan trans = try_interchange_loops (trans, depth, dependence_relations, 305169689Skan datarefs, loop_nest); 306169689Skan 307169689Skan if (lambda_trans_matrix_id_p (trans)) 308169689Skan { 309169689Skan if (dump_file) 310169689Skan fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); 311169689Skan goto free_and_continue; 312169689Skan } 313169689Skan 314169689Skan /* Check whether the transformation is legal. */ 315169689Skan if (!lambda_transform_legal_p (trans, depth, dependence_relations)) 316169689Skan { 317169689Skan if (dump_file) 318169689Skan fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); 319169689Skan goto free_and_continue; 320169689Skan } 321169689Skan 322169689Skan before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, 323169689Skan &invariants); 324169689Skan 325169689Skan if (!before) 326169689Skan goto free_and_continue; 327169689Skan 328169689Skan if (dump_file) 329169689Skan { 330169689Skan fprintf (dump_file, "Before:\n"); 331169689Skan print_lambda_loopnest (dump_file, before, 'i'); 332169689Skan } 333169689Skan 334169689Skan after = lambda_loopnest_transform (before, trans); 335169689Skan 336169689Skan if (dump_file) 337169689Skan { 338169689Skan fprintf (dump_file, "After:\n"); 339169689Skan print_lambda_loopnest (dump_file, after, 'u'); 340169689Skan } 341169689Skan 342169689Skan lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, 343169689Skan after, trans); 344169689Skan modified = true; 345169689Skan 346169689Skan if (dump_file) 347169689Skan fprintf (dump_file, "Successfully transformed loop.\n"); 348169689Skan 349169689Skan free_and_continue: 350169689Skan free_dependence_relations (dependence_relations); 351169689Skan free_data_refs (datarefs); 352169689Skan } 353169689Skan 354169689Skan VEC_free (tree, heap, oldivs); 355169689Skan VEC_free (tree, heap, invariants); 356169689Skan scev_reset (); 357169689Skan 358169689Skan if (modified) 359169689Skan rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi); 360169689Skan} 361