1/* Linear Loop transforms 2 Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc. 3 Contributed by Daniel Berlin <dberlin@dberlin.org>. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING. If not, write to the Free 19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 2002110-1301, USA. */ 21 22 23#include "config.h" 24#include "system.h" 25#include "coretypes.h" 26#include "tm.h" 27#include "ggc.h" 28#include "tree.h" 29#include "target.h" 30 31#include "rtl.h" 32#include "basic-block.h" 33#include "diagnostic.h" 34#include "tree-flow.h" 35#include "tree-dump.h" 36#include "timevar.h" 37#include "cfgloop.h" 38#include "expr.h" 39#include "optabs.h" 40#include "tree-chrec.h" 41#include "tree-data-ref.h" 42#include "tree-scalar-evolution.h" 43#include "tree-pass.h" 44#include "varray.h" 45#include "lambda.h" 46 47/* Linear loop transforms include any composition of interchange, 48 scaling, skewing, and reversal. They are used to change the 49 iteration order of loop nests in order to optimize data locality of 50 traversals, or remove dependences that prevent 51 parallelization/vectorization/etc. 52 53 TODO: Determine reuse vectors/matrix and use it to determine optimal 54 transform matrix for locality purposes. 55 TODO: Completion of partial transforms. */ 56 57/* Gather statistics for loop interchange. LOOP is the loop being 58 considered. The first loop in the considered loop nest is 59 FIRST_LOOP, and consequently, the index of the considered loop is 60 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH 61 62 Initializes: 63 - DEPENDENCE_STEPS the sum of all the data dependence distances 64 carried by loop LOOP, 65 66 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations 67 for which the loop LOOP is not carrying any dependence, 68 69 - ACCESS_STRIDES the sum of all the strides in LOOP. 70 71 Example: for the following loop, 72 73 | loop_1 runs 1335 times 74 | loop_2 runs 1335 times 75 | A[{{0, +, 1}_1, +, 1335}_2] 76 | B[{{0, +, 1}_1, +, 1335}_2] 77 | endloop_2 78 | A[{0, +, 1336}_1] 79 | endloop_1 80 81 gather_interchange_stats (in loop_1) will return 82 DEPENDENCE_STEPS = 3002 83 NB_DEPS_NOT_CARRIED_BY_LOOP = 5 84 ACCESS_STRIDES = 10694 85 86 gather_interchange_stats (in loop_2) will return 87 DEPENDENCE_STEPS = 3000 88 NB_DEPS_NOT_CARRIED_BY_LOOP = 7 89 ACCESS_STRIDES = 8010 90*/ 91 92static void 93gather_interchange_stats (varray_type dependence_relations, 94 varray_type datarefs, 95 struct loop *loop, 96 struct loop *first_loop, 97 unsigned int *dependence_steps, 98 unsigned int *nb_deps_not_carried_by_loop, 99 unsigned int *access_strides) 100{ 101 unsigned int i, j; 102 103 *dependence_steps = 0; 104 *nb_deps_not_carried_by_loop = 0; 105 *access_strides = 0; 106 107 for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) 108 { 109 struct data_dependence_relation *ddr = 110 (struct data_dependence_relation *) 111 VARRAY_GENERIC_PTR (dependence_relations, i); 112 113 /* If we don't know anything about this dependence, or the distance 114 vector is NULL, or there is no dependence, then there is no reuse of 115 data. */ 116 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know 117 || DDR_ARE_DEPENDENT (ddr) == chrec_known 118 || DDR_NUM_DIST_VECTS (ddr) == 0) 119 continue; 120 121 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) 122 { 123 int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth]; 124 125 if (dist == 0) 126 (*nb_deps_not_carried_by_loop) += 1; 127 128 else if (dist < 0) 129 (*dependence_steps) += -dist; 130 131 else 132 (*dependence_steps) += dist; 133 } 134 } 135 136 /* Compute the access strides. */ 137 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) 138 { 139 unsigned int it; 140 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); 141 tree stmt = DR_STMT (dr); 142 struct loop *stmt_loop = loop_containing_stmt (stmt); 143 struct loop *inner_loop = first_loop->inner; 144 145 if (inner_loop != stmt_loop 146 && !flow_loop_nested_p (inner_loop, stmt_loop)) 147 continue; 148 for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++) 149 { 150 tree chrec = DR_ACCESS_FN (dr, it); 151 tree tstride = evolution_part_in_loop_num 152 (chrec, loop->num); 153 154 if (tstride == NULL_TREE 155 || TREE_CODE (tstride) != INTEGER_CST) 156 continue; 157 158 (*access_strides) += int_cst_value (tstride); 159 } 160 } 161} 162 163/* Attempt to apply interchange transformations to TRANS to maximize the 164 spatial and temporal locality of the loop. 165 Returns the new transform matrix. The smaller the reuse vector 166 distances in the inner loops, the fewer the cache misses. 167 FIRST_LOOP is the loop->num of the first loop in the analyzed loop 168 nest. */ 169 170 171static lambda_trans_matrix 172try_interchange_loops (lambda_trans_matrix trans, 173 unsigned int depth, 174 varray_type dependence_relations, 175 varray_type datarefs, 176 struct loop *first_loop) 177{ 178 struct loop *loop_i; 179 struct loop *loop_j; 180 unsigned int dependence_steps_i, dependence_steps_j; 181 unsigned int access_strides_i, access_strides_j; 182 unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; 183 struct data_dependence_relation *ddr; 184 185 /* When there is an unknown relation in the dependence_relations, we 186 know that it is no worth looking at this loop nest: give up. */ 187 ddr = (struct data_dependence_relation *) 188 VARRAY_GENERIC_PTR (dependence_relations, 0); 189 if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 190 return trans; 191 192 /* LOOP_I is always the outer loop. */ 193 for (loop_j = first_loop->inner; 194 loop_j; 195 loop_j = loop_j->inner) 196 for (loop_i = first_loop; 197 loop_i->depth < loop_j->depth; 198 loop_i = loop_i->inner) 199 { 200 gather_interchange_stats (dependence_relations, datarefs, 201 loop_i, first_loop, 202 &dependence_steps_i, 203 &nb_deps_not_carried_by_i, 204 &access_strides_i); 205 gather_interchange_stats (dependence_relations, datarefs, 206 loop_j, first_loop, 207 &dependence_steps_j, 208 &nb_deps_not_carried_by_j, 209 &access_strides_j); 210 211 /* Heuristics for loop interchange profitability: 212 213 1. (spatial locality) Inner loops should have smallest 214 dependence steps. 215 216 2. (spatial locality) Inner loops should contain more 217 dependence relations not carried by the loop. 218 219 3. (temporal locality) Inner loops should have smallest 220 array access strides. 221 */ 222 if (dependence_steps_i < dependence_steps_j 223 || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j 224 || access_strides_i < access_strides_j) 225 { 226 lambda_matrix_row_exchange (LTM_MATRIX (trans), 227 loop_i->depth - first_loop->depth, 228 loop_j->depth - first_loop->depth); 229 /* Validate the resulting matrix. When the transformation 230 is not valid, reverse to the previous transformation. */ 231 if (!lambda_transform_legal_p (trans, depth, dependence_relations)) 232 lambda_matrix_row_exchange (LTM_MATRIX (trans), 233 loop_i->depth - first_loop->depth, 234 loop_j->depth - first_loop->depth); 235 } 236 } 237 238 return trans; 239} 240 241/* Perform a set of linear transforms on LOOPS. */ 242 243void 244linear_transform_loops (struct loops *loops) 245{ 246 bool modified = false; 247 unsigned int i; 248 VEC(tree,heap) *oldivs = NULL; 249 VEC(tree,heap) *invariants = NULL; 250 251 for (i = 1; i < loops->num; i++) 252 { 253 unsigned int depth = 0; 254 varray_type datarefs; 255 varray_type dependence_relations; 256 struct loop *loop_nest = loops->parray[i]; 257 struct loop *temp; 258 lambda_loopnest before, after; 259 lambda_trans_matrix trans; 260 bool problem = false; 261 /* If it's not a loop nest, we don't want it. 262 We also don't handle sibling loops properly, 263 which are loops of the following form: 264 for (i = 0; i < 50; i++) 265 { 266 for (j = 0; j < 50; j++) 267 { 268 ... 269 } 270 for (j = 0; j < 50; j++) 271 { 272 ... 273 } 274 } */ 275 if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit) 276 continue; 277 VEC_truncate (tree, oldivs, 0); 278 VEC_truncate (tree, invariants, 0); 279 depth = 1; 280 for (temp = loop_nest->inner; temp; temp = temp->inner) 281 { 282 /* If we have a sibling loop or multiple exit edges, jump ship. */ 283 if (temp->next || !temp->single_exit) 284 { 285 problem = true; 286 break; 287 } 288 depth ++; 289 } 290 if (problem) 291 continue; 292 293 /* Analyze data references and dependence relations using scev. */ 294 295 VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs"); 296 VARRAY_GENERIC_PTR_INIT (dependence_relations, 10, 297 "dependence_relations"); 298 299 300 compute_data_dependences_for_loop (loop_nest, true, 301 &datarefs, &dependence_relations); 302 if (dump_file && (dump_flags & TDF_DETAILS)) 303 { 304 unsigned int j; 305 for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++) 306 { 307 struct data_dependence_relation *ddr = 308 (struct data_dependence_relation *) 309 VARRAY_GENERIC_PTR (dependence_relations, j); 310 311 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 312 dump_data_dependence_relation (dump_file, ddr); 313 } 314 fprintf (dump_file, "\n\n"); 315 } 316 /* Build the transformation matrix. */ 317 trans = lambda_trans_matrix_new (depth, depth); 318 lambda_matrix_id (LTM_MATRIX (trans), depth); 319 320 trans = try_interchange_loops (trans, depth, dependence_relations, 321 datarefs, loop_nest); 322 323 if (lambda_trans_matrix_id_p (trans)) 324 { 325 if (dump_file) 326 fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); 327 goto free_and_continue; 328 } 329 330 /* Check whether the transformation is legal. */ 331 if (!lambda_transform_legal_p (trans, depth, dependence_relations)) 332 { 333 if (dump_file) 334 fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); 335 goto free_and_continue; 336 } 337 338 before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs, 339 &invariants); 340 if (!before) 341 goto free_and_continue; 342 343 if (dump_file) 344 { 345 fprintf (dump_file, "Before:\n"); 346 print_lambda_loopnest (dump_file, before, 'i'); 347 } 348 349 after = lambda_loopnest_transform (before, trans); 350 if (dump_file) 351 { 352 fprintf (dump_file, "After:\n"); 353 print_lambda_loopnest (dump_file, after, 'u'); 354 } 355 lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, 356 after, trans); 357 modified = true; 358 359 if (dump_file) 360 fprintf (dump_file, "Successfully transformed loop.\n"); 361 362 free_and_continue: 363 free_dependence_relations (dependence_relations); 364 free_data_refs (datarefs); 365 } 366 VEC_free (tree, heap, oldivs); 367 VEC_free (tree, heap, invariants); 368 scev_reset (); 369 370 if (modified) 371 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi); 372} 373