1/* Heuristics and transform for loop blocking and strip mining on 2 polyhedral representation. 3 4 Copyright (C) 2009-2015 Free Software Foundation, Inc. 5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and 6 Pranav Garg <pranav.garg2107@gmail.com>. 7 8This file is part of GCC. 9 10GCC is free software; you can redistribute it and/or modify 11it under the terms of the GNU General Public License as published by 12the Free Software Foundation; either version 3, or (at your option) 13any later version. 14 15GCC is distributed in the hope that it will be useful, 16but WITHOUT ANY WARRANTY; without even the implied warranty of 17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18GNU General Public License for more details. 19 20You should have received a copy of the GNU General Public License 21along with GCC; see the file COPYING3. If not see 22<http://www.gnu.org/licenses/>. */ 23 24#include "config.h" 25 26#ifdef HAVE_isl 27#include <isl/constraint.h> 28#include <isl/set.h> 29#include <isl/map.h> 30#include <isl/union_map.h> 31#include <isl/constraint.h> 32#endif 33 34#include "system.h" 35#include "coretypes.h" 36#include "hash-set.h" 37#include "machmode.h" 38#include "vec.h" 39#include "double-int.h" 40#include "input.h" 41#include "alias.h" 42#include "symtab.h" 43#include "options.h" 44#include "wide-int.h" 45#include "inchash.h" 46#include "tree.h" 47#include "fold-const.h" 48#include "predict.h" 49#include "tm.h" 50#include "hard-reg-set.h" 51#include "input.h" 52#include "function.h" 53#include "dominance.h" 54#include "basic-block.h" 55#include "tree-ssa-alias.h" 56#include "internal-fn.h" 57#include "gimple-expr.h" 58#include "is-a.h" 59#include "gimple.h" 60#include "gimple-iterator.h" 61#include "tree-ssa-loop.h" 62#include "dumpfile.h" 63#include "cfgloop.h" 64#include "tree-chrec.h" 65#include "tree-data-ref.h" 66#include "sese.h" 67 68#ifdef HAVE_isl 69#include "graphite-poly.h" 70 71 72/* Strip mines with a factor STRIDE the scattering (time) dimension 73 around PBB at depth TIME_DEPTH. 74 75 The following example comes from the wiki page: 76 http://gcc.gnu.org/wiki/Graphite/Strip_mine 77 78 The strip mine of a loop with a tile of 64 can be obtained with a 79 scattering function as follows: 80 81 $ cat ./albert_strip_mine.cloog 82 # language: C 83 c 84 85 # parameter {n | n >= 0} 86 1 3 87 # n 1 88 1 1 0 89 1 90 n 91 92 1 # Number of statements: 93 94 1 95 # {i | 0 <= i <= n} 96 2 4 97 # i n 1 98 1 1 0 0 99 1 -1 1 0 100 101 0 0 0 102 1 103 i 104 105 1 # Scattering functions 106 107 3 6 108 # NEW OLD i n 1 109 1 -64 0 1 0 0 110 1 64 0 -1 0 63 111 0 0 1 -1 0 0 112 113 1 114 NEW OLD 115 116 #the output of CLooG is like this: 117 #$ cloog ./albert_strip_mine.cloog 118 # for (NEW=0;NEW<=floord(n,64);NEW++) { 119 # for (OLD=max(64*NEW,0);OLD<=min(64*NEW+63,n);OLD++) { 120 # S1(i = OLD) ; 121 # } 122 # } 123*/ 124 125static void 126pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride) 127{ 128 isl_space *d; 129 isl_constraint *c; 130 int iter, strip; 131 /* STRIP is the dimension that iterates with stride STRIDE. */ 132 /* ITER is the dimension that enumerates single iterations inside 133 one strip that has at most STRIDE iterations. */ 134 strip = time_depth; 135 iter = strip + 2; 136 137 pbb->transformed = isl_map_insert_dims (pbb->transformed, isl_dim_out, 138 strip, 2); 139 140 /* Lower bound of the striped loop. */ 141 d = isl_map_get_space (pbb->transformed); 142 c = isl_inequality_alloc (isl_local_space_from_space (d)); 143 c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, -stride); 144 c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, 1); 145 pbb->transformed = isl_map_add_constraint (pbb->transformed, c); 146 147 /* Upper bound of the striped loop. */ 148 d = isl_map_get_space (pbb->transformed); 149 c = isl_inequality_alloc (isl_local_space_from_space (d)); 150 c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, stride); 151 c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, -1); 152 c = isl_constraint_set_constant_si (c, stride - 1); 153 pbb->transformed = isl_map_add_constraint (pbb->transformed, c); 154 155 /* Static scheduling for ITER level. 156 This is mandatory to keep the 2d + 1 canonical scheduling format. */ 157 d = isl_map_get_space (pbb->transformed); 158 c = isl_equality_alloc (isl_local_space_from_space (d)); 159 c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip + 1, 1); 160 pbb->transformed = isl_map_add_constraint (pbb->transformed, c); 161} 162 163/* Returns true when strip mining with STRIDE of the loop LST is 164 profitable. */ 165 166static bool 167lst_strip_mine_profitable_p (lst_p lst, int stride) 168{ 169 mpz_t niter, strip_stride; 170 bool res; 171 172 gcc_assert (LST_LOOP_P (lst)); 173 mpz_init (strip_stride); 174 mpz_init (niter); 175 176 mpz_set_si (strip_stride, stride); 177 lst_niter_for_loop (lst, niter); 178 res = (mpz_cmp (niter, strip_stride) > 0); 179 180 mpz_clear (strip_stride); 181 mpz_clear (niter); 182 return res; 183} 184 185/* Strip-mines all the loops of LST with STRIDE. Return the number of 186 loops strip-mined. */ 187 188static int 189lst_do_strip_mine_loop (lst_p lst, int depth, int stride) 190{ 191 int i; 192 lst_p l; 193 poly_bb_p pbb; 194 195 if (!lst) 196 return 0; 197 198 if (LST_LOOP_P (lst)) 199 { 200 int res = 0; 201 202 FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l) 203 res += lst_do_strip_mine_loop (l, depth, stride); 204 205 return res; 206 } 207 208 pbb = LST_PBB (lst); 209 pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride); 210 return 1; 211} 212 213/* Strip-mines all the loops of LST with STRIDE. When STRIDE is zero, 214 read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE. Return the 215 number of strip-mined loops. 216 217 Strip mining transforms a loop 218 219 | for (i = 0; i < N; i++) 220 | S (i); 221 222 into the following loop nest: 223 224 | for (k = 0; k < N; k += STRIDE) 225 | for (j = 0; j < STRIDE; j++) 226 | S (i = k + j); 227*/ 228 229static int 230lst_do_strip_mine (lst_p lst, int stride) 231{ 232 int i; 233 lst_p l; 234 int res = 0; 235 int depth; 236 237 if (!stride) 238 stride = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE); 239 240 if (!lst 241 || !LST_LOOP_P (lst)) 242 return false; 243 244 FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l) 245 res += lst_do_strip_mine (l, stride); 246 247 depth = lst_depth (lst); 248 if (depth >= 0 249 && lst_strip_mine_profitable_p (lst, stride)) 250 { 251 res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride); 252 lst_add_loop_under_loop (lst); 253 } 254 255 return res; 256} 257 258/* Strip mines all the loops in SCOP. Returns the number of 259 strip-mined loops. */ 260 261int 262scop_do_strip_mine (scop_p scop, int stride) 263{ 264 return lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), stride); 265} 266 267/* Loop blocks all the loops in SCOP. Returns true when we manage to 268 block some loops. */ 269 270bool 271scop_do_block (scop_p scop) 272{ 273 store_scattering (scop); 274 275 /* If we don't strip mine at least two loops, or not interchange 276 loops, the strip mine alone will not be profitable, and the 277 transform is not a loop blocking: so revert the transform. */ 278 if (lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), 0) < 2 279 || scop_do_interchange (scop) == 0) 280 { 281 restore_scattering (scop); 282 return false; 283 } 284 285 if (dump_file && (dump_flags & TDF_DETAILS)) 286 fprintf (dump_file, "SCoP will be loop blocked.\n"); 287 288 return true; 289} 290 291#endif 292