1/* Heuristics and transform for loop blocking and strip mining on
2   polyhedral representation.
3
4   Copyright (C) 2009-2015 Free Software Foundation, Inc.
5   Contributed by Sebastian Pop <sebastian.pop@amd.com> and
6   Pranav Garg  <pranav.garg2107@gmail.com>.
7
8This file is part of GCC.
9
10GCC is free software; you can redistribute it and/or modify
11it under the terms of the GNU General Public License as published by
12the Free Software Foundation; either version 3, or (at your option)
13any later version.
14
15GCC is distributed in the hope that it will be useful,
16but WITHOUT ANY WARRANTY; without even the implied warranty of
17MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18GNU General Public License for more details.
19
20You should have received a copy of the GNU General Public License
21along with GCC; see the file COPYING3.  If not see
22<http://www.gnu.org/licenses/>.  */
23
24#include "config.h"
25
26#ifdef HAVE_isl
27#include <isl/constraint.h>
28#include <isl/set.h>
29#include <isl/map.h>
30#include <isl/union_map.h>
31#include <isl/constraint.h>
32#endif
33
34#include "system.h"
35#include "coretypes.h"
36#include "hash-set.h"
37#include "machmode.h"
38#include "vec.h"
39#include "double-int.h"
40#include "input.h"
41#include "alias.h"
42#include "symtab.h"
43#include "options.h"
44#include "wide-int.h"
45#include "inchash.h"
46#include "tree.h"
47#include "fold-const.h"
48#include "predict.h"
49#include "tm.h"
50#include "hard-reg-set.h"
51#include "input.h"
52#include "function.h"
53#include "dominance.h"
54#include "basic-block.h"
55#include "tree-ssa-alias.h"
56#include "internal-fn.h"
57#include "gimple-expr.h"
58#include "is-a.h"
59#include "gimple.h"
60#include "gimple-iterator.h"
61#include "tree-ssa-loop.h"
62#include "dumpfile.h"
63#include "cfgloop.h"
64#include "tree-chrec.h"
65#include "tree-data-ref.h"
66#include "sese.h"
67
68#ifdef HAVE_isl
69#include "graphite-poly.h"
70
71
72/* Strip mines with a factor STRIDE the scattering (time) dimension
73   around PBB at depth TIME_DEPTH.
74
75   The following example comes from the wiki page:
76   http://gcc.gnu.org/wiki/Graphite/Strip_mine
77
78   The strip mine of a loop with a tile of 64 can be obtained with a
79   scattering function as follows:
80
81   $ cat ./albert_strip_mine.cloog
82   # language: C
83   c
84
85   # parameter {n | n >= 0}
86   1 3
87   #  n  1
88   1  1  0
89   1
90   n
91
92   1 # Number of statements:
93
94   1
95   # {i | 0 <= i <= n}
96   2 4
97   #  i  n   1
98   1  1  0   0
99   1 -1  1   0
100
101   0  0  0
102   1
103   i
104
105   1 # Scattering functions
106
107   3 6
108   #  NEW  OLD    i    n    1
109   1  -64    0    1    0    0
110   1   64    0   -1    0   63
111   0    0    1   -1    0    0
112
113   1
114   NEW  OLD
115
116   #the output of CLooG is like this:
117   #$ cloog ./albert_strip_mine.cloog
118   # for (NEW=0;NEW<=floord(n,64);NEW++) {
119   #   for (OLD=max(64*NEW,0);OLD<=min(64*NEW+63,n);OLD++) {
120   #     S1(i = OLD) ;
121   #   }
122   # }
123*/
124
125static void
126pbb_strip_mine_time_depth (poly_bb_p pbb, int time_depth, int stride)
127{
128  isl_space *d;
129  isl_constraint *c;
130  int iter, strip;
131  /* STRIP is the dimension that iterates with stride STRIDE.  */
132  /* ITER is the dimension that enumerates single iterations inside
133     one strip that has at most STRIDE iterations.  */
134  strip = time_depth;
135  iter = strip + 2;
136
137  pbb->transformed = isl_map_insert_dims (pbb->transformed, isl_dim_out,
138					  strip, 2);
139
140  /* Lower bound of the striped loop.  */
141  d = isl_map_get_space (pbb->transformed);
142  c = isl_inequality_alloc (isl_local_space_from_space (d));
143  c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, -stride);
144  c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, 1);
145  pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
146
147  /* Upper bound of the striped loop.  */
148  d = isl_map_get_space (pbb->transformed);
149  c = isl_inequality_alloc (isl_local_space_from_space (d));
150  c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip, stride);
151  c = isl_constraint_set_coefficient_si (c, isl_dim_out, iter, -1);
152  c = isl_constraint_set_constant_si (c, stride - 1);
153  pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
154
155  /* Static scheduling for ITER level.
156     This is mandatory to keep the 2d + 1 canonical scheduling format.  */
157  d = isl_map_get_space (pbb->transformed);
158  c = isl_equality_alloc (isl_local_space_from_space (d));
159  c = isl_constraint_set_coefficient_si (c, isl_dim_out, strip + 1, 1);
160  pbb->transformed = isl_map_add_constraint (pbb->transformed, c);
161}
162
163/* Returns true when strip mining with STRIDE of the loop LST is
164   profitable.  */
165
166static bool
167lst_strip_mine_profitable_p (lst_p lst, int stride)
168{
169  mpz_t niter, strip_stride;
170  bool res;
171
172  gcc_assert (LST_LOOP_P (lst));
173  mpz_init (strip_stride);
174  mpz_init (niter);
175
176  mpz_set_si (strip_stride, stride);
177  lst_niter_for_loop (lst, niter);
178  res = (mpz_cmp (niter, strip_stride) > 0);
179
180  mpz_clear (strip_stride);
181  mpz_clear (niter);
182  return res;
183}
184
185/* Strip-mines all the loops of LST with STRIDE.  Return the number of
186   loops strip-mined.  */
187
188static int
189lst_do_strip_mine_loop (lst_p lst, int depth, int stride)
190{
191  int i;
192  lst_p l;
193  poly_bb_p pbb;
194
195  if (!lst)
196    return 0;
197
198  if (LST_LOOP_P (lst))
199    {
200      int res = 0;
201
202      FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
203	res += lst_do_strip_mine_loop (l, depth, stride);
204
205      return res;
206    }
207
208  pbb = LST_PBB (lst);
209  pbb_strip_mine_time_depth (pbb, psct_dynamic_dim (pbb, depth), stride);
210  return 1;
211}
212
213/* Strip-mines all the loops of LST with STRIDE.  When STRIDE is zero,
214   read the stride from the PARAM_LOOP_BLOCK_TILE_SIZE.  Return the
215   number of strip-mined loops.
216
217   Strip mining transforms a loop
218
219   | for (i = 0; i < N; i++)
220   |   S (i);
221
222   into the following loop nest:
223
224   | for (k = 0; k < N; k += STRIDE)
225   |   for (j = 0; j < STRIDE; j++)
226   |     S (i = k + j);
227*/
228
229static int
230lst_do_strip_mine (lst_p lst, int stride)
231{
232  int i;
233  lst_p l;
234  int res = 0;
235  int depth;
236
237  if (!stride)
238    stride = PARAM_VALUE (PARAM_LOOP_BLOCK_TILE_SIZE);
239
240  if (!lst
241      || !LST_LOOP_P (lst))
242    return false;
243
244  FOR_EACH_VEC_ELT (LST_SEQ (lst), i, l)
245    res += lst_do_strip_mine (l, stride);
246
247  depth = lst_depth (lst);
248  if (depth >= 0
249      && lst_strip_mine_profitable_p (lst, stride))
250    {
251      res += lst_do_strip_mine_loop (lst, lst_depth (lst), stride);
252      lst_add_loop_under_loop (lst);
253    }
254
255  return res;
256}
257
258/* Strip mines all the loops in SCOP.  Returns the number of
259   strip-mined loops.  */
260
261int
262scop_do_strip_mine (scop_p scop, int stride)
263{
264  return lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), stride);
265}
266
267/* Loop blocks all the loops in SCOP.  Returns true when we manage to
268   block some loops.  */
269
270bool
271scop_do_block (scop_p scop)
272{
273  store_scattering (scop);
274
275  /* If we don't strip mine at least two loops, or not interchange
276     loops, the strip mine alone will not be profitable, and the
277     transform is not a loop blocking: so revert the transform.  */
278  if (lst_do_strip_mine (SCOP_TRANSFORMED_SCHEDULE (scop), 0) < 2
279      || scop_do_interchange (scop) == 0)
280    {
281      restore_scattering (scop);
282      return false;
283    }
284
285  if (dump_file && (dump_flags & TDF_DETAILS))
286    fprintf (dump_file, "SCoP will be loop blocked.\n");
287
288  return true;
289}
290
291#endif
292