1/* Linear Loop transforms
2   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
3   Contributed by Daniel Berlin <dberlin@dberlin.org>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30
31#include "rtl.h"
32#include "basic-block.h"
33#include "diagnostic.h"
34#include "tree-flow.h"
35#include "tree-dump.h"
36#include "timevar.h"
37#include "cfgloop.h"
38#include "expr.h"
39#include "optabs.h"
40#include "tree-chrec.h"
41#include "tree-data-ref.h"
42#include "tree-scalar-evolution.h"
43#include "tree-pass.h"
44#include "lambda.h"
45
46/* Linear loop transforms include any composition of interchange,
47   scaling, skewing, and reversal.  They are used to change the
48   iteration order of loop nests in order to optimize data locality of
49   traversals, or remove dependences that prevent
50   parallelization/vectorization/etc.
51
52   TODO: Determine reuse vectors/matrix and use it to determine optimal
53   transform matrix for locality purposes.
54   TODO: Completion of partial transforms.  */
55
56/* Gather statistics for loop interchange.  LOOP is the loop being
57   considered. The first loop in the considered loop nest is
58   FIRST_LOOP, and consequently, the index of the considered loop is
59   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
60
61   Initializes:
62   - DEPENDENCE_STEPS the sum of all the data dependence distances
63   carried by loop LOOP,
64
65   - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
66   for which the loop LOOP is not carrying any dependence,
67
68   - ACCESS_STRIDES the sum of all the strides in LOOP.
69
70   Example: for the following loop,
71
72   | loop_1 runs 1335 times
73   |   loop_2 runs 1335 times
74   |     A[{{0, +, 1}_1, +, 1335}_2]
75   |     B[{{0, +, 1}_1, +, 1335}_2]
76   |   endloop_2
77   |   A[{0, +, 1336}_1]
78   | endloop_1
79
80   gather_interchange_stats (in loop_1) will return
81   DEPENDENCE_STEPS = 3002
82   NB_DEPS_NOT_CARRIED_BY_LOOP = 5
83   ACCESS_STRIDES = 10694
84
85   gather_interchange_stats (in loop_2) will return
86   DEPENDENCE_STEPS = 3000
87   NB_DEPS_NOT_CARRIED_BY_LOOP = 7
88   ACCESS_STRIDES = 8010
89*/
90
91static void
92gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations,
93			  VEC (data_reference_p, heap) *datarefs,
94			  struct loop *loop,
95			  struct loop *first_loop,
96			  unsigned int *dependence_steps,
97			  unsigned int *nb_deps_not_carried_by_loop,
98			  unsigned int *access_strides)
99{
100  unsigned int i, j;
101  struct data_dependence_relation *ddr;
102  struct data_reference *dr;
103
104  *dependence_steps = 0;
105  *nb_deps_not_carried_by_loop = 0;
106  *access_strides = 0;
107
108  for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
109    {
110      /* If we don't know anything about this dependence, or the distance
111	 vector is NULL, or there is no dependence, then there is no reuse of
112	 data.  */
113      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
114	  || DDR_ARE_DEPENDENT (ddr) == chrec_known
115	  || DDR_NUM_DIST_VECTS (ddr) == 0)
116	continue;
117
118      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
119	{
120	  int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
121
122	  if (dist == 0)
123	    (*nb_deps_not_carried_by_loop) += 1;
124
125	  else if (dist < 0)
126	    (*dependence_steps) += -dist;
127
128	  else
129	    (*dependence_steps) += dist;
130	}
131    }
132
133  /* Compute the access strides.  */
134  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
135    {
136      unsigned int it;
137      tree stmt = DR_STMT (dr);
138      struct loop *stmt_loop = loop_containing_stmt (stmt);
139      struct loop *inner_loop = first_loop->inner;
140
141      if (inner_loop != stmt_loop
142	  && !flow_loop_nested_p (inner_loop, stmt_loop))
143	continue;
144      for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
145	{
146	  tree chrec = DR_ACCESS_FN (dr, it);
147	  tree tstride = evolution_part_in_loop_num
148	    (chrec, loop->num);
149
150	  if (tstride == NULL_TREE
151	      || TREE_CODE (tstride) != INTEGER_CST)
152	    continue;
153
154	  (*access_strides) += int_cst_value (tstride);
155	}
156    }
157}
158
159/* Attempt to apply interchange transformations to TRANS to maximize the
160   spatial and temporal locality of the loop.
161   Returns the new transform matrix.  The smaller the reuse vector
162   distances in the inner loops, the fewer the cache misses.
163   FIRST_LOOP is the loop->num of the first loop in the analyzed loop
164   nest.  */
165
166
167static lambda_trans_matrix
168try_interchange_loops (lambda_trans_matrix trans,
169		       unsigned int depth,
170		       VEC (ddr_p, heap) *dependence_relations,
171		       VEC (data_reference_p, heap) *datarefs,
172		       struct loop *first_loop)
173{
174  struct loop *loop_i;
175  struct loop *loop_j;
176  unsigned int dependence_steps_i, dependence_steps_j;
177  unsigned int access_strides_i, access_strides_j;
178  unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
179  struct data_dependence_relation *ddr;
180
181  if (VEC_length (ddr_p, dependence_relations) == 0)
182    return trans;
183
184  /* When there is an unknown relation in the dependence_relations, we
185     know that it is no worth looking at this loop nest: give up.  */
186  ddr = VEC_index (ddr_p, dependence_relations, 0);
187  if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
188    return trans;
189
190  /* LOOP_I is always the outer loop.  */
191  for (loop_j = first_loop->inner;
192       loop_j;
193       loop_j = loop_j->inner)
194    for (loop_i = first_loop;
195	 loop_i->depth < loop_j->depth;
196	 loop_i = loop_i->inner)
197      {
198	gather_interchange_stats (dependence_relations, datarefs,
199				  loop_i, first_loop,
200				  &dependence_steps_i,
201				  &nb_deps_not_carried_by_i,
202				  &access_strides_i);
203	gather_interchange_stats (dependence_relations, datarefs,
204				  loop_j, first_loop,
205				  &dependence_steps_j,
206				  &nb_deps_not_carried_by_j,
207				  &access_strides_j);
208
209	/* Heuristics for loop interchange profitability:
210
211	   1. (spatial locality) Inner loops should have smallest
212              dependence steps.
213
214	   2. (spatial locality) Inner loops should contain more
215	   dependence relations not carried by the loop.
216
217	   3. (temporal locality) Inner loops should have smallest
218	      array access strides.
219	*/
220	if (dependence_steps_i < dependence_steps_j
221	    || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
222	    || access_strides_i < access_strides_j)
223	  {
224	    lambda_matrix_row_exchange (LTM_MATRIX (trans),
225					loop_i->depth - first_loop->depth,
226					loop_j->depth - first_loop->depth);
227	    /* Validate the resulting matrix.  When the transformation
228	       is not valid, reverse to the previous transformation.  */
229	    if (!lambda_transform_legal_p (trans, depth, dependence_relations))
230	      lambda_matrix_row_exchange (LTM_MATRIX (trans),
231					  loop_i->depth - first_loop->depth,
232					  loop_j->depth - first_loop->depth);
233	  }
234      }
235
236  return trans;
237}
238
239/* Perform a set of linear transforms on LOOPS.  */
240
241void
242linear_transform_loops (struct loops *loops)
243{
244  bool modified = false;
245  unsigned int i;
246  VEC(tree,heap) *oldivs = NULL;
247  VEC(tree,heap) *invariants = NULL;
248
249  for (i = 1; i < loops->num; i++)
250    {
251      unsigned int depth = 0;
252      VEC (ddr_p, heap) *dependence_relations;
253      VEC (data_reference_p, heap) *datarefs;
254      struct loop *loop_nest = loops->parray[i];
255      struct loop *temp;
256      lambda_loopnest before, after;
257      lambda_trans_matrix trans;
258      bool problem = false;
259      /* If it's not a loop nest, we don't want it.
260         We also don't handle sibling loops properly,
261         which are loops of the following form:
262         for (i = 0; i < 50; i++)
263           {
264             for (j = 0; j < 50; j++)
265               {
266	        ...
267               }
268             for (j = 0; j < 50; j++)
269               {
270                ...
271               }
272           } */
273      if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit)
274	continue;
275      VEC_truncate (tree, oldivs, 0);
276      VEC_truncate (tree, invariants, 0);
277      depth = 1;
278      for (temp = loop_nest->inner; temp; temp = temp->inner)
279	{
280	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
281	  if (temp->next || !temp->single_exit)
282	    {
283	      problem = true;
284	      break;
285	    }
286	  depth ++;
287	}
288      if (problem)
289	continue;
290
291      /* Analyze data references and dependence relations using scev.  */
292
293      datarefs = VEC_alloc (data_reference_p, heap, 10);
294      dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
295      compute_data_dependences_for_loop (loop_nest, true, &datarefs,
296					 &dependence_relations);
297
298      if (dump_file && (dump_flags & TDF_DETAILS))
299	dump_ddrs (dump_file, dependence_relations);
300
301      /* Build the transformation matrix.  */
302      trans = lambda_trans_matrix_new (depth, depth);
303      lambda_matrix_id (LTM_MATRIX (trans), depth);
304      trans = try_interchange_loops (trans, depth, dependence_relations,
305				     datarefs, loop_nest);
306
307      if (lambda_trans_matrix_id_p (trans))
308	{
309	  if (dump_file)
310	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
311	  goto free_and_continue;
312	}
313
314      /* Check whether the transformation is legal.  */
315      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
316	{
317	  if (dump_file)
318	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
319	  goto free_and_continue;
320	}
321
322      before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
323						&invariants);
324
325      if (!before)
326	goto free_and_continue;
327
328      if (dump_file)
329	{
330	  fprintf (dump_file, "Before:\n");
331	  print_lambda_loopnest (dump_file, before, 'i');
332	}
333
334      after = lambda_loopnest_transform (before, trans);
335
336      if (dump_file)
337	{
338	  fprintf (dump_file, "After:\n");
339	  print_lambda_loopnest (dump_file, after, 'u');
340	}
341
342      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
343				       after, trans);
344      modified = true;
345
346      if (dump_file)
347	fprintf (dump_file, "Successfully transformed loop.\n");
348
349    free_and_continue:
350      free_dependence_relations (dependence_relations);
351      free_data_refs (datarefs);
352    }
353
354  VEC_free (tree, heap, oldivs);
355  VEC_free (tree, heap, invariants);
356  scev_reset ();
357
358  if (modified)
359    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
360}
361