1/* Linear Loop transforms
2   Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
3   Contributed by Daniel Berlin <dberlin@dberlin.org>.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to the Free
19Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2002110-1301, USA.  */
21
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30
31#include "rtl.h"
32#include "basic-block.h"
33#include "diagnostic.h"
34#include "tree-flow.h"
35#include "tree-dump.h"
36#include "timevar.h"
37#include "cfgloop.h"
38#include "expr.h"
39#include "optabs.h"
40#include "tree-chrec.h"
41#include "tree-data-ref.h"
42#include "tree-scalar-evolution.h"
43#include "tree-pass.h"
44#include "varray.h"
45#include "lambda.h"
46
47/* Linear loop transforms include any composition of interchange,
48   scaling, skewing, and reversal.  They are used to change the
49   iteration order of loop nests in order to optimize data locality of
50   traversals, or remove dependences that prevent
51   parallelization/vectorization/etc.
52
53   TODO: Determine reuse vectors/matrix and use it to determine optimal
54   transform matrix for locality purposes.
55   TODO: Completion of partial transforms.  */
56
57/* Gather statistics for loop interchange.  LOOP is the loop being
58   considered. The first loop in the considered loop nest is
59   FIRST_LOOP, and consequently, the index of the considered loop is
60   obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH
61
62   Initializes:
63   - DEPENDENCE_STEPS the sum of all the data dependence distances
64   carried by loop LOOP,
65
66   - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations
67   for which the loop LOOP is not carrying any dependence,
68
69   - ACCESS_STRIDES the sum of all the strides in LOOP.
70
71   Example: for the following loop,
72
73   | loop_1 runs 1335 times
74   |   loop_2 runs 1335 times
75   |     A[{{0, +, 1}_1, +, 1335}_2]
76   |     B[{{0, +, 1}_1, +, 1335}_2]
77   |   endloop_2
78   |   A[{0, +, 1336}_1]
79   | endloop_1
80
81   gather_interchange_stats (in loop_1) will return
82   DEPENDENCE_STEPS = 3002
83   NB_DEPS_NOT_CARRIED_BY_LOOP = 5
84   ACCESS_STRIDES = 10694
85
86   gather_interchange_stats (in loop_2) will return
87   DEPENDENCE_STEPS = 3000
88   NB_DEPS_NOT_CARRIED_BY_LOOP = 7
89   ACCESS_STRIDES = 8010
90*/
91
92static void
93gather_interchange_stats (varray_type dependence_relations,
94			  varray_type datarefs,
95			  struct loop *loop,
96			  struct loop *first_loop,
97			  unsigned int *dependence_steps,
98			  unsigned int *nb_deps_not_carried_by_loop,
99			  unsigned int *access_strides)
100{
101  unsigned int i, j;
102
103  *dependence_steps = 0;
104  *nb_deps_not_carried_by_loop = 0;
105  *access_strides = 0;
106
107  for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
108    {
109      struct data_dependence_relation *ddr =
110	(struct data_dependence_relation *)
111	VARRAY_GENERIC_PTR (dependence_relations, i);
112
113      /* If we don't know anything about this dependence, or the distance
114	 vector is NULL, or there is no dependence, then there is no reuse of
115	 data.  */
116      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
117	  || DDR_ARE_DEPENDENT (ddr) == chrec_known
118	  || DDR_NUM_DIST_VECTS (ddr) == 0)
119	continue;
120
121      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
122	{
123	  int dist = DDR_DIST_VECT (ddr, j)[loop->depth - first_loop->depth];
124
125	  if (dist == 0)
126	    (*nb_deps_not_carried_by_loop) += 1;
127
128	  else if (dist < 0)
129	    (*dependence_steps) += -dist;
130
131	  else
132	    (*dependence_steps) += dist;
133	}
134    }
135
136  /* Compute the access strides.  */
137  for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
138    {
139      unsigned int it;
140      struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
141      tree stmt = DR_STMT (dr);
142      struct loop *stmt_loop = loop_containing_stmt (stmt);
143      struct loop *inner_loop = first_loop->inner;
144
145      if (inner_loop != stmt_loop
146	  && !flow_loop_nested_p (inner_loop, stmt_loop))
147	continue;
148      for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++)
149	{
150	  tree chrec = DR_ACCESS_FN (dr, it);
151	  tree tstride = evolution_part_in_loop_num
152	    (chrec, loop->num);
153
154	  if (tstride == NULL_TREE
155	      || TREE_CODE (tstride) != INTEGER_CST)
156	    continue;
157
158	  (*access_strides) += int_cst_value (tstride);
159	}
160    }
161}
162
163/* Attempt to apply interchange transformations to TRANS to maximize the
164   spatial and temporal locality of the loop.
165   Returns the new transform matrix.  The smaller the reuse vector
166   distances in the inner loops, the fewer the cache misses.
167   FIRST_LOOP is the loop->num of the first loop in the analyzed loop
168   nest.  */
169
170
171static lambda_trans_matrix
172try_interchange_loops (lambda_trans_matrix trans,
173		       unsigned int depth,
174		       varray_type dependence_relations,
175		       varray_type datarefs,
176		       struct loop *first_loop)
177{
178  struct loop *loop_i;
179  struct loop *loop_j;
180  unsigned int dependence_steps_i, dependence_steps_j;
181  unsigned int access_strides_i, access_strides_j;
182  unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
183  struct data_dependence_relation *ddr;
184
185  /* When there is an unknown relation in the dependence_relations, we
186     know that it is no worth looking at this loop nest: give up.  */
187  ddr = (struct data_dependence_relation *)
188    VARRAY_GENERIC_PTR (dependence_relations, 0);
189  if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
190    return trans;
191
192  /* LOOP_I is always the outer loop.  */
193  for (loop_j = first_loop->inner;
194       loop_j;
195       loop_j = loop_j->inner)
196    for (loop_i = first_loop;
197	 loop_i->depth < loop_j->depth;
198	 loop_i = loop_i->inner)
199      {
200	gather_interchange_stats (dependence_relations, datarefs,
201				  loop_i, first_loop,
202				  &dependence_steps_i,
203				  &nb_deps_not_carried_by_i,
204				  &access_strides_i);
205	gather_interchange_stats (dependence_relations, datarefs,
206				  loop_j, first_loop,
207				  &dependence_steps_j,
208				  &nb_deps_not_carried_by_j,
209				  &access_strides_j);
210
211	/* Heuristics for loop interchange profitability:
212
213	   1. (spatial locality) Inner loops should have smallest
214              dependence steps.
215
216	   2. (spatial locality) Inner loops should contain more
217	   dependence relations not carried by the loop.
218
219	   3. (temporal locality) Inner loops should have smallest
220	      array access strides.
221	*/
222	if (dependence_steps_i < dependence_steps_j
223	    || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j
224	    || access_strides_i < access_strides_j)
225	  {
226	    lambda_matrix_row_exchange (LTM_MATRIX (trans),
227					loop_i->depth - first_loop->depth,
228					loop_j->depth - first_loop->depth);
229	    /* Validate the resulting matrix.  When the transformation
230	       is not valid, reverse to the previous transformation.  */
231	    if (!lambda_transform_legal_p (trans, depth, dependence_relations))
232	      lambda_matrix_row_exchange (LTM_MATRIX (trans),
233					  loop_i->depth - first_loop->depth,
234					  loop_j->depth - first_loop->depth);
235	  }
236      }
237
238  return trans;
239}
240
241/* Perform a set of linear transforms on LOOPS.  */
242
243void
244linear_transform_loops (struct loops *loops)
245{
246  bool modified = false;
247  unsigned int i;
248  VEC(tree,heap) *oldivs = NULL;
249  VEC(tree,heap) *invariants = NULL;
250
251  for (i = 1; i < loops->num; i++)
252    {
253      unsigned int depth = 0;
254      varray_type datarefs;
255      varray_type dependence_relations;
256      struct loop *loop_nest = loops->parray[i];
257      struct loop *temp;
258      lambda_loopnest before, after;
259      lambda_trans_matrix trans;
260      bool problem = false;
261      /* If it's not a loop nest, we don't want it.
262         We also don't handle sibling loops properly,
263         which are loops of the following form:
264         for (i = 0; i < 50; i++)
265           {
266             for (j = 0; j < 50; j++)
267               {
268	        ...
269               }
270             for (j = 0; j < 50; j++)
271               {
272                ...
273               }
274           } */
275      if (!loop_nest || !loop_nest->inner || !loop_nest->single_exit)
276	continue;
277      VEC_truncate (tree, oldivs, 0);
278      VEC_truncate (tree, invariants, 0);
279      depth = 1;
280      for (temp = loop_nest->inner; temp; temp = temp->inner)
281	{
282	  /* If we have a sibling loop or multiple exit edges, jump ship.  */
283	  if (temp->next || !temp->single_exit)
284	    {
285	      problem = true;
286	      break;
287	    }
288	  depth ++;
289	}
290      if (problem)
291	continue;
292
293      /* Analyze data references and dependence relations using scev.  */
294
295      VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
296      VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
297			       "dependence_relations");
298
299
300      compute_data_dependences_for_loop (loop_nest, true,
301					 &datarefs, &dependence_relations);
302      if (dump_file && (dump_flags & TDF_DETAILS))
303	{
304	  unsigned int j;
305	  for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
306	    {
307	      struct data_dependence_relation *ddr =
308		(struct data_dependence_relation *)
309		VARRAY_GENERIC_PTR (dependence_relations, j);
310
311	      if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
312		dump_data_dependence_relation (dump_file, ddr);
313	    }
314	  fprintf (dump_file, "\n\n");
315	}
316      /* Build the transformation matrix.  */
317      trans = lambda_trans_matrix_new (depth, depth);
318      lambda_matrix_id (LTM_MATRIX (trans), depth);
319
320      trans = try_interchange_loops (trans, depth, dependence_relations,
321				     datarefs, loop_nest);
322
323      if (lambda_trans_matrix_id_p (trans))
324	{
325	  if (dump_file)
326	   fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n");
327	  goto free_and_continue;
328	}
329
330      /* Check whether the transformation is legal.  */
331      if (!lambda_transform_legal_p (trans, depth, dependence_relations))
332	{
333	  if (dump_file)
334	    fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
335	  goto free_and_continue;
336	}
337
338      before = gcc_loopnest_to_lambda_loopnest (loops, loop_nest, &oldivs,
339						&invariants);
340      if (!before)
341	goto free_and_continue;
342
343      if (dump_file)
344	{
345	  fprintf (dump_file, "Before:\n");
346	  print_lambda_loopnest (dump_file, before, 'i');
347	}
348
349      after = lambda_loopnest_transform (before, trans);
350      if (dump_file)
351	{
352	  fprintf (dump_file, "After:\n");
353	  print_lambda_loopnest (dump_file, after, 'u');
354	}
355      lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
356				       after, trans);
357      modified = true;
358
359      if (dump_file)
360	fprintf (dump_file, "Successfully transformed loop.\n");
361
362    free_and_continue:
363      free_dependence_relations (dependence_relations);
364      free_data_refs (datarefs);
365    }
366  VEC_free (tree, heap, oldivs);
367  VEC_free (tree, heap, invariants);
368  scev_reset ();
369
370  if (modified)
371    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi);
372}
373