1/* Data references and dependences detectors.
2   Copyright (C) 2003-2022 Free Software Foundation, Inc.
3   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21/* This pass walks a given loop structure searching for array
22   references.  The information about the array accesses is recorded
23   in DATA_REFERENCE structures.
24
25   The basic test for determining the dependences is:
26   given two access functions chrec1 and chrec2 to a same array, and
27   x and y two vectors from the iteration domain, the same element of
28   the array is accessed twice at iterations x and y if and only if:
29   |             chrec1 (x) == chrec2 (y).
30
31   The goals of this analysis are:
32
33   - to determine the independence: the relation between two
34     independent accesses is qualified with the chrec_known (this
35     information allows a loop parallelization),
36
37   - when two data references access the same data, to qualify the
38     dependence relation with classic dependence representations:
39
40       - distance vectors
41       - direction vectors
42       - loop carried level dependence
43       - polyhedron dependence
44     or with the chains of recurrences based representation,
45
46   - to define a knowledge base for storing the data dependence
47     information,
48
49   - to define an interface to access this data.
50
51
52   Definitions:
53
54   - subscript: given two array accesses a subscript is the tuple
55   composed of the access functions for a given dimension.  Example:
56   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
57   (f1, g1), (f2, g2), (f3, g3).
58
59   - Diophantine equation: an equation whose coefficients and
60   solutions are integer constants, for example the equation
61   |   3*x + 2*y = 1
62   has an integer solution x = 1 and y = -1.
63
64   References:
65
66   - "Advanced Compilation for High Performance Computing" by Randy
67   Allen and Ken Kennedy.
68   http://citeseer.ist.psu.edu/goff91practical.html
69
70   - "Loop Transformations for Restructuring Compilers - The Foundations"
71   by Utpal Banerjee.
72
73
74*/
75
76#include "config.h"
77#include "system.h"
78#include "coretypes.h"
79#include "backend.h"
80#include "rtl.h"
81#include "tree.h"
82#include "gimple.h"
83#include "gimple-pretty-print.h"
84#include "alias.h"
85#include "fold-const.h"
86#include "expr.h"
87#include "gimple-iterator.h"
88#include "tree-ssa-loop-niter.h"
89#include "tree-ssa-loop.h"
90#include "tree-ssa.h"
91#include "cfgloop.h"
92#include "tree-data-ref.h"
93#include "tree-scalar-evolution.h"
94#include "dumpfile.h"
95#include "tree-affine.h"
96#include "builtins.h"
97#include "tree-eh.h"
98#include "ssa.h"
99#include "internal-fn.h"
100#include "vr-values.h"
101#include "range-op.h"
102#include "tree-ssa-loop-ivopts.h"
103
104static struct datadep_stats
105{
106  int num_dependence_tests;
107  int num_dependence_dependent;
108  int num_dependence_independent;
109  int num_dependence_undetermined;
110
111  int num_subscript_tests;
112  int num_subscript_undetermined;
113  int num_same_subscript_function;
114
115  int num_ziv;
116  int num_ziv_independent;
117  int num_ziv_dependent;
118  int num_ziv_unimplemented;
119
120  int num_siv;
121  int num_siv_independent;
122  int num_siv_dependent;
123  int num_siv_unimplemented;
124
125  int num_miv;
126  int num_miv_independent;
127  int num_miv_dependent;
128  int num_miv_unimplemented;
129} dependence_stats;
130
131static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
132					   unsigned int, unsigned int,
133					   class loop *);
134/* Returns true iff A divides B.  */
135
136static inline bool
137tree_fold_divides_p (const_tree a, const_tree b)
138{
139  gcc_assert (TREE_CODE (a) == INTEGER_CST);
140  gcc_assert (TREE_CODE (b) == INTEGER_CST);
141  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
142}
143
144/* Returns true iff A divides B.  */
145
146static inline bool
147int_divides_p (lambda_int a, lambda_int b)
148{
149  return ((b % a) == 0);
150}
151
152/* Return true if reference REF contains a union access.  */
153
154static bool
155ref_contains_union_access_p (tree ref)
156{
157  while (handled_component_p (ref))
158    {
159      ref = TREE_OPERAND (ref, 0);
160      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
161	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
162	return true;
163    }
164  return false;
165}
166
167
168
169/* Dump into FILE all the data references from DATAREFS.  */
170
171static void
172dump_data_references (FILE *file, vec<data_reference_p> datarefs)
173{
174  for (data_reference *dr : datarefs)
175    dump_data_reference (file, dr);
176}
177
178/* Unified dump into FILE all the data references from DATAREFS.  */
179
180DEBUG_FUNCTION void
181debug (vec<data_reference_p> &ref)
182{
183  dump_data_references (stderr, ref);
184}
185
186DEBUG_FUNCTION void
187debug (vec<data_reference_p> *ptr)
188{
189  if (ptr)
190    debug (*ptr);
191  else
192    fprintf (stderr, "<nil>\n");
193}
194
195
196/* Dump into STDERR all the data references from DATAREFS.  */
197
198DEBUG_FUNCTION void
199debug_data_references (vec<data_reference_p> datarefs)
200{
201  dump_data_references (stderr, datarefs);
202}
203
204/* Print to STDERR the data_reference DR.  */
205
206DEBUG_FUNCTION void
207debug_data_reference (struct data_reference *dr)
208{
209  dump_data_reference (stderr, dr);
210}
211
212/* Dump function for a DATA_REFERENCE structure.  */
213
214void
215dump_data_reference (FILE *outf,
216		     struct data_reference *dr)
217{
218  unsigned int i;
219
220  fprintf (outf, "#(Data Ref: \n");
221  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
222  fprintf (outf, "#  stmt: ");
223  print_gimple_stmt (outf, DR_STMT (dr), 0);
224  fprintf (outf, "#  ref: ");
225  print_generic_stmt (outf, DR_REF (dr));
226  fprintf (outf, "#  base_object: ");
227  print_generic_stmt (outf, DR_BASE_OBJECT (dr));
228
229  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
230    {
231      fprintf (outf, "#  Access function %d: ", i);
232      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
233    }
234  fprintf (outf, "#)\n");
235}
236
237/* Unified dump function for a DATA_REFERENCE structure.  */
238
239DEBUG_FUNCTION void
240debug (data_reference &ref)
241{
242  dump_data_reference (stderr, &ref);
243}
244
245DEBUG_FUNCTION void
246debug (data_reference *ptr)
247{
248  if (ptr)
249    debug (*ptr);
250  else
251    fprintf (stderr, "<nil>\n");
252}
253
254
255/* Dumps the affine function described by FN to the file OUTF.  */
256
257DEBUG_FUNCTION void
258dump_affine_function (FILE *outf, affine_fn fn)
259{
260  unsigned i;
261  tree coef;
262
263  print_generic_expr (outf, fn[0], TDF_SLIM);
264  for (i = 1; fn.iterate (i, &coef); i++)
265    {
266      fprintf (outf, " + ");
267      print_generic_expr (outf, coef, TDF_SLIM);
268      fprintf (outf, " * x_%u", i);
269    }
270}
271
272/* Dumps the conflict function CF to the file OUTF.  */
273
274DEBUG_FUNCTION void
275dump_conflict_function (FILE *outf, conflict_function *cf)
276{
277  unsigned i;
278
279  if (cf->n == NO_DEPENDENCE)
280    fprintf (outf, "no dependence");
281  else if (cf->n == NOT_KNOWN)
282    fprintf (outf, "not known");
283  else
284    {
285      for (i = 0; i < cf->n; i++)
286	{
287	  if (i != 0)
288	    fprintf (outf, " ");
289	  fprintf (outf, "[");
290	  dump_affine_function (outf, cf->fns[i]);
291	  fprintf (outf, "]");
292	}
293    }
294}
295
296/* Dump function for a SUBSCRIPT structure.  */
297
298DEBUG_FUNCTION void
299dump_subscript (FILE *outf, struct subscript *subscript)
300{
301  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
302
303  fprintf (outf, "\n (subscript \n");
304  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
305  dump_conflict_function (outf, cf);
306  if (CF_NONTRIVIAL_P (cf))
307    {
308      tree last_iteration = SUB_LAST_CONFLICT (subscript);
309      fprintf (outf, "\n  last_conflict: ");
310      print_generic_expr (outf, last_iteration);
311    }
312
313  cf = SUB_CONFLICTS_IN_B (subscript);
314  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
315  dump_conflict_function (outf, cf);
316  if (CF_NONTRIVIAL_P (cf))
317    {
318      tree last_iteration = SUB_LAST_CONFLICT (subscript);
319      fprintf (outf, "\n  last_conflict: ");
320      print_generic_expr (outf, last_iteration);
321    }
322
323  fprintf (outf, "\n  (Subscript distance: ");
324  print_generic_expr (outf, SUB_DISTANCE (subscript));
325  fprintf (outf, " ))\n");
326}
327
328/* Print the classic direction vector DIRV to OUTF.  */
329
330DEBUG_FUNCTION void
331print_direction_vector (FILE *outf,
332			lambda_vector dirv,
333			int length)
334{
335  int eq;
336
337  for (eq = 0; eq < length; eq++)
338    {
339      enum data_dependence_direction dir = ((enum data_dependence_direction)
340					    dirv[eq]);
341
342      switch (dir)
343	{
344	case dir_positive:
345	  fprintf (outf, "    +");
346	  break;
347	case dir_negative:
348	  fprintf (outf, "    -");
349	  break;
350	case dir_equal:
351	  fprintf (outf, "    =");
352	  break;
353	case dir_positive_or_equal:
354	  fprintf (outf, "   +=");
355	  break;
356	case dir_positive_or_negative:
357	  fprintf (outf, "   +-");
358	  break;
359	case dir_negative_or_equal:
360	  fprintf (outf, "   -=");
361	  break;
362	case dir_star:
363	  fprintf (outf, "    *");
364	  break;
365	default:
366	  fprintf (outf, "indep");
367	  break;
368	}
369    }
370  fprintf (outf, "\n");
371}
372
373/* Print a vector of direction vectors.  */
374
375DEBUG_FUNCTION void
376print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
377		   int length)
378{
379  for (lambda_vector v : dir_vects)
380    print_direction_vector (outf, v, length);
381}
382
383/* Print out a vector VEC of length N to OUTFILE.  */
384
385DEBUG_FUNCTION void
386print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
387{
388  int i;
389
390  for (i = 0; i < n; i++)
391    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
392  fprintf (outfile, "\n");
393}
394
395/* Print a vector of distance vectors.  */
396
397DEBUG_FUNCTION void
398print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
399		    int length)
400{
401  for (lambda_vector v : dist_vects)
402    print_lambda_vector (outf, v, length);
403}
404
405/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */
406
407DEBUG_FUNCTION void
408dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
409{
410  struct data_reference *dra, *drb;
411
412  fprintf (outf, "(Data Dep: \n");
413
414  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
415    {
416      if (ddr)
417	{
418	  dra = DDR_A (ddr);
419	  drb = DDR_B (ddr);
420	  if (dra)
421	    dump_data_reference (outf, dra);
422	  else
423	    fprintf (outf, "    (nil)\n");
424	  if (drb)
425	    dump_data_reference (outf, drb);
426	  else
427	    fprintf (outf, "    (nil)\n");
428	}
429      fprintf (outf, "    (don't know)\n)\n");
430      return;
431    }
432
433  dra = DDR_A (ddr);
434  drb = DDR_B (ddr);
435  dump_data_reference (outf, dra);
436  dump_data_reference (outf, drb);
437
438  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
439    fprintf (outf, "    (no dependence)\n");
440
441  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
442    {
443      unsigned int i;
444      class loop *loopi;
445
446      subscript *sub;
447      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
448	{
449	  fprintf (outf, "  access_fn_A: ");
450	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
451	  fprintf (outf, "  access_fn_B: ");
452	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
453	  dump_subscript (outf, sub);
454	}
455
456      fprintf (outf, "  loop nest: (");
457      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
458	fprintf (outf, "%d ", loopi->num);
459      fprintf (outf, ")\n");
460
461      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
462	{
463	  fprintf (outf, "  distance_vector: ");
464	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
465			       DDR_NB_LOOPS (ddr));
466	}
467
468      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
469	{
470	  fprintf (outf, "  direction_vector: ");
471	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
472				  DDR_NB_LOOPS (ddr));
473	}
474    }
475
476  fprintf (outf, ")\n");
477}
478
479/* Debug version.  */
480
481DEBUG_FUNCTION void
482debug_data_dependence_relation (const struct data_dependence_relation *ddr)
483{
484  dump_data_dependence_relation (stderr, ddr);
485}
486
487/* Dump into FILE all the dependence relations from DDRS.  */
488
489DEBUG_FUNCTION void
490dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
491{
492  for (auto ddr : ddrs)
493    dump_data_dependence_relation (file, ddr);
494}
495
496DEBUG_FUNCTION void
497debug (vec<ddr_p> &ref)
498{
499  dump_data_dependence_relations (stderr, ref);
500}
501
502DEBUG_FUNCTION void
503debug (vec<ddr_p> *ptr)
504{
505  if (ptr)
506    debug (*ptr);
507  else
508    fprintf (stderr, "<nil>\n");
509}
510
511
512/* Dump to STDERR all the dependence relations from DDRS.  */
513
514DEBUG_FUNCTION void
515debug_data_dependence_relations (vec<ddr_p> ddrs)
516{
517  dump_data_dependence_relations (stderr, ddrs);
518}
519
520/* Dumps the distance and direction vectors in FILE.  DDRS contains
521   the dependence relations, and VECT_SIZE is the size of the
522   dependence vectors, or in other words the number of loops in the
523   considered nest.  */
524
525DEBUG_FUNCTION void
526dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
527{
528  for (data_dependence_relation *ddr : ddrs)
529    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
530      {
531	for (lambda_vector v : DDR_DIST_VECTS (ddr))
532	  {
533	    fprintf (file, "DISTANCE_V (");
534	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
535	    fprintf (file, ")\n");
536	  }
537
538	for (lambda_vector v : DDR_DIR_VECTS (ddr))
539	  {
540	    fprintf (file, "DIRECTION_V (");
541	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
542	    fprintf (file, ")\n");
543	  }
544      }
545
546  fprintf (file, "\n\n");
547}
548
549/* Dumps the data dependence relations DDRS in FILE.  */
550
551DEBUG_FUNCTION void
552dump_ddrs (FILE *file, vec<ddr_p> ddrs)
553{
554  for (data_dependence_relation *ddr : ddrs)
555    dump_data_dependence_relation (file, ddr);
556
557  fprintf (file, "\n\n");
558}
559
560DEBUG_FUNCTION void
561debug_ddrs (vec<ddr_p> ddrs)
562{
563  dump_ddrs (stderr, ddrs);
564}
565
566/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
567   OP0 CODE OP1, where:
568
569   - OP0 CODE OP1 has integral type TYPE
570   - the range of OP0 is given by OP0_RANGE and
571   - the range of OP1 is given by OP1_RANGE.
572
573   Independently of RESULT_RANGE, try to compute:
574
575     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
576	     - (sizetype) (OP0 CODE OP1)
577
578   as a constant and subtract DELTA from the ssizetype constant in *OFF.
579   Return true on success, or false if DELTA is not known at compile time.
580
581   Truncation and sign changes are known to distribute over CODE, i.e.
582
583     (itype) (A CODE B) == (itype) A CODE (itype) B
584
585   for any integral type ITYPE whose precision is no greater than the
586   precision of A and B.  */
587
588static bool
589compute_distributive_range (tree type, value_range &op0_range,
590			    tree_code code, value_range &op1_range,
591			    tree *off, value_range *result_range)
592{
593  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
594  if (result_range)
595    {
596      range_operator *op = range_op_handler (code, type);
597      op->fold_range (*result_range, type, op0_range, op1_range);
598    }
599
600  /* The distributive property guarantees that if TYPE is no narrower
601     than SIZETYPE,
602
603       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1
604
605     and so we can treat DELTA as zero.  */
606  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
607    return true;
608
609  /* If overflow is undefined, we can assume that:
610
611       X == (ssizetype) OP0 CODE (ssizetype) OP1
612
613     is within the range of TYPE, i.e.:
614
615       X == (ssizetype) (TYPE) X
616
617     Distributing the (TYPE) truncation over X gives:
618
619       X == (ssizetype) (OP0 CODE OP1)
620
621     Casting both sides to sizetype and distributing the sizetype cast
622     over X gives:
623
624       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)
625
626     and so we can treat DELTA as zero.  */
627  if (TYPE_OVERFLOW_UNDEFINED (type))
628    return true;
629
630  /* Compute the range of:
631
632       (ssizetype) OP0 CODE (ssizetype) OP1
633
634     The distributive property guarantees that this has the same bitpattern as:
635
636       (sizetype) OP0 CODE (sizetype) OP1
637
638     but its range is more conducive to analysis.  */
639  range_cast (op0_range, ssizetype);
640  range_cast (op1_range, ssizetype);
641  value_range wide_range;
642  range_operator *op = range_op_handler (code, ssizetype);
643  bool saved_flag_wrapv = flag_wrapv;
644  flag_wrapv = 1;
645  op->fold_range (wide_range, ssizetype, op0_range, op1_range);
646  flag_wrapv = saved_flag_wrapv;
647  if (wide_range.num_pairs () != 1 || !range_int_cst_p (&wide_range))
648    return false;
649
650  wide_int lb = wide_range.lower_bound ();
651  wide_int ub = wide_range.upper_bound ();
652
653  /* Calculate the number of times that each end of the range overflows or
654     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
655  unsigned int precision = TYPE_PRECISION (type);
656  if (!TYPE_UNSIGNED (type))
657    {
658      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
659      lb -= type_min;
660      ub -= type_min;
661    }
662  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
663  lb &= upper_bits;
664  ub &= upper_bits;
665  if (lb != ub)
666    return false;
667
668  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
669     negative values indicating underflow.  The low PRECISION bits of LB
670     are clear, so DELTA is therefore LB (== UB).  */
671  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
672  return true;
673}
674
675/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
676   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
677   FROM_TYPE are integral types.  */
678
679static bool
680nop_conversion_for_offset_p (tree to_type, tree from_type, value_range &range)
681{
682  gcc_assert (INTEGRAL_TYPE_P (to_type)
683	      && INTEGRAL_TYPE_P (from_type)
684	      && !TYPE_OVERFLOW_TRAPS (to_type)
685	      && !TYPE_OVERFLOW_TRAPS (from_type));
686
687  /* Converting to something no narrower than sizetype and then to sizetype
688     is equivalent to converting directly to sizetype.  */
689  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
690    return true;
691
692  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
693  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
694      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
695    return true;
696
697  /* For narrowing conversions, we could in principle test whether
698     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
699     and apply a constant adjustment.
700
701     For other conversions (which involve a sign change) we could
702     check that the signs are always equal, and apply a constant
703     adjustment if the signs are negative.
704
705     However, both cases should be rare.  */
706  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
707			    TYPE_SIGN (to_type));
708}
709
710static void
711split_constant_offset (tree type, tree *var, tree *off,
712		       value_range *result_range,
713		       hash_map<tree, std::pair<tree, tree> > &cache,
714		       unsigned *limit);
715
716/* Helper function for split_constant_offset.  If TYPE is a pointer type,
717   try to express OP0 CODE OP1 as:
718
719     POINTER_PLUS <*VAR, (sizetype) *OFF>
720
721   where:
722
723   - *VAR has type TYPE
724   - *OFF is a constant of type ssizetype.
725
726   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:
727
728     *VAR + (sizetype) *OFF
729
730   where:
731
732   - *VAR has type sizetype
733   - *OFF is a constant of type ssizetype.
734
735   In both cases, OP0 CODE OP1 has type TYPE.
736
737   Return true on success.  A false return value indicates that we can't
738   do better than set *OFF to zero.
739
740   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
741   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.
742
743   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
744   visited.  LIMIT counts down the number of SSA names that we are
745   allowed to process before giving up.  */
746
747static bool
748split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
749			 tree *var, tree *off, value_range *result_range,
750			 hash_map<tree, std::pair<tree, tree> > &cache,
751			 unsigned *limit)
752{
753  tree var0, var1;
754  tree off0, off1;
755  value_range op0_range, op1_range;
756
757  *var = NULL_TREE;
758  *off = NULL_TREE;
759
760  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
761    return false;
762
763  switch (code)
764    {
765    case INTEGER_CST:
766      *var = size_int (0);
767      *off = fold_convert (ssizetype, op0);
768      if (result_range)
769	result_range->set (op0, op0);
770      return true;
771
772    case POINTER_PLUS_EXPR:
773      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
774      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
775      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
776      *off = size_binop (PLUS_EXPR, off0, off1);
777      return true;
778
779    case PLUS_EXPR:
780    case MINUS_EXPR:
781      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
782      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
783      *off = size_binop (code, off0, off1);
784      if (!compute_distributive_range (type, op0_range, code, op1_range,
785				       off, result_range))
786	return false;
787      *var = fold_build2 (code, sizetype, var0, var1);
788      return true;
789
790    case MULT_EXPR:
791      if (TREE_CODE (op1) != INTEGER_CST)
792	return false;
793
794      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
795      op1_range.set (op1, op1);
796      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
797      if (!compute_distributive_range (type, op0_range, code, op1_range,
798				       off, result_range))
799	return false;
800      *var = fold_build2 (MULT_EXPR, sizetype, var0,
801			  fold_convert (sizetype, op1));
802      return true;
803
804    case ADDR_EXPR:
805      {
806	tree base, poffset;
807	poly_int64 pbitsize, pbitpos, pbytepos;
808	machine_mode pmode;
809	int punsignedp, preversep, pvolatilep;
810
811	op0 = TREE_OPERAND (op0, 0);
812	base
813	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
814				 &punsignedp, &preversep, &pvolatilep);
815
816	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
817	  return false;
818	base = build_fold_addr_expr (base);
819	off0 = ssize_int (pbytepos);
820
821	if (poffset)
822	  {
823	    split_constant_offset (poffset, &poffset, &off1, nullptr,
824				   cache, limit);
825	    off0 = size_binop (PLUS_EXPR, off0, off1);
826	    base = fold_build_pointer_plus (base, poffset);
827	  }
828
829	var0 = fold_convert (type, base);
830
831	/* If variable length types are involved, punt, otherwise casts
832	   might be converted into ARRAY_REFs in gimplify_conversion.
833	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
834	   possibly no longer appears in current GIMPLE, might resurface.
835	   This perhaps could run
836	   if (CONVERT_EXPR_P (var0))
837	     {
838	       gimplify_conversion (&var0);
839	       // Attempt to fill in any within var0 found ARRAY_REF's
840	       // element size from corresponding op embedded ARRAY_REF,
841	       // if unsuccessful, just punt.
842	     }  */
843	while (POINTER_TYPE_P (type))
844	  type = TREE_TYPE (type);
845	if (int_size_in_bytes (type) < 0)
846	  return false;
847
848	*var = var0;
849	*off = off0;
850	return true;
851      }
852
853    case SSA_NAME:
854      {
855	if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
856	  return false;
857
858	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
859	enum tree_code subcode;
860
861	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
862	  return false;
863
864	subcode = gimple_assign_rhs_code (def_stmt);
865
866	/* We are using a cache to avoid un-CSEing large amounts of code.  */
867	bool use_cache = false;
868	if (!has_single_use (op0)
869	    && (subcode == POINTER_PLUS_EXPR
870		|| subcode == PLUS_EXPR
871		|| subcode == MINUS_EXPR
872		|| subcode == MULT_EXPR
873		|| subcode == ADDR_EXPR
874		|| CONVERT_EXPR_CODE_P (subcode)))
875	  {
876	    use_cache = true;
877	    bool existed;
878	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
879	    if (existed)
880	      {
881		if (integer_zerop (e.second))
882		  return false;
883		*var = e.first;
884		*off = e.second;
885		/* The caller sets the range in this case.  */
886		return true;
887	      }
888	    e = std::make_pair (op0, ssize_int (0));
889	  }
890
891	if (*limit == 0)
892	  return false;
893	--*limit;
894
895	var0 = gimple_assign_rhs1 (def_stmt);
896	var1 = gimple_assign_rhs2 (def_stmt);
897
898	bool res = split_constant_offset_1 (type, var0, subcode, var1,
899					    var, off, nullptr, cache, limit);
900	if (res && use_cache)
901	  *cache.get (op0) = std::make_pair (*var, *off);
902	/* The caller sets the range in this case.  */
903	return res;
904      }
905    CASE_CONVERT:
906      {
907	/* We can only handle the following conversions:
908
909	   - Conversions from one pointer type to another pointer type.
910
911	   - Conversions from one non-trapping integral type to another
912	     non-trapping integral type.  In this case, the recursive
913	     call makes sure that:
914
915	       (sizetype) OP0
916
917	     can be expressed as a sizetype operation involving VAR and OFF,
918	     and all we need to do is check whether:
919
920	       (sizetype) OP0 == (sizetype) (TYPE) OP0
921
922	   - Conversions from a non-trapping sizetype-size integral type to
923	     a like-sized pointer type.  In this case, the recursive call
924	     makes sure that:
925
926	       (sizetype) OP0 == *VAR + (sizetype) *OFF
927
928	     and we can convert that to:
929
930	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>
931
932	   - Conversions from a sizetype-sized pointer type to a like-sized
933	     non-trapping integral type.  In this case, the recursive call
934	     makes sure that:
935
936	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>
937
938	     where the POINTER_PLUS and *VAR have the same precision as
939	     TYPE (and the same precision as sizetype).  Then:
940
941	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
942	tree itype = TREE_TYPE (op0);
943	if ((POINTER_TYPE_P (itype)
944	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
945	    && (POINTER_TYPE_P (type)
946		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
947	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
948		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
949		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
950	  {
951	    if (POINTER_TYPE_P (type))
952	      {
953		split_constant_offset (op0, var, off, nullptr, cache, limit);
954		*var = fold_convert (type, *var);
955	      }
956	    else if (POINTER_TYPE_P (itype))
957	      {
958		split_constant_offset (op0, var, off, nullptr, cache, limit);
959		*var = fold_convert (sizetype, *var);
960	      }
961	    else
962	      {
963		split_constant_offset (op0, var, off, &op0_range,
964				       cache, limit);
965		if (!nop_conversion_for_offset_p (type, itype, op0_range))
966		  return false;
967		if (result_range)
968		  {
969		    *result_range = op0_range;
970		    range_cast (*result_range, type);
971		  }
972	      }
973	    return true;
974	  }
975	return false;
976      }
977
978    default:
979      return false;
980    }
981}
982
983/* If EXP has pointer type, try to express it as:
984
985     POINTER_PLUS <*VAR, (sizetype) *OFF>
986
987   where:
988
989   - *VAR has the same type as EXP
990   - *OFF is a constant of type ssizetype.
991
992   If EXP has an integral type, try to express (sizetype) EXP as:
993
994     *VAR + (sizetype) *OFF
995
996   where:
997
998   - *VAR has type sizetype
999   - *OFF is a constant of type ssizetype.
1000
1001   If EXP_RANGE is nonnull, set it to the range of EXP.
1002
1003   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
1004   visited.  LIMIT counts down the number of SSA names that we are
1005   allowed to process before giving up.  */
1006
1007static void
1008split_constant_offset (tree exp, tree *var, tree *off, value_range *exp_range,
1009		       hash_map<tree, std::pair<tree, tree> > &cache,
1010		       unsigned *limit)
1011{
1012  tree type = TREE_TYPE (exp), op0, op1;
1013  enum tree_code code;
1014
1015  code = TREE_CODE (exp);
1016  if (exp_range)
1017    {
1018      *exp_range = type;
1019      if (code == SSA_NAME)
1020	{
1021	  value_range vr;
1022	  get_range_query (cfun)->range_of_expr (vr, exp);
1023	  if (vr.undefined_p ())
1024	    vr.set_varying (TREE_TYPE (exp));
1025	  wide_int var_min = wi::to_wide (vr.min ());
1026	  wide_int var_max = wi::to_wide (vr.max ());
1027	  value_range_kind vr_kind = vr.kind ();
1028	  wide_int var_nonzero = get_nonzero_bits (exp);
1029	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
1030						       &var_min, &var_max,
1031						       var_nonzero,
1032						       TYPE_SIGN (type));
1033	  /* This check for VR_VARYING is here because the old code
1034	     using get_range_info would return VR_RANGE for the entire
1035	     domain, instead of VR_VARYING.  The new code normalizes
1036	     full-domain ranges to VR_VARYING.  */
1037	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
1038	    *exp_range = value_range (type, var_min, var_max);
1039	}
1040    }
1041
1042  if (!tree_is_chrec (exp)
1043      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
1044    {
1045      extract_ops_from_tree (exp, &code, &op0, &op1);
1046      if (split_constant_offset_1 (type, op0, code, op1, var, off,
1047				   exp_range, cache, limit))
1048	return;
1049    }
1050
1051  *var = exp;
1052  if (INTEGRAL_TYPE_P (type))
1053    *var = fold_convert (sizetype, *var);
1054  *off = ssize_int (0);
1055
1056  value_range r;
1057  if (exp_range && code != SSA_NAME
1058      && get_range_query (cfun)->range_of_expr (r, exp)
1059      && !r.undefined_p ())
1060    *exp_range = r;
1061}
1062
1063/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
1064   type as EXP while OFF has type ssizetype.  */
1065
1066void
1067split_constant_offset (tree exp, tree *var, tree *off)
1068{
1069  unsigned limit = param_ssa_name_def_chain_limit;
1070  static hash_map<tree, std::pair<tree, tree> > *cache;
1071  if (!cache)
1072    cache = new hash_map<tree, std::pair<tree, tree> > (37);
1073  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
1074  *var = fold_convert (TREE_TYPE (exp), *var);
1075  cache->empty ();
1076}
1077
1078/* Returns the address ADDR of an object in a canonical shape (without nop
1079   casts, and with type of pointer to the object).  */
1080
1081static tree
1082canonicalize_base_object_address (tree addr)
1083{
1084  tree orig = addr;
1085
1086  STRIP_NOPS (addr);
1087
1088  /* The base address may be obtained by casting from integer, in that case
1089     keep the cast.  */
1090  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
1091    return orig;
1092
1093  if (TREE_CODE (addr) != ADDR_EXPR)
1094    return addr;
1095
1096  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
1097}
1098
1099/* Analyze the behavior of memory reference REF within STMT.
1100   There are two modes:
1101
1102   - BB analysis.  In this case we simply split the address into base,
1103     init and offset components, without reference to any containing loop.
1104     The resulting base and offset are general expressions and they can
1105     vary arbitrarily from one iteration of the containing loop to the next.
1106     The step is always zero.
1107
1108   - loop analysis.  In this case we analyze the reference both wrt LOOP
1109     and on the basis that the reference occurs (is "used") in LOOP;
1110     see the comment above analyze_scalar_evolution_in_loop for more
1111     information about this distinction.  The base, init, offset and
1112     step fields are all invariant in LOOP.
1113
1114   Perform BB analysis if LOOP is null, or if LOOP is the function's
1115   dummy outermost loop.  In other cases perform loop analysis.
1116
1117   Return true if the analysis succeeded and store the results in DRB if so.
1118   BB analysis can only fail for bitfield or reversed-storage accesses.  */
1119
1120opt_result
1121dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
1122		      class loop *loop, const gimple *stmt)
1123{
1124  poly_int64 pbitsize, pbitpos;
1125  tree base, poffset;
1126  machine_mode pmode;
1127  int punsignedp, preversep, pvolatilep;
1128  affine_iv base_iv, offset_iv;
1129  tree init, dinit, step;
1130  bool in_loop = (loop && loop->num);
1131
1132  if (dump_file && (dump_flags & TDF_DETAILS))
1133    fprintf (dump_file, "analyze_innermost: ");
1134
1135  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
1136			      &punsignedp, &preversep, &pvolatilep);
1137  gcc_assert (base != NULL_TREE);
1138
1139  poly_int64 pbytepos;
1140  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
1141    return opt_result::failure_at (stmt,
1142				   "failed: bit offset alignment.\n");
1143
1144  if (preversep)
1145    return opt_result::failure_at (stmt,
1146				   "failed: reverse storage order.\n");
1147
1148  /* Calculate the alignment and misalignment for the inner reference.  */
1149  unsigned int HOST_WIDE_INT bit_base_misalignment;
1150  unsigned int bit_base_alignment;
1151  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);
1152
1153  /* There are no bitfield references remaining in BASE, so the values
1154     we got back must be whole bytes.  */
1155  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
1156	      && bit_base_misalignment % BITS_PER_UNIT == 0);
1157  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
1158  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;
1159
1160  if (TREE_CODE (base) == MEM_REF)
1161    {
1162      if (!integer_zerop (TREE_OPERAND (base, 1)))
1163	{
1164	  /* Subtract MOFF from the base and add it to POFFSET instead.
1165	     Adjust the misalignment to reflect the amount we subtracted.  */
1166	  poly_offset_int moff = mem_ref_offset (base);
1167	  base_misalignment -= moff.force_shwi ();
1168	  tree mofft = wide_int_to_tree (sizetype, moff);
1169	  if (!poffset)
1170	    poffset = mofft;
1171	  else
1172	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
1173	}
1174      base = TREE_OPERAND (base, 0);
1175    }
1176  else
1177    base = build_fold_addr_expr (base);
1178
1179  if (in_loop)
1180    {
1181      if (!simple_iv (loop, loop, base, &base_iv, true))
1182	return opt_result::failure_at
1183	  (stmt, "failed: evolution of base is not affine.\n");
1184    }
1185  else
1186    {
1187      base_iv.base = base;
1188      base_iv.step = ssize_int (0);
1189      base_iv.no_overflow = true;
1190    }
1191
1192  if (!poffset)
1193    {
1194      offset_iv.base = ssize_int (0);
1195      offset_iv.step = ssize_int (0);
1196    }
1197  else
1198    {
1199      if (!in_loop)
1200        {
1201          offset_iv.base = poffset;
1202          offset_iv.step = ssize_int (0);
1203        }
1204      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
1205	return opt_result::failure_at
1206	  (stmt, "failed: evolution of offset is not affine.\n");
1207    }
1208
1209  init = ssize_int (pbytepos);
1210
1211  /* Subtract any constant component from the base and add it to INIT instead.
1212     Adjust the misalignment to reflect the amount we subtracted.  */
1213  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1214  init = size_binop (PLUS_EXPR, init, dinit);
1215  base_misalignment -= TREE_INT_CST_LOW (dinit);
1216
1217  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1218  init = size_binop (PLUS_EXPR, init, dinit);
1219
1220  step = size_binop (PLUS_EXPR,
1221		     fold_convert (ssizetype, base_iv.step),
1222		     fold_convert (ssizetype, offset_iv.step));
1223
1224  base = canonicalize_base_object_address (base_iv.base);
1225
1226  /* See if get_pointer_alignment can guarantee a higher alignment than
1227     the one we calculated above.  */
1228  unsigned int HOST_WIDE_INT alt_misalignment;
1229  unsigned int alt_alignment;
1230  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
1231
1232  /* As above, these values must be whole bytes.  */
1233  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
1234	      && alt_misalignment % BITS_PER_UNIT == 0);
1235  alt_alignment /= BITS_PER_UNIT;
1236  alt_misalignment /= BITS_PER_UNIT;
1237
1238  if (base_alignment < alt_alignment)
1239    {
1240      base_alignment = alt_alignment;
1241      base_misalignment = alt_misalignment;
1242    }
1243
1244  drb->base_address = base;
1245  drb->offset = fold_convert (ssizetype, offset_iv.base);
1246  drb->init = init;
1247  drb->step = step;
1248  if (known_misalignment (base_misalignment, base_alignment,
1249			  &drb->base_misalignment))
1250    drb->base_alignment = base_alignment;
1251  else
1252    {
1253      drb->base_alignment = known_alignment (base_misalignment);
1254      drb->base_misalignment = 0;
1255    }
1256  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
1257  drb->step_alignment = highest_pow2_factor (step);
1258
1259  if (dump_file && (dump_flags & TDF_DETAILS))
1260    fprintf (dump_file, "success.\n");
1261
1262  return opt_result::success ();
1263}
1264
1265/* Return true if OP is a valid component reference for a DR access
1266   function.  This accepts a subset of what handled_component_p accepts.  */
1267
1268static bool
1269access_fn_component_p (tree op)
1270{
1271  switch (TREE_CODE (op))
1272    {
1273    case REALPART_EXPR:
1274    case IMAGPART_EXPR:
1275    case ARRAY_REF:
1276      return true;
1277
1278    case COMPONENT_REF:
1279      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;
1280
1281    default:
1282      return false;
1283    }
1284}
1285
1286/* Returns whether BASE can have a access_fn_component_p with BASE
1287   as base.  */
1288
1289static bool
1290base_supports_access_fn_components_p (tree base)
1291{
1292  switch (TREE_CODE (TREE_TYPE (base)))
1293    {
1294    case COMPLEX_TYPE:
1295    case ARRAY_TYPE:
1296    case RECORD_TYPE:
1297      return true;
1298    default:
1299      return false;
1300    }
1301}
1302
1303/* Determines the base object and the list of indices of memory reference
1304   DR, analyzed in LOOP and instantiated before NEST.  */
1305
1306static void
1307dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
1308{
1309  /* If analyzing a basic-block there are no indices to analyze
1310     and thus no access functions.  */
1311  if (!nest)
1312    {
1313      dri->base_object = ref;
1314      dri->access_fns.create (0);
1315      return;
1316    }
1317
1318  vec<tree> access_fns = vNULL;
1319
1320  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
1321     into a two element array with a constant index.  The base is
1322     then just the immediate underlying object.  */
1323  if (TREE_CODE (ref) == REALPART_EXPR)
1324    {
1325      ref = TREE_OPERAND (ref, 0);
1326      access_fns.safe_push (integer_zero_node);
1327    }
1328  else if (TREE_CODE (ref) == IMAGPART_EXPR)
1329    {
1330      ref = TREE_OPERAND (ref, 0);
1331      access_fns.safe_push (integer_one_node);
1332    }
1333
1334  /* Analyze access functions of dimensions we know to be independent.
1335     The list of component references handled here should be kept in
1336     sync with access_fn_component_p.  */
1337  while (handled_component_p (ref))
1338    {
1339      if (TREE_CODE (ref) == ARRAY_REF)
1340	{
1341	  tree op = TREE_OPERAND (ref, 1);
1342	  tree access_fn = analyze_scalar_evolution (loop, op);
1343	  access_fn = instantiate_scev (nest, loop, access_fn);
1344	  access_fns.safe_push (access_fn);
1345	}
1346      else if (TREE_CODE (ref) == COMPONENT_REF
1347	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
1348	{
1349	  /* For COMPONENT_REFs of records (but not unions!) use the
1350	     FIELD_DECL offset as constant access function so we can
1351	     disambiguate a[i].f1 and a[i].f2.  */
1352	  tree off = component_ref_field_offset (ref);
1353	  off = size_binop (PLUS_EXPR,
1354			    size_binop (MULT_EXPR,
1355					fold_convert (bitsizetype, off),
1356					bitsize_int (BITS_PER_UNIT)),
1357			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
1358	  access_fns.safe_push (off);
1359	}
1360      else
1361	/* If we have an unhandled component we could not translate
1362	   to an access function stop analyzing.  We have determined
1363	   our base object in this case.  */
1364	break;
1365
1366      ref = TREE_OPERAND (ref, 0);
1367    }
1368
1369  /* If the address operand of a MEM_REF base has an evolution in the
1370     analyzed nest, add it as an additional independent access-function.  */
1371  if (TREE_CODE (ref) == MEM_REF)
1372    {
1373      tree op = TREE_OPERAND (ref, 0);
1374      tree access_fn = analyze_scalar_evolution (loop, op);
1375      access_fn = instantiate_scev (nest, loop, access_fn);
1376      STRIP_NOPS (access_fn);
1377      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
1378	{
1379	  tree memoff = TREE_OPERAND (ref, 1);
1380	  tree base = initial_condition (access_fn);
1381	  tree orig_type = TREE_TYPE (base);
1382	  STRIP_USELESS_TYPE_CONVERSION (base);
1383	  tree off;
1384	  split_constant_offset (base, &base, &off);
1385	  STRIP_USELESS_TYPE_CONVERSION (base);
1386	  /* Fold the MEM_REF offset into the evolutions initial
1387	     value to make more bases comparable.  */
1388	  if (!integer_zerop (memoff))
1389	    {
1390	      off = size_binop (PLUS_EXPR, off,
1391				fold_convert (ssizetype, memoff));
1392	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
1393	    }
1394	  /* Adjust the offset so it is a multiple of the access type
1395	     size and thus we separate bases that can possibly be used
1396	     to produce partial overlaps (which the access_fn machinery
1397	     cannot handle).  */
1398	  wide_int rem;
1399	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
1400	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
1401	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
1402	    rem = wi::mod_trunc
1403	      (wi::to_wide (off),
1404	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
1405	       SIGNED);
1406	  else
1407	    /* If we can't compute the remainder simply force the initial
1408	       condition to zero.  */
1409	    rem = wi::to_wide (off);
1410	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
1411	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
1412	  /* And finally replace the initial condition.  */
1413	  access_fn = chrec_replace_initial_condition
1414	      (access_fn, fold_convert (orig_type, off));
1415	  /* ???  This is still not a suitable base object for
1416	     dr_may_alias_p - the base object needs to be an
1417	     access that covers the object as whole.  With
1418	     an evolution in the pointer this cannot be
1419	     guaranteed.
1420	     As a band-aid, mark the access so we can special-case
1421	     it in dr_may_alias_p.  */
1422	  tree old = ref;
1423	  ref = fold_build2_loc (EXPR_LOCATION (ref),
1424				 MEM_REF, TREE_TYPE (ref),
1425				 base, memoff);
1426	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
1427	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
1428	  dri->unconstrained_base = true;
1429	  access_fns.safe_push (access_fn);
1430	}
1431    }
1432  else if (DECL_P (ref))
1433    {
1434      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
1435      ref = build2 (MEM_REF, TREE_TYPE (ref),
1436		    build_fold_addr_expr (ref),
1437		    build_int_cst (reference_alias_ptr_type (ref), 0));
1438    }
1439
1440  dri->base_object = ref;
1441  dri->access_fns = access_fns;
1442}
1443
1444/* Extracts the alias analysis information from the memory reference DR.  */
1445
1446static void
1447dr_analyze_alias (struct data_reference *dr)
1448{
1449  tree ref = DR_REF (dr);
1450  tree base = get_base_address (ref), addr;
1451
1452  if (INDIRECT_REF_P (base)
1453      || TREE_CODE (base) == MEM_REF)
1454    {
1455      addr = TREE_OPERAND (base, 0);
1456      if (TREE_CODE (addr) == SSA_NAME)
1457	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
1458    }
1459}
1460
1461/* Frees data reference DR.  */
1462
1463void
1464free_data_ref (data_reference_p dr)
1465{
1466  DR_ACCESS_FNS (dr).release ();
1467  if (dr->alt_indices.base_object)
1468    dr->alt_indices.access_fns.release ();
1469  free (dr);
1470}
1471
1472/* Analyze memory reference MEMREF, which is accessed in STMT.
1473   The reference is a read if IS_READ is true, otherwise it is a write.
1474   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
1475   within STMT, i.e. that it might not occur even if STMT is executed
1476   and runs to completion.
1477
1478   Return the data_reference description of MEMREF.  NEST is the outermost
1479   loop in which the reference should be instantiated, LOOP is the loop
1480   in which the data reference should be analyzed.  */
1481
1482struct data_reference *
1483create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
1484		 bool is_read, bool is_conditional_in_stmt)
1485{
1486  struct data_reference *dr;
1487
1488  if (dump_file && (dump_flags & TDF_DETAILS))
1489    {
1490      fprintf (dump_file, "Creating dr for ");
1491      print_generic_expr (dump_file, memref, TDF_SLIM);
1492      fprintf (dump_file, "\n");
1493    }
1494
1495  dr = XCNEW (struct data_reference);
1496  DR_STMT (dr) = stmt;
1497  DR_REF (dr) = memref;
1498  DR_IS_READ (dr) = is_read;
1499  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;
1500
1501  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
1502			nest != NULL ? loop : NULL, stmt);
1503  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
1504  dr_analyze_alias (dr);
1505
1506  if (dump_file && (dump_flags & TDF_DETAILS))
1507    {
1508      unsigned i;
1509      fprintf (dump_file, "\tbase_address: ");
1510      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
1511      fprintf (dump_file, "\n\toffset from base address: ");
1512      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
1513      fprintf (dump_file, "\n\tconstant offset from base address: ");
1514      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
1515      fprintf (dump_file, "\n\tstep: ");
1516      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
1517      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
1518      fprintf (dump_file, "\n\tbase misalignment: %d",
1519	       DR_BASE_MISALIGNMENT (dr));
1520      fprintf (dump_file, "\n\toffset alignment: %d",
1521	       DR_OFFSET_ALIGNMENT (dr));
1522      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
1523      fprintf (dump_file, "\n\tbase_object: ");
1524      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
1525      fprintf (dump_file, "\n");
1526      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
1527	{
1528	  fprintf (dump_file, "\tAccess function %d: ", i);
1529	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
1530	}
1531    }
1532
1533  return dr;
1534}
1535
1536/*  A helper function computes order between two tree expressions T1 and T2.
1537    This is used in comparator functions sorting objects based on the order
1538    of tree expressions.  The function returns -1, 0, or 1.  */
1539
1540int
1541data_ref_compare_tree (tree t1, tree t2)
1542{
1543  int i, cmp;
1544  enum tree_code code;
1545  char tclass;
1546
1547  if (t1 == t2)
1548    return 0;
1549  if (t1 == NULL)
1550    return -1;
1551  if (t2 == NULL)
1552    return 1;
1553
1554  STRIP_USELESS_TYPE_CONVERSION (t1);
1555  STRIP_USELESS_TYPE_CONVERSION (t2);
1556  if (t1 == t2)
1557    return 0;
1558
1559  if (TREE_CODE (t1) != TREE_CODE (t2)
1560      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
1561    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
1562
1563  code = TREE_CODE (t1);
1564  switch (code)
1565    {
1566    case INTEGER_CST:
1567      return tree_int_cst_compare (t1, t2);
1568
1569    case STRING_CST:
1570      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
1571	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
1572      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
1573		     TREE_STRING_LENGTH (t1));
1574
1575    case SSA_NAME:
1576      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
1577	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
1578      break;
1579
1580    default:
1581      if (POLY_INT_CST_P (t1))
1582	return compare_sizes_for_sort (wi::to_poly_widest (t1),
1583				       wi::to_poly_widest (t2));
1584
1585      tclass = TREE_CODE_CLASS (code);
1586
1587      /* For decls, compare their UIDs.  */
1588      if (tclass == tcc_declaration)
1589	{
1590	  if (DECL_UID (t1) != DECL_UID (t2))
1591	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
1592	  break;
1593	}
1594      /* For expressions, compare their operands recursively.  */
1595      else if (IS_EXPR_CODE_CLASS (tclass))
1596	{
1597	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
1598	    {
1599	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
1600					   TREE_OPERAND (t2, i));
1601	      if (cmp != 0)
1602		return cmp;
1603	    }
1604	}
1605      else
1606	gcc_unreachable ();
1607    }
1608
1609  return 0;
1610}
1611
1612/* Return TRUE it's possible to resolve data dependence DDR by runtime alias
1613   check.  */
1614
1615opt_result
1616runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
1617{
1618  if (dump_enabled_p ())
1619    dump_printf (MSG_NOTE,
1620		 "consider run-time aliasing test between %T and %T\n",
1621		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
1622
1623  if (!speed_p)
1624    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1625				   "runtime alias check not supported when"
1626				   " optimizing for size.\n");
1627
1628  /* FORNOW: We don't support versioning with outer-loop in either
1629     vectorization or loop distribution.  */
1630  if (loop != NULL && loop->inner != NULL)
1631    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
1632				   "runtime alias check not supported for"
1633				   " outer loop.\n");
1634
1635  return opt_result::success ();
1636}
1637
1638/* Operator == between two dr_with_seg_len objects.
1639
1640   This equality operator is used to make sure two data refs
1641   are the same one so that we will consider to combine the
1642   aliasing checks of those two pairs of data dependent data
1643   refs.  */
1644
1645static bool
1646operator == (const dr_with_seg_len& d1,
1647	     const dr_with_seg_len& d2)
1648{
1649  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
1650			   DR_BASE_ADDRESS (d2.dr), 0)
1651	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
1652	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
1653	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
1654	  && known_eq (d1.access_size, d2.access_size)
1655	  && d1.align == d2.align);
1656}
1657
1658/* Comparison function for sorting objects of dr_with_seg_len_pair_t
1659   so that we can combine aliasing checks in one scan.  */
1660
1661static int
1662comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
1663{
1664  const dr_with_seg_len_pair_t* pa = (const dr_with_seg_len_pair_t *) pa_;
1665  const dr_with_seg_len_pair_t* pb = (const dr_with_seg_len_pair_t *) pb_;
1666  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
1667  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;
1668
1669  /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
1670     if a and c have the same basic address snd step, and b and d have the same
1671     address and step.  Therefore, if any a&c or b&d don't have the same address
1672     and step, we don't care the order of those two pairs after sorting.  */
1673  int comp_res;
1674
1675  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
1676					 DR_BASE_ADDRESS (b1.dr))) != 0)
1677    return comp_res;
1678  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
1679					 DR_BASE_ADDRESS (b2.dr))) != 0)
1680    return comp_res;
1681  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
1682					 DR_STEP (b1.dr))) != 0)
1683    return comp_res;
1684  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
1685					 DR_STEP (b2.dr))) != 0)
1686    return comp_res;
1687  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
1688					 DR_OFFSET (b1.dr))) != 0)
1689    return comp_res;
1690  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
1691					 DR_INIT (b1.dr))) != 0)
1692    return comp_res;
1693  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
1694					 DR_OFFSET (b2.dr))) != 0)
1695    return comp_res;
1696  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
1697					 DR_INIT (b2.dr))) != 0)
1698    return comp_res;
1699
1700  return 0;
1701}
1702
1703/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */
1704
1705static void
1706dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
1707{
1708  dump_printf (MSG_NOTE, "%sreference:      %T vs. %T\n", indent,
1709	       DR_REF (alias_pair->first.dr),
1710	       DR_REF (alias_pair->second.dr));
1711
1712  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
1713	       alias_pair->first.seg_len);
1714  if (!operand_equal_p (alias_pair->first.seg_len,
1715			alias_pair->second.seg_len, 0))
1716    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);
1717
1718  dump_printf (MSG_NOTE, "\n%saccess size:    ", indent);
1719  dump_dec (MSG_NOTE, alias_pair->first.access_size);
1720  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
1721    {
1722      dump_printf (MSG_NOTE, " vs. ");
1723      dump_dec (MSG_NOTE, alias_pair->second.access_size);
1724    }
1725
1726  dump_printf (MSG_NOTE, "\n%salignment:      %d", indent,
1727	       alias_pair->first.align);
1728  if (alias_pair->first.align != alias_pair->second.align)
1729    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);
1730
1731  dump_printf (MSG_NOTE, "\n%sflags:         ", indent);
1732  if (alias_pair->flags & DR_ALIAS_RAW)
1733    dump_printf (MSG_NOTE, " RAW");
1734  if (alias_pair->flags & DR_ALIAS_WAR)
1735    dump_printf (MSG_NOTE, " WAR");
1736  if (alias_pair->flags & DR_ALIAS_WAW)
1737    dump_printf (MSG_NOTE, " WAW");
1738  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
1739    dump_printf (MSG_NOTE, " ARBITRARY");
1740  if (alias_pair->flags & DR_ALIAS_SWAPPED)
1741    dump_printf (MSG_NOTE, " SWAPPED");
1742  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
1743    dump_printf (MSG_NOTE, " UNSWAPPED");
1744  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
1745    dump_printf (MSG_NOTE, " MIXED_STEPS");
1746  if (alias_pair->flags == 0)
1747    dump_printf (MSG_NOTE, " <none>");
1748  dump_printf (MSG_NOTE, "\n");
1749}
1750
1751/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
1752   FACTOR is number of iterations that each data reference is accessed.
1753
1754   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
1755   we create an expression:
1756
1757   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1758   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
1759
1760   for aliasing checks.  However, in some cases we can decrease the number
1761   of checks by combining two checks into one.  For example, suppose we have
1762   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
1763   condition is satisfied:
1764
1765   load_ptr_0 < load_ptr_1  &&
1766   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
1767
1768   (this condition means, in each iteration of vectorized loop, the accessed
1769   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
1770   load_ptr_1.)
1771
1772   we then can use only the following expression to finish the alising checks
1773   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:
1774
1775   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
1776   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
1777
1778   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
1779   basic address.  */
1780
1781void
1782prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
1783			       poly_uint64)
1784{
1785  if (alias_pairs->is_empty ())
1786    return;
1787
1788  /* Canonicalize each pair so that the base components are ordered wrt
1789     data_ref_compare_tree.  This allows the loop below to merge more
1790     cases.  */
1791  unsigned int i;
1792  dr_with_seg_len_pair_t *alias_pair;
1793  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1794    {
1795      data_reference_p dr_a = alias_pair->first.dr;
1796      data_reference_p dr_b = alias_pair->second.dr;
1797      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
1798					    DR_BASE_ADDRESS (dr_b));
1799      if (comp_res == 0)
1800	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
1801      if (comp_res == 0)
1802	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
1803      if (comp_res > 0)
1804	{
1805	  std::swap (alias_pair->first, alias_pair->second);
1806	  alias_pair->flags |= DR_ALIAS_SWAPPED;
1807	}
1808      else
1809	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
1810    }
1811
1812  /* Sort the collected data ref pairs so that we can scan them once to
1813     combine all possible aliasing checks.  */
1814  alias_pairs->qsort (comp_dr_with_seg_len_pair);
1815
1816  /* Scan the sorted dr pairs and check if we can combine alias checks
1817     of two neighboring dr pairs.  */
1818  unsigned int last = 0;
1819  for (i = 1; i < alias_pairs->length (); ++i)
1820    {
1821      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
1822      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
1823      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];
1824
1825      dr_with_seg_len *dr_a1 = &alias_pair1->first;
1826      dr_with_seg_len *dr_b1 = &alias_pair1->second;
1827      dr_with_seg_len *dr_a2 = &alias_pair2->first;
1828      dr_with_seg_len *dr_b2 = &alias_pair2->second;
1829
1830      /* Remove duplicate data ref pairs.  */
1831      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
1832	{
1833	  if (dump_enabled_p ())
1834	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
1835			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1836			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1837	  alias_pair1->flags |= alias_pair2->flags;
1838	  continue;
1839	}
1840
1841      /* Assume that we won't be able to merge the pairs, then correct
1842	 if we do.  */
1843      last += 1;
1844      if (last != i)
1845	(*alias_pairs)[last] = (*alias_pairs)[i];
1846
1847      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
1848	{
1849	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
1850	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
1851	  if (*dr_a1 == *dr_a2)
1852	    {
1853	      std::swap (dr_a1, dr_b1);
1854	      std::swap (dr_a2, dr_b2);
1855	    }
1856
1857	  poly_int64 init_a1, init_a2;
1858	  /* Only consider cases in which the distance between the initial
1859	     DR_A1 and the initial DR_A2 is known at compile time.  */
1860	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
1861				DR_BASE_ADDRESS (dr_a2->dr), 0)
1862	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
1863				   DR_OFFSET (dr_a2->dr), 0)
1864	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
1865	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
1866	    continue;
1867
1868	  /* Don't combine if we can't tell which one comes first.  */
1869	  if (!ordered_p (init_a1, init_a2))
1870	    continue;
1871
1872	  /* Work out what the segment length would be if we did combine
1873	     DR_A1 and DR_A2:
1874
1875	     - If DR_A1 and DR_A2 have equal lengths, that length is
1876	       also the combined length.
1877
1878	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
1879	       length is the lower bound on those lengths.
1880
1881	     - If DR_A1 and DR_A2 both have positive lengths, the combined
1882	       length is the upper bound on those lengths.
1883
1884	     Other cases are unlikely to give a useful combination.
1885
1886	     The lengths both have sizetype, so the sign is taken from
1887	     the step instead.  */
1888	  poly_uint64 new_seg_len = 0;
1889	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
1890						 dr_a2->seg_len, 0);
1891	  if (new_seg_len_p)
1892	    {
1893	      poly_uint64 seg_len_a1, seg_len_a2;
1894	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
1895		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
1896		continue;
1897
1898	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
1899	      if (TREE_CODE (indicator_a) != INTEGER_CST)
1900		continue;
1901
1902	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
1903	      if (TREE_CODE (indicator_b) != INTEGER_CST)
1904		continue;
1905
1906	      int sign_a = tree_int_cst_sgn (indicator_a);
1907	      int sign_b = tree_int_cst_sgn (indicator_b);
1908
1909	      if (sign_a <= 0 && sign_b <= 0)
1910		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
1911	      else if (sign_a >= 0 && sign_b >= 0)
1912		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
1913	      else
1914		continue;
1915	    }
1916	  /* At this point we're committed to merging the refs.  */
1917
1918	  /* Make sure dr_a1 starts left of dr_a2.  */
1919	  if (maybe_gt (init_a1, init_a2))
1920	    {
1921	      std::swap (*dr_a1, *dr_a2);
1922	      std::swap (init_a1, init_a2);
1923	    }
1924
1925	  /* The DR_Bs are equal, so only the DR_As can introduce
1926	     mixed steps.  */
1927	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
1928	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;
1929
1930	  if (new_seg_len_p)
1931	    {
1932	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
1933					      new_seg_len);
1934	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
1935	    }
1936
1937	  /* This is always positive due to the swap above.  */
1938	  poly_uint64 diff = init_a2 - init_a1;
1939
1940	  /* The new check will start at DR_A1.  Make sure that its access
1941	     size encompasses the initial DR_A2.  */
1942	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
1943	    {
1944	      dr_a1->access_size = upper_bound (dr_a1->access_size,
1945						diff + dr_a2->access_size);
1946	      unsigned int new_align = known_alignment (dr_a1->access_size);
1947	      dr_a1->align = MIN (dr_a1->align, new_align);
1948	    }
1949	  if (dump_enabled_p ())
1950	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
1951			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
1952			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
1953	  alias_pair1->flags |= alias_pair2->flags;
1954	  last -= 1;
1955	}
1956    }
1957  alias_pairs->truncate (last + 1);
1958
1959  /* Try to restore the original dr_with_seg_len order within each
1960     dr_with_seg_len_pair_t.  If we ended up combining swapped and
1961     unswapped pairs into the same check, we have to invalidate any
1962     RAW, WAR and WAW information for it.  */
1963  if (dump_enabled_p ())
1964    dump_printf (MSG_NOTE, "merged alias checks:\n");
1965  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
1966    {
1967      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
1968      unsigned int swapped = (alias_pair->flags & swap_mask);
1969      if (swapped == DR_ALIAS_SWAPPED)
1970	std::swap (alias_pair->first, alias_pair->second);
1971      else if (swapped != DR_ALIAS_UNSWAPPED)
1972	alias_pair->flags |= DR_ALIAS_ARBITRARY;
1973      alias_pair->flags &= ~swap_mask;
1974      if (dump_enabled_p ())
1975	dump_alias_pair (alias_pair, "  ");
1976    }
1977}
1978
1979/* A subroutine of create_intersect_range_checks, with a subset of the
1980   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
1981   to optimize cases in which the references form a simple RAW, WAR or
1982   WAR dependence.  */
1983
1984static bool
1985create_ifn_alias_checks (tree *cond_expr,
1986			 const dr_with_seg_len_pair_t &alias_pair)
1987{
1988  const dr_with_seg_len& dr_a = alias_pair.first;
1989  const dr_with_seg_len& dr_b = alias_pair.second;
1990
1991  /* Check for cases in which:
1992
1993     (a) we have a known RAW, WAR or WAR dependence
1994     (b) the accesses are well-ordered in both the original and new code
1995	 (see the comment above the DR_ALIAS_* flags for details); and
1996     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
1997  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
1998    return false;
1999
2000  /* Make sure that both DRs access the same pattern of bytes,
2001     with a constant length and step.  */
2002  poly_uint64 seg_len;
2003  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
2004      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
2005      || maybe_ne (dr_a.access_size, dr_b.access_size)
2006      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
2007      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
2008    return false;
2009
2010  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
2011  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
2012  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);
2013
2014  /* See whether the target suports what we want to do.  WAW checks are
2015     equivalent to WAR checks here.  */
2016  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
2017		     ? IFN_CHECK_RAW_PTRS
2018		     : IFN_CHECK_WAR_PTRS);
2019  unsigned int align = MIN (dr_a.align, dr_b.align);
2020  poly_uint64 full_length = seg_len + bytes;
2021  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2022					   full_length, align))
2023    {
2024      full_length = seg_len + dr_a.access_size;
2025      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
2026					       full_length, align))
2027	return false;
2028    }
2029
2030  /* Commit to using this form of test.  */
2031  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
2032  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2033
2034  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
2035  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2036
2037  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
2038					     ifn, boolean_type_node,
2039					     4, addr_a, addr_b,
2040					     size_int (full_length),
2041					     size_int (align));
2042
2043  if (dump_enabled_p ())
2044    {
2045      if (ifn == IFN_CHECK_RAW_PTRS)
2046	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
2047      else
2048	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
2049    }
2050  return true;
2051}
2052
2053/* Try to generate a runtime condition that is true if ALIAS_PAIR is
2054   free of aliases, using a condition based on index values instead
2055   of a condition based on addresses.  Return true on success,
2056   storing the condition in *COND_EXPR.
2057
2058   This can only be done if the two data references in ALIAS_PAIR access
2059   the same array object and the index is the only difference.  For example,
2060   if the two data references are DR_A and DR_B:
2061
2062                       DR_A                           DR_B
2063      data-ref         arr[i]                         arr[j]
2064      base_object      arr                            arr
2065      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop
2066
2067   The addresses and their index are like:
2068
2069        |<- ADDR_A    ->|          |<- ADDR_B    ->|
2070     ------------------------------------------------------->
2071        |   |   |   |   |          |   |   |   |   |
2072     ------------------------------------------------------->
2073        i_0 ...         i_0+4      j_0 ...         j_0+4
2074
2075   We can create expression based on index rather than address:
2076
2077     (unsigned) (i_0 - j_0 + 3) <= 6
2078
2079   i.e. the indices are less than 4 apart.
2080
2081   Note evolution step of index needs to be considered in comparison.  */
2082
2083static bool
2084create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
2085				     const dr_with_seg_len_pair_t &alias_pair)
2086{
2087  const dr_with_seg_len &dr_a = alias_pair.first;
2088  const dr_with_seg_len &dr_b = alias_pair.second;
2089  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
2090      || integer_zerop (DR_STEP (dr_a.dr))
2091      || integer_zerop (DR_STEP (dr_b.dr))
2092      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
2093    return false;
2094
2095  poly_uint64 seg_len1, seg_len2;
2096  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
2097      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
2098    return false;
2099
2100  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
2101    return false;
2102
2103  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
2104    return false;
2105
2106  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
2107    return false;
2108
2109  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);
2110
2111  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
2112  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
2113  if (neg_step)
2114    {
2115      abs_step = -abs_step;
2116      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
2117      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
2118    }
2119
2120  /* Infer the number of iterations with which the memory segment is accessed
2121     by DR.  In other words, alias is checked if memory segment accessed by
2122     DR_A in some iterations intersect with memory segment accessed by DR_B
2123     in the same amount iterations.
2124     Note segnment length is a linear function of number of iterations with
2125     DR_STEP as the coefficient.  */
2126  poly_uint64 niter_len1, niter_len2;
2127  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
2128      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
2129    return false;
2130
2131  /* Divide each access size by the byte step, rounding up.  */
2132  poly_uint64 niter_access1, niter_access2;
2133  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
2134			abs_step, &niter_access1)
2135      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
2136			   abs_step, &niter_access2))
2137    return false;
2138
2139  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;
2140
2141  int found = -1;
2142  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
2143    {
2144      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
2145      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
2146      /* Two indices must be the same if they are not scev, or not scev wrto
2147	 current loop being vecorized.  */
2148      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
2149	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
2150	  || CHREC_VARIABLE (access1) != (unsigned)loop->num
2151	  || CHREC_VARIABLE (access2) != (unsigned)loop->num)
2152	{
2153	  if (operand_equal_p (access1, access2, 0))
2154	    continue;
2155
2156	  return false;
2157	}
2158      if (found >= 0)
2159	return false;
2160      found = i;
2161    }
2162
2163  /* Ought not to happen in practice, since if all accesses are equal then the
2164     alias should be decidable at compile time.  */
2165  if (found < 0)
2166    return false;
2167
2168  /* The two indices must have the same step.  */
2169  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
2170  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
2171  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
2172    return false;
2173
2174  tree idx_step = CHREC_RIGHT (access1);
2175  /* Index must have const step, otherwise DR_STEP won't be constant.  */
2176  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
2177  /* Index must evaluate in the same direction as DR.  */
2178  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
2179
2180  tree min1 = CHREC_LEFT (access1);
2181  tree min2 = CHREC_LEFT (access2);
2182  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
2183    return false;
2184
2185  /* Ideally, alias can be checked against loop's control IV, but we
2186     need to prove linear mapping between control IV and reference
2187     index.  Although that should be true, we check against (array)
2188     index of data reference.  Like segment length, index length is
2189     linear function of the number of iterations with index_step as
2190     the coefficient, i.e, niter_len * idx_step.  */
2191  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
2192					      SIGNED);
2193  if (neg_step)
2194    abs_idx_step = -abs_idx_step;
2195  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
2196  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
2197  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
2198  poly_offset_int idx_access2 = abs_idx_step * niter_access2;
2199
2200  gcc_assert (known_ge (idx_len1, 0)
2201	      && known_ge (idx_len2, 0)
2202	      && known_ge (idx_access1, 0)
2203	      && known_ge (idx_access2, 0));
2204
2205  /* Each access has the following pattern, with lengths measured
2206     in units of INDEX:
2207
2208	  <-- idx_len -->
2209	  <--- A: -ve step --->
2210	  +-----+-------+-----+-------+-----+
2211	  | n-1 | ..... |  0  | ..... | n-1 |
2212	  +-----+-------+-----+-------+-----+
2213			<--- B: +ve step --->
2214			<-- idx_len -->
2215			|
2216		       min
2217
2218     where "n" is the number of scalar iterations covered by the segment
2219     and where each access spans idx_access units.
2220
2221     A is the range of bytes accessed when the step is negative,
2222     B is the range when the step is positive.
2223
2224     When checking for general overlap, we need to test whether
2225     the range:
2226
2227       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]
2228
2229     overlaps:
2230
2231       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]
2232
2233     where:
2234
2235	low_offsetN = +ve step ? 0 : -idx_lenN;
2236       high_offsetN = +ve step ? idx_lenN : 0;
2237
2238     This is equivalent to testing whether:
2239
2240       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
2241       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1
2242
2243     Converting this into a single test, there is an overlap if:
2244
2245       0 <= min2 - min1 + bias <= limit
2246
2247     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
2248	   limit = (high_offset1 - low_offset1 + idx_access1 - 1)
2249		 + (high_offset2 - low_offset2 + idx_access2 - 1)
2250      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1
2251
2252     Combining the tests requires limit to be computable in an unsigned
2253     form of the index type; if it isn't, we fall back to the usual
2254     pointer-based checks.
2255
2256     We can do better if DR_B is a write and if DR_A and DR_B are
2257     well-ordered in both the original and the new code (see the
2258     comment above the DR_ALIAS_* flags for details).  In this case
2259     we know that for each i in [0, n-1], the write performed by
2260     access i of DR_B occurs after access numbers j<=i of DR_A in
2261     both the original and the new code.  Any write or anti
2262     dependencies wrt those DR_A accesses are therefore maintained.
2263
2264     We just need to make sure that each individual write in DR_B does not
2265     overlap any higher-indexed access in DR_A; such DR_A accesses happen
2266     after the DR_B access in the original code but happen before it in
2267     the new code.
2268
2269     We know the steps for both accesses are equal, so by induction, we
2270     just need to test whether the first write of DR_B overlaps a later
2271     access of DR_A.  In other words, we need to move min1 along by
2272     one iteration:
2273
2274       min1' = min1 + idx_step
2275
2276     and use the ranges:
2277
2278       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]
2279
2280     and:
2281
2282       [min2, min2 + idx_access2 - 1]
2283
2284     where:
2285
2286	low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
2287       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
2288  if (waw_or_war_p)
2289    idx_len1 -= abs_idx_step;
2290
2291  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
2292  if (!waw_or_war_p)
2293    limit += idx_len2;
2294
2295  tree utype = unsigned_type_for (TREE_TYPE (min1));
2296  if (!wi::fits_to_tree_p (limit, utype))
2297    return false;
2298
2299  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
2300  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
2301  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
2302  /* Equivalent to adding IDX_STEP to MIN1.  */
2303  if (waw_or_war_p)
2304    bias -= wi::to_offset (idx_step);
2305
2306  tree subject = fold_build2 (MINUS_EXPR, utype,
2307			      fold_convert (utype, min2),
2308			      fold_convert (utype, min1));
2309  subject = fold_build2 (PLUS_EXPR, utype, subject,
2310			 wide_int_to_tree (utype, bias));
2311  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
2312				     wide_int_to_tree (utype, limit));
2313  if (*cond_expr)
2314    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2315			      *cond_expr, part_cond_expr);
2316  else
2317    *cond_expr = part_cond_expr;
2318  if (dump_enabled_p ())
2319    {
2320      if (waw_or_war_p)
2321	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
2322      else
2323	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
2324    }
2325  return true;
2326}
2327
2328/* A subroutine of create_intersect_range_checks, with a subset of the
2329   same arguments.  Try to optimize cases in which the second access
2330   is a write and in which some overlap is valid.  */
2331
2332static bool
2333create_waw_or_war_checks (tree *cond_expr,
2334			  const dr_with_seg_len_pair_t &alias_pair)
2335{
2336  const dr_with_seg_len& dr_a = alias_pair.first;
2337  const dr_with_seg_len& dr_b = alias_pair.second;
2338
2339  /* Check for cases in which:
2340
2341     (a) DR_B is always a write;
2342     (b) the accesses are well-ordered in both the original and new code
2343	 (see the comment above the DR_ALIAS_* flags for details); and
2344     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
2345  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
2346    return false;
2347
2348  /* Check for equal (but possibly variable) steps.  */
2349  tree step = DR_STEP (dr_a.dr);
2350  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
2351    return false;
2352
2353  /* Make sure that we can operate on sizetype without loss of precision.  */
2354  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
2355  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
2356    return false;
2357
2358  /* All addresses involved are known to have a common alignment ALIGN.
2359     We can therefore subtract ALIGN from an exclusive endpoint to get
2360     an inclusive endpoint.  In the best (and common) case, ALIGN is the
2361     same as the access sizes of both DRs, and so subtracting ALIGN
2362     cancels out the addition of an access size.  */
2363  unsigned int align = MIN (dr_a.align, dr_b.align);
2364  poly_uint64 last_chunk_a = dr_a.access_size - align;
2365  poly_uint64 last_chunk_b = dr_b.access_size - align;
2366
2367  /* Get a boolean expression that is true when the step is negative.  */
2368  tree indicator = dr_direction_indicator (dr_a.dr);
2369  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2370			       fold_convert (ssizetype, indicator),
2371			       ssize_int (0));
2372
2373  /* Get lengths in sizetype.  */
2374  tree seg_len_a
2375    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
2376  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
2377
2378  /* Each access has the following pattern:
2379
2380	  <- |seg_len| ->
2381	  <--- A: -ve step --->
2382	  +-----+-------+-----+-------+-----+
2383	  | n-1 | ..... |  0  | ..... | n-1 |
2384	  +-----+-------+-----+-------+-----+
2385			<--- B: +ve step --->
2386			<- |seg_len| ->
2387			|
2388		   base address
2389
2390     where "n" is the number of scalar iterations covered by the segment.
2391
2392     A is the range of bytes accessed when the step is negative,
2393     B is the range when the step is positive.
2394
2395     We know that DR_B is a write.  We also know (from checking that
2396     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
2397     the write performed by access i of DR_B occurs after access numbers
2398     j<=i of DR_A in both the original and the new code.  Any write or
2399     anti dependencies wrt those DR_A accesses are therefore maintained.
2400
2401     We just need to make sure that each individual write in DR_B does not
2402     overlap any higher-indexed access in DR_A; such DR_A accesses happen
2403     after the DR_B access in the original code but happen before it in
2404     the new code.
2405
2406     We know the steps for both accesses are equal, so by induction, we
2407     just need to test whether the first write of DR_B overlaps a later
2408     access of DR_A.  In other words, we need to move addr_a along by
2409     one iteration:
2410
2411       addr_a' = addr_a + step
2412
2413     and check whether:
2414
2415       [addr_b, addr_b + last_chunk_b]
2416
2417     overlaps:
2418
2419       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]
2420
2421     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:
2422
2423	low_offset_a = +ve step ? 0 : seg_len_a - step
2424       high_offset_a = +ve step ? seg_len_a - step : 0
2425
2426     This is equivalent to testing whether:
2427
2428       addr_a' + low_offset_a <= addr_b + last_chunk_b
2429       && addr_b <= addr_a' + high_offset_a + last_chunk_a
2430
2431     Converting this into a single test, there is an overlap if:
2432
2433       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit
2434
2435     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b
2436
2437     If DR_A is performed, limit + |step| - last_chunk_b is known to be
2438     less than the size of the object underlying DR_A.  We also know
2439     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
2440     guaranteed at compile time.  There can therefore be no overflow if
2441     "limit" is calculated in an unsigned type with pointer precision.  */
2442  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
2443					 DR_OFFSET (dr_a.dr));
2444  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));
2445
2446  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
2447					 DR_OFFSET (dr_b.dr));
2448  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));
2449
2450  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
2451  addr_a = fold_build_pointer_plus (addr_a, step);
2452  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
2453					   seg_len_a, step);
2454  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
2455    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);
2456
2457  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
2458				   seg_len_a_minus_step, size_zero_node);
2459  if (!CONSTANT_CLASS_P (low_offset_a))
2460    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);
2461
2462  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
2463     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
2464  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
2465				    low_offset_a);
2466
2467  /* The amount added to addr_b - addr_a'.  */
2468  tree bias = fold_build2 (MINUS_EXPR, sizetype,
2469			   size_int (last_chunk_b), low_offset_a);
2470
2471  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
2472  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
2473		       size_int (last_chunk_a + last_chunk_b));
2474
2475  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
2476  subject = fold_build2 (PLUS_EXPR, sizetype,
2477			 fold_convert (sizetype, subject), bias);
2478
2479  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
2480  if (dump_enabled_p ())
2481    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
2482  return true;
2483}
2484
2485/* If ALIGN is nonzero, set up *SEQ_MIN_OUT and *SEQ_MAX_OUT so that for
2486   every address ADDR accessed by D:
2487
2488     *SEQ_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEQ_MAX_OUT
2489
2490   In this case, every element accessed by D is aligned to at least
2491   ALIGN bytes.
2492
2493   If ALIGN is zero then instead set *SEG_MAX_OUT so that:
2494
2495     *SEQ_MIN_OUT <= ADDR < *SEQ_MAX_OUT.  */
2496
2497static void
2498get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
2499		     tree *seg_max_out, HOST_WIDE_INT align)
2500{
2501  /* Each access has the following pattern:
2502
2503	  <- |seg_len| ->
2504	  <--- A: -ve step --->
2505	  +-----+-------+-----+-------+-----+
2506	  | n-1 | ,.... |  0  | ..... | n-1 |
2507	  +-----+-------+-----+-------+-----+
2508			<--- B: +ve step --->
2509			<- |seg_len| ->
2510			|
2511		   base address
2512
2513     where "n" is the number of scalar iterations covered by the segment.
2514     (This should be VF for a particular pair if we know that both steps
2515     are the same, otherwise it will be the full number of scalar loop
2516     iterations.)
2517
2518     A is the range of bytes accessed when the step is negative,
2519     B is the range when the step is positive.
2520
2521     If the access size is "access_size" bytes, the lowest addressed byte is:
2522
2523	 base + (step < 0 ? seg_len : 0)   [LB]
2524
2525     and the highest addressed byte is always below:
2526
2527	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]
2528
2529     Thus:
2530
2531	 LB <= ADDR < UB
2532
2533     If ALIGN is nonzero, all three values are aligned to at least ALIGN
2534     bytes, so:
2535
2536	 LB <= ADDR <= UB - ALIGN
2537
2538     where "- ALIGN" folds naturally with the "+ access_size" and often
2539     cancels it out.
2540
2541     We don't try to simplify LB and UB beyond this (e.g. by using
2542     MIN and MAX based on whether seg_len rather than the stride is
2543     negative) because it is possible for the absolute size of the
2544     segment to overflow the range of a ssize_t.
2545
2546     Keeping the pointer_plus outside of the cond_expr should allow
2547     the cond_exprs to be shared with other alias checks.  */
2548  tree indicator = dr_direction_indicator (d.dr);
2549  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
2550			       fold_convert (ssizetype, indicator),
2551			       ssize_int (0));
2552  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
2553					    DR_OFFSET (d.dr));
2554  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
2555  tree seg_len
2556    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));
2557
2558  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2559				seg_len, size_zero_node);
2560  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
2561				size_zero_node, seg_len);
2562  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
2563			   size_int (d.access_size - align));
2564
2565  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
2566  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
2567}
2568
2569/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
2570   storing the condition in *COND_EXPR.  The fallback is to generate a
2571   a test that the two accesses do not overlap:
2572
2573     end_a <= start_b || end_b <= start_a.  */
2574
2575static void
2576create_intersect_range_checks (class loop *loop, tree *cond_expr,
2577			       const dr_with_seg_len_pair_t &alias_pair)
2578{
2579  const dr_with_seg_len& dr_a = alias_pair.first;
2580  const dr_with_seg_len& dr_b = alias_pair.second;
2581  *cond_expr = NULL_TREE;
2582  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
2583    return;
2584
2585  if (create_ifn_alias_checks (cond_expr, alias_pair))
2586    return;
2587
2588  if (create_waw_or_war_checks (cond_expr, alias_pair))
2589    return;
2590
2591  unsigned HOST_WIDE_INT min_align;
2592  tree_code cmp_code;
2593  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
2594     are equivalent.  This is just an optimization heuristic.  */
2595  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
2596      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
2597    {
2598      /* In this case adding access_size to seg_len is likely to give
2599	 a simple X * step, where X is either the number of scalar
2600	 iterations or the vectorization factor.  We're better off
2601	 keeping that, rather than subtracting an alignment from it.
2602
2603	 In this case the maximum values are exclusive and so there is
2604	 no alias if the maximum of one segment equals the minimum
2605	 of another.  */
2606      min_align = 0;
2607      cmp_code = LE_EXPR;
2608    }
2609  else
2610    {
2611      /* Calculate the minimum alignment shared by all four pointers,
2612	 then arrange for this alignment to be subtracted from the
2613	 exclusive maximum values to get inclusive maximum values.
2614	 This "- min_align" is cumulative with a "+ access_size"
2615	 in the calculation of the maximum values.  In the best
2616	 (and common) case, the two cancel each other out, leaving
2617	 us with an inclusive bound based only on seg_len.  In the
2618	 worst case we're simply adding a smaller number than before.
2619
2620	 Because the maximum values are inclusive, there is an alias
2621	 if the maximum value of one segment is equal to the minimum
2622	 value of the other.  */
2623      min_align = MIN (dr_a.align, dr_b.align);
2624      cmp_code = LT_EXPR;
2625    }
2626
2627  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
2628  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
2629  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);
2630
2631  *cond_expr
2632    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
2633	fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min),
2634	fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min));
2635  if (dump_enabled_p ())
2636    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
2637}
2638
2639/* Create a conditional expression that represents the run-time checks for
2640   overlapping of address ranges represented by a list of data references
2641   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
2642   COND_EXPR is the conditional expression to be used in the if statement
2643   that controls which version of the loop gets executed at runtime.  */
2644
2645void
2646create_runtime_alias_checks (class loop *loop,
2647			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
2648			     tree * cond_expr)
2649{
2650  tree part_cond_expr;
2651
2652  fold_defer_overflow_warnings ();
2653  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
2654    {
2655      gcc_assert (alias_pair.flags);
2656      if (dump_enabled_p ())
2657	dump_printf (MSG_NOTE,
2658		     "create runtime check for data references %T and %T\n",
2659		     DR_REF (alias_pair.first.dr),
2660		     DR_REF (alias_pair.second.dr));
2661
2662      /* Create condition expression for each pair data references.  */
2663      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
2664      if (*cond_expr)
2665	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2666				  *cond_expr, part_cond_expr);
2667      else
2668	*cond_expr = part_cond_expr;
2669    }
2670  fold_undefer_and_ignore_overflow_warnings ();
2671}
2672
2673/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
2674   expressions.  */
2675static bool
2676dr_equal_offsets_p1 (tree offset1, tree offset2)
2677{
2678  bool res;
2679
2680  STRIP_NOPS (offset1);
2681  STRIP_NOPS (offset2);
2682
2683  if (offset1 == offset2)
2684    return true;
2685
2686  if (TREE_CODE (offset1) != TREE_CODE (offset2)
2687      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
2688    return false;
2689
2690  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
2691                             TREE_OPERAND (offset2, 0));
2692
2693  if (!res || !BINARY_CLASS_P (offset1))
2694    return res;
2695
2696  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
2697                             TREE_OPERAND (offset2, 1));
2698
2699  return res;
2700}
2701
2702/* Check if DRA and DRB have equal offsets.  */
2703bool
2704dr_equal_offsets_p (struct data_reference *dra,
2705                    struct data_reference *drb)
2706{
2707  tree offset1, offset2;
2708
2709  offset1 = DR_OFFSET (dra);
2710  offset2 = DR_OFFSET (drb);
2711
2712  return dr_equal_offsets_p1 (offset1, offset2);
2713}
2714
2715/* Returns true if FNA == FNB.  */
2716
2717static bool
2718affine_function_equal_p (affine_fn fna, affine_fn fnb)
2719{
2720  unsigned i, n = fna.length ();
2721
2722  if (n != fnb.length ())
2723    return false;
2724
2725  for (i = 0; i < n; i++)
2726    if (!operand_equal_p (fna[i], fnb[i], 0))
2727      return false;
2728
2729  return true;
2730}
2731
2732/* If all the functions in CF are the same, returns one of them,
2733   otherwise returns NULL.  */
2734
2735static affine_fn
2736common_affine_function (conflict_function *cf)
2737{
2738  unsigned i;
2739  affine_fn comm;
2740
2741  if (!CF_NONTRIVIAL_P (cf))
2742    return affine_fn ();
2743
2744  comm = cf->fns[0];
2745
2746  for (i = 1; i < cf->n; i++)
2747    if (!affine_function_equal_p (comm, cf->fns[i]))
2748      return affine_fn ();
2749
2750  return comm;
2751}
2752
2753/* Returns the base of the affine function FN.  */
2754
2755static tree
2756affine_function_base (affine_fn fn)
2757{
2758  return fn[0];
2759}
2760
2761/* Returns true if FN is a constant.  */
2762
2763static bool
2764affine_function_constant_p (affine_fn fn)
2765{
2766  unsigned i;
2767  tree coef;
2768
2769  for (i = 1; fn.iterate (i, &coef); i++)
2770    if (!integer_zerop (coef))
2771      return false;
2772
2773  return true;
2774}
2775
2776/* Returns true if FN is the zero constant function.  */
2777
2778static bool
2779affine_function_zero_p (affine_fn fn)
2780{
2781  return (integer_zerop (affine_function_base (fn))
2782	  && affine_function_constant_p (fn));
2783}
2784
2785/* Returns a signed integer type with the largest precision from TA
2786   and TB.  */
2787
2788static tree
2789signed_type_for_types (tree ta, tree tb)
2790{
2791  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
2792    return signed_type_for (ta);
2793  else
2794    return signed_type_for (tb);
2795}
2796
2797/* Applies operation OP on affine functions FNA and FNB, and returns the
2798   result.  */
2799
2800static affine_fn
2801affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
2802{
2803  unsigned i, n, m;
2804  affine_fn ret;
2805  tree coef;
2806
2807  if (fnb.length () > fna.length ())
2808    {
2809      n = fna.length ();
2810      m = fnb.length ();
2811    }
2812  else
2813    {
2814      n = fnb.length ();
2815      m = fna.length ();
2816    }
2817
2818  ret.create (m);
2819  for (i = 0; i < n; i++)
2820    {
2821      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
2822					 TREE_TYPE (fnb[i]));
2823      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
2824    }
2825
2826  for (; fna.iterate (i, &coef); i++)
2827    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2828				 coef, integer_zero_node));
2829  for (; fnb.iterate (i, &coef); i++)
2830    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
2831				 integer_zero_node, coef));
2832
2833  return ret;
2834}
2835
2836/* Returns the sum of affine functions FNA and FNB.  */
2837
2838static affine_fn
2839affine_fn_plus (affine_fn fna, affine_fn fnb)
2840{
2841  return affine_fn_op (PLUS_EXPR, fna, fnb);
2842}
2843
2844/* Returns the difference of affine functions FNA and FNB.  */
2845
2846static affine_fn
2847affine_fn_minus (affine_fn fna, affine_fn fnb)
2848{
2849  return affine_fn_op (MINUS_EXPR, fna, fnb);
2850}
2851
2852/* Frees affine function FN.  */
2853
2854static void
2855affine_fn_free (affine_fn fn)
2856{
2857  fn.release ();
2858}
2859
2860/* Determine for each subscript in the data dependence relation DDR
2861   the distance.  */
2862
2863static void
2864compute_subscript_distance (struct data_dependence_relation *ddr)
2865{
2866  conflict_function *cf_a, *cf_b;
2867  affine_fn fn_a, fn_b, diff;
2868
2869  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
2870    {
2871      unsigned int i;
2872
2873      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2874 	{
2875 	  struct subscript *subscript;
2876
2877 	  subscript = DDR_SUBSCRIPT (ddr, i);
2878 	  cf_a = SUB_CONFLICTS_IN_A (subscript);
2879 	  cf_b = SUB_CONFLICTS_IN_B (subscript);
2880
2881	  fn_a = common_affine_function (cf_a);
2882	  fn_b = common_affine_function (cf_b);
2883	  if (!fn_a.exists () || !fn_b.exists ())
2884	    {
2885	      SUB_DISTANCE (subscript) = chrec_dont_know;
2886	      return;
2887	    }
2888	  diff = affine_fn_minus (fn_a, fn_b);
2889
2890 	  if (affine_function_constant_p (diff))
2891 	    SUB_DISTANCE (subscript) = affine_function_base (diff);
2892 	  else
2893 	    SUB_DISTANCE (subscript) = chrec_dont_know;
2894
2895	  affine_fn_free (diff);
2896 	}
2897    }
2898}
2899
2900/* Returns the conflict function for "unknown".  */
2901
2902static conflict_function *
2903conflict_fn_not_known (void)
2904{
2905  conflict_function *fn = XCNEW (conflict_function);
2906  fn->n = NOT_KNOWN;
2907
2908  return fn;
2909}
2910
2911/* Returns the conflict function for "independent".  */
2912
2913static conflict_function *
2914conflict_fn_no_dependence (void)
2915{
2916  conflict_function *fn = XCNEW (conflict_function);
2917  fn->n = NO_DEPENDENCE;
2918
2919  return fn;
2920}
2921
2922/* Returns true if the address of OBJ is invariant in LOOP.  */
2923
2924static bool
2925object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
2926{
2927  while (handled_component_p (obj))
2928    {
2929      if (TREE_CODE (obj) == ARRAY_REF)
2930	{
2931	  for (int i = 1; i < 4; ++i)
2932	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
2933							loop->num))
2934	      return false;
2935	}
2936      else if (TREE_CODE (obj) == COMPONENT_REF)
2937	{
2938	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
2939						      loop->num))
2940	    return false;
2941	}
2942      obj = TREE_OPERAND (obj, 0);
2943    }
2944
2945  if (!INDIRECT_REF_P (obj)
2946      && TREE_CODE (obj) != MEM_REF)
2947    return true;
2948
2949  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
2950						  loop->num);
2951}
2952
2953/* Returns false if we can prove that data references A and B do not alias,
2954   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
2955   considered.  */
2956
2957bool
2958dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
2959		class loop *loop_nest)
2960{
2961  tree addr_a = DR_BASE_OBJECT (a);
2962  tree addr_b = DR_BASE_OBJECT (b);
2963
2964  /* If we are not processing a loop nest but scalar code we
2965     do not need to care about possible cross-iteration dependences
2966     and thus can process the full original reference.  Do so,
2967     similar to how loop invariant motion applies extra offset-based
2968     disambiguation.  */
2969  if (!loop_nest)
2970    {
2971      aff_tree off1, off2;
2972      poly_widest_int size1, size2;
2973      get_inner_reference_aff (DR_REF (a), &off1, &size1);
2974      get_inner_reference_aff (DR_REF (b), &off2, &size2);
2975      aff_combination_scale (&off1, -1);
2976      aff_combination_add (&off2, &off1);
2977      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
2978	return false;
2979    }
2980
2981  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
2982      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
2983      /* For cross-iteration dependences the cliques must be valid for the
2984	 whole loop, not just individual iterations.  */
2985      && (!loop_nest
2986	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
2987	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
2988      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
2989      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
2990    return false;
2991
2992  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
2993     do not know the size of the base-object.  So we cannot do any
2994     offset/overlap based analysis but have to rely on points-to
2995     information only.  */
2996  if (TREE_CODE (addr_a) == MEM_REF
2997      && (DR_UNCONSTRAINED_BASE (a)
2998	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
2999    {
3000      /* For true dependences we can apply TBAA.  */
3001      if (flag_strict_aliasing
3002	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3003	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3004				     get_alias_set (DR_REF (b))))
3005	return false;
3006      if (TREE_CODE (addr_b) == MEM_REF)
3007	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3008				       TREE_OPERAND (addr_b, 0));
3009      else
3010	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3011				       build_fold_addr_expr (addr_b));
3012    }
3013  else if (TREE_CODE (addr_b) == MEM_REF
3014	   && (DR_UNCONSTRAINED_BASE (b)
3015	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
3016    {
3017      /* For true dependences we can apply TBAA.  */
3018      if (flag_strict_aliasing
3019	  && DR_IS_WRITE (a) && DR_IS_READ (b)
3020	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
3021				     get_alias_set (DR_REF (b))))
3022	return false;
3023      if (TREE_CODE (addr_a) == MEM_REF)
3024	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
3025				       TREE_OPERAND (addr_b, 0));
3026      else
3027	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
3028				       TREE_OPERAND (addr_b, 0));
3029    }
3030
3031  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
3032     that is being subsetted in the loop nest.  */
3033  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
3034    return refs_output_dependent_p (addr_a, addr_b);
3035  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
3036    return refs_anti_dependent_p (addr_a, addr_b);
3037  return refs_may_alias_p (addr_a, addr_b);
3038}
3039
3040/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
3041   if it is meaningful to compare their associated access functions
3042   when checking for dependencies.  */
3043
3044static bool
3045access_fn_components_comparable_p (tree ref_a, tree ref_b)
3046{
3047  /* Allow pairs of component refs from the following sets:
3048
3049       { REALPART_EXPR, IMAGPART_EXPR }
3050       { COMPONENT_REF }
3051       { ARRAY_REF }.  */
3052  tree_code code_a = TREE_CODE (ref_a);
3053  tree_code code_b = TREE_CODE (ref_b);
3054  if (code_a == IMAGPART_EXPR)
3055    code_a = REALPART_EXPR;
3056  if (code_b == IMAGPART_EXPR)
3057    code_b = REALPART_EXPR;
3058  if (code_a != code_b)
3059    return false;
3060
3061  if (TREE_CODE (ref_a) == COMPONENT_REF)
3062    /* ??? We cannot simply use the type of operand #0 of the refs here as
3063       the Fortran compiler smuggles type punning into COMPONENT_REFs.
3064       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
3065    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
3066	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));
3067
3068  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
3069			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
3070}
3071
3072/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
3073   is true when the main indices of A and B were not comparable so we try again
3074   with alternate indices computed on an indirect reference.  */
3075
3076struct data_dependence_relation *
3077initialize_data_dependence_relation (struct data_dependence_relation *res,
3078				     vec<loop_p> loop_nest,
3079				     bool use_alt_indices)
3080{
3081  struct data_reference *a = DDR_A (res);
3082  struct data_reference *b = DDR_B (res);
3083  unsigned int i;
3084
3085  struct indices *indices_a = &a->indices;
3086  struct indices *indices_b = &b->indices;
3087  if (use_alt_indices)
3088    {
3089      if (TREE_CODE (DR_REF (a)) != MEM_REF)
3090	indices_a = &a->alt_indices;
3091      if (TREE_CODE (DR_REF (b)) != MEM_REF)
3092	indices_b = &b->alt_indices;
3093    }
3094  unsigned int num_dimensions_a = indices_a->access_fns.length ();
3095  unsigned int num_dimensions_b = indices_b->access_fns.length ();
3096  if (num_dimensions_a == 0 || num_dimensions_b == 0)
3097    {
3098      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3099      return res;
3100    }
3101
3102  /* For unconstrained bases, the root (highest-indexed) subscript
3103     describes a variation in the base of the original DR_REF rather
3104     than a component access.  We have no type that accurately describes
3105     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
3106     applying this subscript) so limit the search to the last real
3107     component access.
3108
3109     E.g. for:
3110
3111	void
3112	f (int a[][8], int b[][8])
3113	{
3114	  for (int i = 0; i < 8; ++i)
3115	    a[i * 2][0] = b[i][0];
3116	}
3117
3118     the a and b accesses have a single ARRAY_REF component reference [0]
3119     but have two subscripts.  */
3120  if (indices_a->unconstrained_base)
3121    num_dimensions_a -= 1;
3122  if (indices_b->unconstrained_base)
3123    num_dimensions_b -= 1;
3124
3125  /* These structures describe sequences of component references in
3126     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
3127     specific access function.  */
3128  struct {
3129    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
3130       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
3131       indices.  In C notation, these are the indices of the rightmost
3132       component references; e.g. for a sequence .b.c.d, the start
3133       index is for .d.  */
3134    unsigned int start_a;
3135    unsigned int start_b;
3136
3137    /* The sequence contains LENGTH consecutive access functions from
3138       each DR.  */
3139    unsigned int length;
3140
3141    /* The enclosing objects for the A and B sequences respectively,
3142       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
3143       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
3144    tree object_a;
3145    tree object_b;
3146  } full_seq = {}, struct_seq = {};
3147
3148  /* Before each iteration of the loop:
3149
3150     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
3151     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
3152  unsigned int index_a = 0;
3153  unsigned int index_b = 0;
3154  tree ref_a = DR_REF (a);
3155  tree ref_b = DR_REF (b);
3156
3157  /* Now walk the component references from the final DR_REFs back up to
3158     the enclosing base objects.  Each component reference corresponds
3159     to one access function in the DR, with access function 0 being for
3160     the final DR_REF and the highest-indexed access function being the
3161     one that is applied to the base of the DR.
3162
3163     Look for a sequence of component references whose access functions
3164     are comparable (see access_fn_components_comparable_p).  If more
3165     than one such sequence exists, pick the one nearest the base
3166     (which is the leftmost sequence in C notation).  Store this sequence
3167     in FULL_SEQ.
3168
3169     For example, if we have:
3170
3171	struct foo { struct bar s; ... } (*a)[10], (*b)[10];
3172
3173	A: a[0][i].s.c.d
3174	B: __real b[0][i].s.e[i].f
3175
3176     (where d is the same type as the real component of f) then the access
3177     functions would be:
3178
3179			 0   1   2   3
3180	A:              .d  .c  .s [i]
3181
3182		 0   1   2   3   4   5
3183	B:  __real  .f [i]  .e  .s [i]
3184
3185     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
3186     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
3187     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
3188     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
3189     so is comparable.  The A3/B5 column contains two ARRAY_REFs that
3190     index foo[10] arrays, so is again comparable.  The sequence is
3191     therefore:
3192
3193        A: [1, 3]  (i.e. [i].s.c)
3194        B: [3, 5]  (i.e. [i].s.e)
3195
3196     Also look for sequences of component references whose access
3197     functions are comparable and whose enclosing objects have the same
3198     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
3199     example, STRUCT_SEQ would be:
3200
3201        A: [1, 2]  (i.e. s.c)
3202        B: [3, 4]  (i.e. s.e)  */
3203  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
3204    {
3205      /* The alternate indices form always has a single dimension
3206	 with unconstrained base.  */
3207      gcc_assert (!use_alt_indices);
3208
3209      /* REF_A and REF_B must be one of the component access types
3210	 allowed by dr_analyze_indices.  */
3211      gcc_checking_assert (access_fn_component_p (ref_a));
3212      gcc_checking_assert (access_fn_component_p (ref_b));
3213
3214      /* Get the immediately-enclosing objects for REF_A and REF_B,
3215	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
3216	 and DR_ACCESS_FN (B, INDEX_B).  */
3217      tree object_a = TREE_OPERAND (ref_a, 0);
3218      tree object_b = TREE_OPERAND (ref_b, 0);
3219
3220      tree type_a = TREE_TYPE (object_a);
3221      tree type_b = TREE_TYPE (object_b);
3222      if (access_fn_components_comparable_p (ref_a, ref_b))
3223	{
3224	  /* This pair of component accesses is comparable for dependence
3225	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
3226	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
3227	  if (full_seq.start_a + full_seq.length != index_a
3228	      || full_seq.start_b + full_seq.length != index_b)
3229	    {
3230	      /* The accesses don't extend the current sequence,
3231		 so start a new one here.  */
3232	      full_seq.start_a = index_a;
3233	      full_seq.start_b = index_b;
3234	      full_seq.length = 0;
3235	    }
3236
3237	  /* Add this pair of references to the sequence.  */
3238	  full_seq.length += 1;
3239	  full_seq.object_a = object_a;
3240	  full_seq.object_b = object_b;
3241
3242	  /* If the enclosing objects are structures (and thus have the
3243	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
3244	  if (TREE_CODE (type_a) == RECORD_TYPE)
3245	    struct_seq = full_seq;
3246
3247	  /* Move to the next containing reference for both A and B.  */
3248	  ref_a = object_a;
3249	  ref_b = object_b;
3250	  index_a += 1;
3251	  index_b += 1;
3252	  continue;
3253	}
3254
3255      /* Try to approach equal type sizes.  */
3256      if (!COMPLETE_TYPE_P (type_a)
3257	  || !COMPLETE_TYPE_P (type_b)
3258	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
3259	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
3260	break;
3261
3262      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
3263      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
3264      if (size_a <= size_b)
3265	{
3266	  index_a += 1;
3267	  ref_a = object_a;
3268	}
3269      if (size_b <= size_a)
3270	{
3271	  index_b += 1;
3272	  ref_b = object_b;
3273	}
3274    }
3275
3276  /* See whether FULL_SEQ ends at the base and whether the two bases
3277     are equal.  We do not care about TBAA or alignment info so we can
3278     use OEP_ADDRESS_OF to avoid false negatives.  */
3279  tree base_a = indices_a->base_object;
3280  tree base_b = indices_b->base_object;
3281  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
3282		      && full_seq.start_b + full_seq.length == num_dimensions_b
3283		      && (indices_a->unconstrained_base
3284			  == indices_b->unconstrained_base)
3285		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
3286		      && (types_compatible_p (TREE_TYPE (base_a),
3287					      TREE_TYPE (base_b))
3288			  || (!base_supports_access_fn_components_p (base_a)
3289			      && !base_supports_access_fn_components_p (base_b)
3290			      && operand_equal_p
3291				   (TYPE_SIZE (TREE_TYPE (base_a)),
3292				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
3293		      && (!loop_nest.exists ()
3294			  || (object_address_invariant_in_loop_p
3295			      (loop_nest[0], base_a))));
3296
3297  /* If the bases are the same, we can include the base variation too.
3298     E.g. the b accesses in:
3299
3300       for (int i = 0; i < n; ++i)
3301         b[i + 4][0] = b[i][0];
3302
3303     have a definite dependence distance of 4, while for:
3304
3305       for (int i = 0; i < n; ++i)
3306         a[i + 4][0] = b[i][0];
3307
3308     the dependence distance depends on the gap between a and b.
3309
3310     If the bases are different then we can only rely on the sequence
3311     rooted at a structure access, since arrays are allowed to overlap
3312     arbitrarily and change shape arbitrarily.  E.g. we treat this as
3313     valid code:
3314
3315       int a[256];
3316       ...
3317       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];
3318
3319     where two lvalues with the same int[4][3] type overlap, and where
3320     both lvalues are distinct from the object's declared type.  */
3321  if (same_base_p)
3322    {
3323      if (indices_a->unconstrained_base)
3324	full_seq.length += 1;
3325    }
3326  else
3327    full_seq = struct_seq;
3328
3329  /* Punt if we didn't find a suitable sequence.  */
3330  if (full_seq.length == 0)
3331    {
3332      if (use_alt_indices
3333	  || (TREE_CODE (DR_REF (a)) == MEM_REF
3334	      && TREE_CODE (DR_REF (b)) == MEM_REF)
3335	  || may_be_nonaddressable_p (DR_REF (a))
3336	  || may_be_nonaddressable_p (DR_REF (b)))
3337	{
3338	  /* Fully exhausted possibilities.  */
3339	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3340	  return res;
3341	}
3342
3343      /* Try evaluating both DRs as dereferences of pointers.  */
3344      if (!a->alt_indices.base_object
3345	  && TREE_CODE (DR_REF (a)) != MEM_REF)
3346	{
3347	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
3348				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
3349				 build_int_cst
3350				   (reference_alias_ptr_type (DR_REF (a)), 0));
3351	  dr_analyze_indices (&a->alt_indices, alt_ref,
3352			      loop_preheader_edge (loop_nest[0]),
3353			      loop_containing_stmt (DR_STMT (a)));
3354	}
3355      if (!b->alt_indices.base_object
3356	  && TREE_CODE (DR_REF (b)) != MEM_REF)
3357	{
3358	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
3359				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
3360				 build_int_cst
3361				   (reference_alias_ptr_type (DR_REF (b)), 0));
3362	  dr_analyze_indices (&b->alt_indices, alt_ref,
3363			      loop_preheader_edge (loop_nest[0]),
3364			      loop_containing_stmt (DR_STMT (b)));
3365	}
3366      return initialize_data_dependence_relation (res, loop_nest, true);
3367    }
3368
3369  if (!same_base_p)
3370    {
3371      /* Partial overlap is possible for different bases when strict aliasing
3372	 is not in effect.  It's also possible if either base involves a union
3373	 access; e.g. for:
3374
3375	   struct s1 { int a[2]; };
3376	   struct s2 { struct s1 b; int c; };
3377	   struct s3 { int d; struct s1 e; };
3378	   union u { struct s2 f; struct s3 g; } *p, *q;
3379
3380	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
3381	 "p->g.e" (base "p->g") and might partially overlap the s1 at
3382	 "q->g.e" (base "q->g").  */
3383      if (!flag_strict_aliasing
3384	  || ref_contains_union_access_p (full_seq.object_a)
3385	  || ref_contains_union_access_p (full_seq.object_b))
3386	{
3387	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3388	  return res;
3389	}
3390
3391      DDR_COULD_BE_INDEPENDENT_P (res) = true;
3392      if (!loop_nest.exists ()
3393	  || (object_address_invariant_in_loop_p (loop_nest[0],
3394						  full_seq.object_a)
3395	      && object_address_invariant_in_loop_p (loop_nest[0],
3396						     full_seq.object_b)))
3397	{
3398	  DDR_OBJECT_A (res) = full_seq.object_a;
3399	  DDR_OBJECT_B (res) = full_seq.object_b;
3400	}
3401    }
3402
3403  DDR_AFFINE_P (res) = true;
3404  DDR_ARE_DEPENDENT (res) = NULL_TREE;
3405  DDR_SUBSCRIPTS (res).create (full_seq.length);
3406  DDR_LOOP_NEST (res) = loop_nest;
3407  DDR_SELF_REFERENCE (res) = false;
3408
3409  for (i = 0; i < full_seq.length; ++i)
3410    {
3411      struct subscript *subscript;
3412
3413      subscript = XNEW (struct subscript);
3414      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
3415      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
3416      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
3417      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
3418      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3419      SUB_DISTANCE (subscript) = chrec_dont_know;
3420      DDR_SUBSCRIPTS (res).safe_push (subscript);
3421    }
3422
3423  return res;
3424}
3425
3426/* Initialize a data dependence relation between data accesses A and
3427   B.  NB_LOOPS is the number of loops surrounding the references: the
3428   size of the classic distance/direction vectors.  */
3429
3430struct data_dependence_relation *
3431initialize_data_dependence_relation (struct data_reference *a,
3432				     struct data_reference *b,
3433				     vec<loop_p> loop_nest)
3434{
3435  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
3436  DDR_A (res) = a;
3437  DDR_B (res) = b;
3438  DDR_LOOP_NEST (res).create (0);
3439  DDR_SUBSCRIPTS (res).create (0);
3440  DDR_DIR_VECTS (res).create (0);
3441  DDR_DIST_VECTS (res).create (0);
3442
3443  if (a == NULL || b == NULL)
3444    {
3445      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
3446      return res;
3447    }
3448
3449  /* If the data references do not alias, then they are independent.  */
3450  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
3451    {
3452      DDR_ARE_DEPENDENT (res) = chrec_known;
3453      return res;
3454    }
3455
3456  return initialize_data_dependence_relation (res, loop_nest, false);
3457}
3458
3459
3460/* Frees memory used by the conflict function F.  */
3461
3462static void
3463free_conflict_function (conflict_function *f)
3464{
3465  unsigned i;
3466
3467  if (CF_NONTRIVIAL_P (f))
3468    {
3469      for (i = 0; i < f->n; i++)
3470	affine_fn_free (f->fns[i]);
3471    }
3472  free (f);
3473}
3474
3475/* Frees memory used by SUBSCRIPTS.  */
3476
3477static void
3478free_subscripts (vec<subscript_p> subscripts)
3479{
3480  for (subscript_p s : subscripts)
3481    {
3482      free_conflict_function (s->conflicting_iterations_in_a);
3483      free_conflict_function (s->conflicting_iterations_in_b);
3484      free (s);
3485    }
3486  subscripts.release ();
3487}
3488
3489/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
3490   description.  */
3491
3492static inline void
3493finalize_ddr_dependent (struct data_dependence_relation *ddr,
3494			tree chrec)
3495{
3496  DDR_ARE_DEPENDENT (ddr) = chrec;
3497  free_subscripts (DDR_SUBSCRIPTS (ddr));
3498  DDR_SUBSCRIPTS (ddr).create (0);
3499}
3500
3501/* The dependence relation DDR cannot be represented by a distance
3502   vector.  */
3503
3504static inline void
3505non_affine_dependence_relation (struct data_dependence_relation *ddr)
3506{
3507  if (dump_file && (dump_flags & TDF_DETAILS))
3508    fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
3509
3510  DDR_AFFINE_P (ddr) = false;
3511}
3512
3513
3514
3515/* This section contains the classic Banerjee tests.  */
3516
3517/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
3518   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */
3519
3520static inline bool
3521ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3522{
3523  return (evolution_function_is_constant_p (chrec_a)
3524	  && evolution_function_is_constant_p (chrec_b));
3525}
3526
3527/* Returns true iff CHREC_A and CHREC_B are dependent on an index
3528   variable, i.e., if the SIV (Single Index Variable) test is true.  */
3529
3530static bool
3531siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
3532{
3533  if ((evolution_function_is_constant_p (chrec_a)
3534       && evolution_function_is_univariate_p (chrec_b))
3535      || (evolution_function_is_constant_p (chrec_b)
3536	  && evolution_function_is_univariate_p (chrec_a)))
3537    return true;
3538
3539  if (evolution_function_is_univariate_p (chrec_a)
3540      && evolution_function_is_univariate_p (chrec_b))
3541    {
3542      switch (TREE_CODE (chrec_a))
3543	{
3544	case POLYNOMIAL_CHREC:
3545	  switch (TREE_CODE (chrec_b))
3546	    {
3547	    case POLYNOMIAL_CHREC:
3548	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
3549		return false;
3550	      /* FALLTHRU */
3551
3552	    default:
3553	      return true;
3554	    }
3555
3556	default:
3557	  return true;
3558	}
3559    }
3560
3561  return false;
3562}
3563
3564/* Creates a conflict function with N dimensions.  The affine functions
3565   in each dimension follow.  */
3566
3567static conflict_function *
3568conflict_fn (unsigned n, ...)
3569{
3570  unsigned i;
3571  conflict_function *ret = XCNEW (conflict_function);
3572  va_list ap;
3573
3574  gcc_assert (n > 0 && n <= MAX_DIM);
3575  va_start (ap, n);
3576
3577  ret->n = n;
3578  for (i = 0; i < n; i++)
3579    ret->fns[i] = va_arg (ap, affine_fn);
3580  va_end (ap);
3581
3582  return ret;
3583}
3584
3585/* Returns constant affine function with value CST.  */
3586
3587static affine_fn
3588affine_fn_cst (tree cst)
3589{
3590  affine_fn fn;
3591  fn.create (1);
3592  fn.quick_push (cst);
3593  return fn;
3594}
3595
3596/* Returns affine function with single variable, CST + COEF * x_DIM.  */
3597
3598static affine_fn
3599affine_fn_univar (tree cst, unsigned dim, tree coef)
3600{
3601  affine_fn fn;
3602  fn.create (dim + 1);
3603  unsigned i;
3604
3605  gcc_assert (dim > 0);
3606  fn.quick_push (cst);
3607  for (i = 1; i < dim; i++)
3608    fn.quick_push (integer_zero_node);
3609  fn.quick_push (coef);
3610  return fn;
3611}
3612
3613/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
3614   *OVERLAPS_B are initialized to the functions that describe the
3615   relation between the elements accessed twice by CHREC_A and
3616   CHREC_B.  For k >= 0, the following property is verified:
3617
3618   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3619
3620static void
3621analyze_ziv_subscript (tree chrec_a,
3622		       tree chrec_b,
3623		       conflict_function **overlaps_a,
3624		       conflict_function **overlaps_b,
3625		       tree *last_conflicts)
3626{
3627  tree type, difference;
3628  dependence_stats.num_ziv++;
3629
3630  if (dump_file && (dump_flags & TDF_DETAILS))
3631    fprintf (dump_file, "(analyze_ziv_subscript \n");
3632
3633  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3634  chrec_a = chrec_convert (type, chrec_a, NULL);
3635  chrec_b = chrec_convert (type, chrec_b, NULL);
3636  difference = chrec_fold_minus (type, chrec_a, chrec_b);
3637
3638  switch (TREE_CODE (difference))
3639    {
3640    case INTEGER_CST:
3641      if (integer_zerop (difference))
3642	{
3643	  /* The difference is equal to zero: the accessed index
3644	     overlaps for each iteration in the loop.  */
3645	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3646	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3647	  *last_conflicts = chrec_dont_know;
3648	  dependence_stats.num_ziv_dependent++;
3649	}
3650      else
3651	{
3652	  /* The accesses do not overlap.  */
3653	  *overlaps_a = conflict_fn_no_dependence ();
3654	  *overlaps_b = conflict_fn_no_dependence ();
3655	  *last_conflicts = integer_zero_node;
3656	  dependence_stats.num_ziv_independent++;
3657	}
3658      break;
3659
3660    default:
3661      /* We're not sure whether the indexes overlap.  For the moment,
3662	 conservatively answer "don't know".  */
3663      if (dump_file && (dump_flags & TDF_DETAILS))
3664	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
3665
3666      *overlaps_a = conflict_fn_not_known ();
3667      *overlaps_b = conflict_fn_not_known ();
3668      *last_conflicts = chrec_dont_know;
3669      dependence_stats.num_ziv_unimplemented++;
3670      break;
3671    }
3672
3673  if (dump_file && (dump_flags & TDF_DETAILS))
3674    fprintf (dump_file, ")\n");
3675}
3676
3677/* Similar to max_stmt_executions_int, but returns the bound as a tree,
3678   and only if it fits to the int type.  If this is not the case, or the
3679   bound  on the number of iterations of LOOP could not be derived, returns
3680   chrec_dont_know.  */
3681
3682static tree
3683max_stmt_executions_tree (class loop *loop)
3684{
3685  widest_int nit;
3686
3687  if (!max_stmt_executions (loop, &nit))
3688    return chrec_dont_know;
3689
3690  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
3691    return chrec_dont_know;
3692
3693  return wide_int_to_tree (unsigned_type_node, nit);
3694}
3695
3696/* Determine whether the CHREC is always positive/negative.  If the expression
3697   cannot be statically analyzed, return false, otherwise set the answer into
3698   VALUE.  */
3699
3700static bool
3701chrec_is_positive (tree chrec, bool *value)
3702{
3703  bool value0, value1, value2;
3704  tree end_value, nb_iter;
3705
3706  switch (TREE_CODE (chrec))
3707    {
3708    case POLYNOMIAL_CHREC:
3709      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
3710	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
3711	return false;
3712
3713      /* FIXME -- overflows.  */
3714      if (value0 == value1)
3715	{
3716	  *value = value0;
3717	  return true;
3718	}
3719
3720      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
3721	 and the proof consists in showing that the sign never
3722	 changes during the execution of the loop, from 0 to
3723	 loop->nb_iterations.  */
3724      if (!evolution_function_is_affine_p (chrec))
3725	return false;
3726
3727      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
3728      if (chrec_contains_undetermined (nb_iter))
3729	return false;
3730
3731#if 0
3732      /* TODO -- If the test is after the exit, we may decrease the number of
3733	 iterations by one.  */
3734      if (after_exit)
3735	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
3736#endif
3737
3738      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);
3739
3740      if (!chrec_is_positive (end_value, &value2))
3741	return false;
3742
3743      *value = value0;
3744      return value0 == value1;
3745
3746    case INTEGER_CST:
3747      switch (tree_int_cst_sgn (chrec))
3748	{
3749	case -1:
3750	  *value = false;
3751	  break;
3752	case 1:
3753	  *value = true;
3754	  break;
3755	default:
3756	  return false;
3757	}
3758      return true;
3759
3760    default:
3761      return false;
3762    }
3763}
3764
3765
3766/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
3767   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
3768   *OVERLAPS_B are initialized to the functions that describe the
3769   relation between the elements accessed twice by CHREC_A and
3770   CHREC_B.  For k >= 0, the following property is verified:
3771
3772   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
3773
3774static void
3775analyze_siv_subscript_cst_affine (tree chrec_a,
3776				  tree chrec_b,
3777				  conflict_function **overlaps_a,
3778				  conflict_function **overlaps_b,
3779				  tree *last_conflicts)
3780{
3781  bool value0, value1, value2;
3782  tree type, difference, tmp;
3783
3784  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
3785  chrec_a = chrec_convert (type, chrec_a, NULL);
3786  chrec_b = chrec_convert (type, chrec_b, NULL);
3787  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
3788
3789  /* Special case overlap in the first iteration.  */
3790  if (integer_zerop (difference))
3791    {
3792      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3793      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
3794      *last_conflicts = integer_one_node;
3795      return;
3796    }
3797
3798  if (!chrec_is_positive (initial_condition (difference), &value0))
3799    {
3800      if (dump_file && (dump_flags & TDF_DETAILS))
3801	fprintf (dump_file, "siv test failed: chrec is not positive.\n");
3802
3803      dependence_stats.num_siv_unimplemented++;
3804      *overlaps_a = conflict_fn_not_known ();
3805      *overlaps_b = conflict_fn_not_known ();
3806      *last_conflicts = chrec_dont_know;
3807      return;
3808    }
3809  else
3810    {
3811      if (value0 == false)
3812	{
3813	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3814	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
3815	    {
3816	      if (dump_file && (dump_flags & TDF_DETAILS))
3817		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3818
3819	      *overlaps_a = conflict_fn_not_known ();
3820	      *overlaps_b = conflict_fn_not_known ();
3821	      *last_conflicts = chrec_dont_know;
3822	      dependence_stats.num_siv_unimplemented++;
3823	      return;
3824	    }
3825	  else
3826	    {
3827	      if (value1 == true)
3828		{
3829		  /* Example:
3830		     chrec_a = 12
3831		     chrec_b = {10, +, 1}
3832		  */
3833
3834		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3835		    {
3836		      HOST_WIDE_INT numiter;
3837		      class loop *loop = get_chrec_loop (chrec_b);
3838
3839		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3840		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
3841					 fold_build1 (ABS_EXPR, type, difference),
3842					 CHREC_RIGHT (chrec_b));
3843		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3844		      *last_conflicts = integer_one_node;
3845
3846
3847		      /* Perform weak-zero siv test to see if overlap is
3848			 outside the loop bounds.  */
3849		      numiter = max_stmt_executions_int (loop);
3850
3851		      if (numiter >= 0
3852			  && compare_tree_int (tmp, numiter) > 0)
3853			{
3854			  free_conflict_function (*overlaps_a);
3855			  free_conflict_function (*overlaps_b);
3856			  *overlaps_a = conflict_fn_no_dependence ();
3857			  *overlaps_b = conflict_fn_no_dependence ();
3858			  *last_conflicts = integer_zero_node;
3859			  dependence_stats.num_siv_independent++;
3860			  return;
3861			}
3862		      dependence_stats.num_siv_dependent++;
3863		      return;
3864		    }
3865
3866		  /* When the step does not divide the difference, there are
3867		     no overlaps.  */
3868		  else
3869		    {
3870		      *overlaps_a = conflict_fn_no_dependence ();
3871		      *overlaps_b = conflict_fn_no_dependence ();
3872		      *last_conflicts = integer_zero_node;
3873		      dependence_stats.num_siv_independent++;
3874		      return;
3875		    }
3876		}
3877
3878	      else
3879		{
3880		  /* Example:
3881		     chrec_a = 12
3882		     chrec_b = {10, +, -1}
3883
3884		     In this case, chrec_a will not overlap with chrec_b.  */
3885		  *overlaps_a = conflict_fn_no_dependence ();
3886		  *overlaps_b = conflict_fn_no_dependence ();
3887		  *last_conflicts = integer_zero_node;
3888		  dependence_stats.num_siv_independent++;
3889		  return;
3890		}
3891	    }
3892	}
3893      else
3894	{
3895	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
3896	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
3897	    {
3898	      if (dump_file && (dump_flags & TDF_DETAILS))
3899		fprintf (dump_file, "siv test failed: chrec not positive.\n");
3900
3901	      *overlaps_a = conflict_fn_not_known ();
3902	      *overlaps_b = conflict_fn_not_known ();
3903	      *last_conflicts = chrec_dont_know;
3904	      dependence_stats.num_siv_unimplemented++;
3905	      return;
3906	    }
3907	  else
3908	    {
3909	      if (value2 == false)
3910		{
3911		  /* Example:
3912		     chrec_a = 3
3913		     chrec_b = {10, +, -1}
3914		  */
3915		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
3916		    {
3917		      HOST_WIDE_INT numiter;
3918		      class loop *loop = get_chrec_loop (chrec_b);
3919
3920		      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
3921		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
3922					 CHREC_RIGHT (chrec_b));
3923		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
3924		      *last_conflicts = integer_one_node;
3925
3926		      /* Perform weak-zero siv test to see if overlap is
3927			 outside the loop bounds.  */
3928		      numiter = max_stmt_executions_int (loop);
3929
3930		      if (numiter >= 0
3931			  && compare_tree_int (tmp, numiter) > 0)
3932			{
3933			  free_conflict_function (*overlaps_a);
3934			  free_conflict_function (*overlaps_b);
3935			  *overlaps_a = conflict_fn_no_dependence ();
3936			  *overlaps_b = conflict_fn_no_dependence ();
3937			  *last_conflicts = integer_zero_node;
3938			  dependence_stats.num_siv_independent++;
3939			  return;
3940			}
3941		      dependence_stats.num_siv_dependent++;
3942		      return;
3943		    }
3944
3945		  /* When the step does not divide the difference, there
3946		     are no overlaps.  */
3947		  else
3948		    {
3949		      *overlaps_a = conflict_fn_no_dependence ();
3950		      *overlaps_b = conflict_fn_no_dependence ();
3951		      *last_conflicts = integer_zero_node;
3952		      dependence_stats.num_siv_independent++;
3953		      return;
3954		    }
3955		}
3956	      else
3957		{
3958		  /* Example:
3959		     chrec_a = 3
3960		     chrec_b = {4, +, 1}
3961
3962		     In this case, chrec_a will not overlap with chrec_b.  */
3963		  *overlaps_a = conflict_fn_no_dependence ();
3964		  *overlaps_b = conflict_fn_no_dependence ();
3965		  *last_conflicts = integer_zero_node;
3966		  dependence_stats.num_siv_independent++;
3967		  return;
3968		}
3969	    }
3970	}
3971    }
3972}
3973
3974/* Helper recursive function for initializing the matrix A.  Returns
3975   the initial value of CHREC.  */
3976
3977static tree
3978initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
3979{
3980  gcc_assert (chrec);
3981
3982  switch (TREE_CODE (chrec))
3983    {
3984    case POLYNOMIAL_CHREC:
3985      HOST_WIDE_INT chrec_right;
3986      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
3987	return chrec_dont_know;
3988      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
3989      /* We want to be able to negate without overflow.  */
3990      if (chrec_right == HOST_WIDE_INT_MIN)
3991	return chrec_dont_know;
3992      A[index][0] = mult * chrec_right;
3993      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
3994
3995    case PLUS_EXPR:
3996    case MULT_EXPR:
3997    case MINUS_EXPR:
3998      {
3999	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4000	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
4001
4002	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
4003      }
4004
4005    CASE_CONVERT:
4006      {
4007	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4008	return chrec_convert (chrec_type (chrec), op, NULL);
4009      }
4010
4011    case BIT_NOT_EXPR:
4012      {
4013	/* Handle ~X as -1 - X.  */
4014	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
4015	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
4016			      build_int_cst (TREE_TYPE (chrec), -1), op);
4017      }
4018
4019    case INTEGER_CST:
4020      return chrec;
4021
4022    default:
4023      gcc_unreachable ();
4024      return NULL_TREE;
4025    }
4026}
4027
4028#define FLOOR_DIV(x,y) ((x) / (y))
4029
4030/* Solves the special case of the Diophantine equation:
4031   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
4032
4033   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
4034   number of iterations that loops X and Y run.  The overlaps will be
4035   constructed as evolutions in dimension DIM.  */
4036
4037static void
4038compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
4039					 HOST_WIDE_INT step_a,
4040					 HOST_WIDE_INT step_b,
4041					 affine_fn *overlaps_a,
4042					 affine_fn *overlaps_b,
4043					 tree *last_conflicts, int dim)
4044{
4045  if (((step_a > 0 && step_b > 0)
4046       || (step_a < 0 && step_b < 0)))
4047    {
4048      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
4049      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;
4050
4051      gcd_steps_a_b = gcd (step_a, step_b);
4052      step_overlaps_a = step_b / gcd_steps_a_b;
4053      step_overlaps_b = step_a / gcd_steps_a_b;
4054
4055      if (niter > 0)
4056	{
4057	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
4058	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
4059	  last_conflict = tau2;
4060	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
4061	}
4062      else
4063	*last_conflicts = chrec_dont_know;
4064
4065      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
4066				      build_int_cst (NULL_TREE,
4067						     step_overlaps_a));
4068      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
4069				      build_int_cst (NULL_TREE,
4070						     step_overlaps_b));
4071    }
4072
4073  else
4074    {
4075      *overlaps_a = affine_fn_cst (integer_zero_node);
4076      *overlaps_b = affine_fn_cst (integer_zero_node);
4077      *last_conflicts = integer_zero_node;
4078    }
4079}
4080
4081/* Solves the special case of a Diophantine equation where CHREC_A is
4082   an affine bivariate function, and CHREC_B is an affine univariate
4083   function.  For example,
4084
4085   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
4086
4087   has the following overlapping functions:
4088
4089   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
4090   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
4091   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
4092
4093   FORNOW: This is a specialized implementation for a case occurring in
4094   a common benchmark.  Implement the general algorithm.  */
4095
4096static void
4097compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
4098				      conflict_function **overlaps_a,
4099				      conflict_function **overlaps_b,
4100				      tree *last_conflicts)
4101{
4102  bool xz_p, yz_p, xyz_p;
4103  HOST_WIDE_INT step_x, step_y, step_z;
4104  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
4105  affine_fn overlaps_a_xz, overlaps_b_xz;
4106  affine_fn overlaps_a_yz, overlaps_b_yz;
4107  affine_fn overlaps_a_xyz, overlaps_b_xyz;
4108  affine_fn ova1, ova2, ovb;
4109  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
4110
4111  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
4112  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
4113  step_z = int_cst_value (CHREC_RIGHT (chrec_b));
4114
4115  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
4116  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
4117  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));
4118
4119  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
4120    {
4121      if (dump_file && (dump_flags & TDF_DETAILS))
4122	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
4123
4124      *overlaps_a = conflict_fn_not_known ();
4125      *overlaps_b = conflict_fn_not_known ();
4126      *last_conflicts = chrec_dont_know;
4127      return;
4128    }
4129
4130  niter = MIN (niter_x, niter_z);
4131  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
4132					   &overlaps_a_xz,
4133					   &overlaps_b_xz,
4134					   &last_conflicts_xz, 1);
4135  niter = MIN (niter_y, niter_z);
4136  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
4137					   &overlaps_a_yz,
4138					   &overlaps_b_yz,
4139					   &last_conflicts_yz, 2);
4140  niter = MIN (niter_x, niter_z);
4141  niter = MIN (niter_y, niter);
4142  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
4143					   &overlaps_a_xyz,
4144					   &overlaps_b_xyz,
4145					   &last_conflicts_xyz, 3);
4146
4147  xz_p = !integer_zerop (last_conflicts_xz);
4148  yz_p = !integer_zerop (last_conflicts_yz);
4149  xyz_p = !integer_zerop (last_conflicts_xyz);
4150
4151  if (xz_p || yz_p || xyz_p)
4152    {
4153      ova1 = affine_fn_cst (integer_zero_node);
4154      ova2 = affine_fn_cst (integer_zero_node);
4155      ovb = affine_fn_cst (integer_zero_node);
4156      if (xz_p)
4157	{
4158	  affine_fn t0 = ova1;
4159	  affine_fn t2 = ovb;
4160
4161	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
4162	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
4163	  affine_fn_free (t0);
4164	  affine_fn_free (t2);
4165	  *last_conflicts = last_conflicts_xz;
4166	}
4167      if (yz_p)
4168	{
4169	  affine_fn t0 = ova2;
4170	  affine_fn t2 = ovb;
4171
4172	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
4173	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
4174	  affine_fn_free (t0);
4175	  affine_fn_free (t2);
4176	  *last_conflicts = last_conflicts_yz;
4177	}
4178      if (xyz_p)
4179	{
4180	  affine_fn t0 = ova1;
4181	  affine_fn t2 = ova2;
4182	  affine_fn t4 = ovb;
4183
4184	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
4185	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
4186	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
4187	  affine_fn_free (t0);
4188	  affine_fn_free (t2);
4189	  affine_fn_free (t4);
4190	  *last_conflicts = last_conflicts_xyz;
4191	}
4192      *overlaps_a = conflict_fn (2, ova1, ova2);
4193      *overlaps_b = conflict_fn (1, ovb);
4194    }
4195  else
4196    {
4197      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4198      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4199      *last_conflicts = integer_zero_node;
4200    }
4201
4202  affine_fn_free (overlaps_a_xz);
4203  affine_fn_free (overlaps_b_xz);
4204  affine_fn_free (overlaps_a_yz);
4205  affine_fn_free (overlaps_b_yz);
4206  affine_fn_free (overlaps_a_xyz);
4207  affine_fn_free (overlaps_b_xyz);
4208}
4209
4210/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */
4211
4212static void
4213lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
4214		    int size)
4215{
4216  memcpy (vec2, vec1, size * sizeof (*vec1));
4217}
4218
4219/* Copy the elements of M x N matrix MAT1 to MAT2.  */
4220
4221static void
4222lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
4223		    int m, int n)
4224{
4225  int i;
4226
4227  for (i = 0; i < m; i++)
4228    lambda_vector_copy (mat1[i], mat2[i], n);
4229}
4230
4231/* Store the N x N identity matrix in MAT.  */
4232
4233static void
4234lambda_matrix_id (lambda_matrix mat, int size)
4235{
4236  int i, j;
4237
4238  for (i = 0; i < size; i++)
4239    for (j = 0; j < size; j++)
4240      mat[i][j] = (i == j) ? 1 : 0;
4241}
4242
4243/* Return the index of the first nonzero element of vector VEC1 between
4244   START and N.  We must have START <= N.
4245   Returns N if VEC1 is the zero vector.  */
4246
4247static int
4248lambda_vector_first_nz (lambda_vector vec1, int n, int start)
4249{
4250  int j = start;
4251  while (j < n && vec1[j] == 0)
4252    j++;
4253  return j;
4254}
4255
4256/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
4257   R2 = R2 + CONST1 * R1.  */
4258
4259static bool
4260lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
4261		       lambda_int const1)
4262{
4263  int i;
4264
4265  if (const1 == 0)
4266    return true;
4267
4268  for (i = 0; i < n; i++)
4269    {
4270      bool ovf;
4271      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
4272      if (ovf)
4273	return false;
4274      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
4275      if (ovf || tem2 == HOST_WIDE_INT_MIN)
4276	return false;
4277      mat[r2][i] = tem2;
4278    }
4279
4280  return true;
4281}
4282
4283/* Multiply vector VEC1 of length SIZE by a constant CONST1,
4284   and store the result in VEC2.  */
4285
4286static void
4287lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
4288			  int size, lambda_int const1)
4289{
4290  int i;
4291
4292  if (const1 == 0)
4293    lambda_vector_clear (vec2, size);
4294  else
4295    for (i = 0; i < size; i++)
4296      vec2[i] = const1 * vec1[i];
4297}
4298
4299/* Negate vector VEC1 with length SIZE and store it in VEC2.  */
4300
4301static void
4302lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
4303		      int size)
4304{
4305  lambda_vector_mult_const (vec1, vec2, size, -1);
4306}
4307
4308/* Negate row R1 of matrix MAT which has N columns.  */
4309
4310static void
4311lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
4312{
4313  lambda_vector_negate (mat[r1], mat[r1], n);
4314}
4315
4316/* Return true if two vectors are equal.  */
4317
4318static bool
4319lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
4320{
4321  int i;
4322  for (i = 0; i < size; i++)
4323    if (vec1[i] != vec2[i])
4324      return false;
4325  return true;
4326}
4327
4328/* Given an M x N integer matrix A, this function determines an M x
4329   M unimodular matrix U, and an M x N echelon matrix S such that
4330   "U.A = S".  This decomposition is also known as "right Hermite".
4331
4332   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
4333   Restructuring Compilers" Utpal Banerjee.  */
4334
4335static bool
4336lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
4337			     lambda_matrix S, lambda_matrix U)
4338{
4339  int i, j, i0 = 0;
4340
4341  lambda_matrix_copy (A, S, m, n);
4342  lambda_matrix_id (U, m);
4343
4344  for (j = 0; j < n; j++)
4345    {
4346      if (lambda_vector_first_nz (S[j], m, i0) < m)
4347	{
4348	  ++i0;
4349	  for (i = m - 1; i >= i0; i--)
4350	    {
4351	      while (S[i][j] != 0)
4352		{
4353		  lambda_int factor, a, b;
4354
4355		  a = S[i-1][j];
4356		  b = S[i][j];
4357		  gcc_assert (a != HOST_WIDE_INT_MIN);
4358		  factor = a / b;
4359
4360		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
4361		    return false;
4362		  std::swap (S[i], S[i-1]);
4363
4364		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
4365		    return false;
4366		  std::swap (U[i], U[i-1]);
4367		}
4368	    }
4369	}
4370    }
4371
4372  return true;
4373}
4374
4375/* Determines the overlapping elements due to accesses CHREC_A and
4376   CHREC_B, that are affine functions.  This function cannot handle
4377   symbolic evolution functions, ie. when initial conditions are
4378   parameters, because it uses lambda matrices of integers.  */
4379
4380static void
4381analyze_subscript_affine_affine (tree chrec_a,
4382				 tree chrec_b,
4383				 conflict_function **overlaps_a,
4384				 conflict_function **overlaps_b,
4385				 tree *last_conflicts)
4386{
4387  unsigned nb_vars_a, nb_vars_b, dim;
4388  lambda_int gamma, gcd_alpha_beta;
4389  lambda_matrix A, U, S;
4390  struct obstack scratch_obstack;
4391
4392  if (eq_evolutions_p (chrec_a, chrec_b))
4393    {
4394      /* The accessed index overlaps for each iteration in the
4395	 loop.  */
4396      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4397      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4398      *last_conflicts = chrec_dont_know;
4399      return;
4400    }
4401  if (dump_file && (dump_flags & TDF_DETAILS))
4402    fprintf (dump_file, "(analyze_subscript_affine_affine \n");
4403
4404  /* For determining the initial intersection, we have to solve a
4405     Diophantine equation.  This is the most time consuming part.
4406
4407     For answering to the question: "Is there a dependence?" we have
4408     to prove that there exists a solution to the Diophantine
4409     equation, and that the solution is in the iteration domain,
4410     i.e. the solution is positive or zero, and that the solution
4411     happens before the upper bound loop.nb_iterations.  Otherwise
4412     there is no dependence.  This function outputs a description of
4413     the iterations that hold the intersections.  */
4414
4415  nb_vars_a = nb_vars_in_chrec (chrec_a);
4416  nb_vars_b = nb_vars_in_chrec (chrec_b);
4417
4418  gcc_obstack_init (&scratch_obstack);
4419
4420  dim = nb_vars_a + nb_vars_b;
4421  U = lambda_matrix_new (dim, dim, &scratch_obstack);
4422  A = lambda_matrix_new (dim, 1, &scratch_obstack);
4423  S = lambda_matrix_new (dim, 1, &scratch_obstack);
4424
4425  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
4426  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
4427  if (init_a == chrec_dont_know
4428      || init_b == chrec_dont_know)
4429    {
4430      if (dump_file && (dump_flags & TDF_DETAILS))
4431	fprintf (dump_file, "affine-affine test failed: "
4432		 "representation issue.\n");
4433      *overlaps_a = conflict_fn_not_known ();
4434      *overlaps_b = conflict_fn_not_known ();
4435      *last_conflicts = chrec_dont_know;
4436      goto end_analyze_subs_aa;
4437    }
4438  gamma = int_cst_value (init_b) - int_cst_value (init_a);
4439
4440  /* Don't do all the hard work of solving the Diophantine equation
4441     when we already know the solution: for example,
4442     | {3, +, 1}_1
4443     | {3, +, 4}_2
4444     | gamma = 3 - 3 = 0.
4445     Then the first overlap occurs during the first iterations:
4446     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
4447  */
4448  if (gamma == 0)
4449    {
4450      if (nb_vars_a == 1 && nb_vars_b == 1)
4451	{
4452	  HOST_WIDE_INT step_a, step_b;
4453	  HOST_WIDE_INT niter, niter_a, niter_b;
4454	  affine_fn ova, ovb;
4455
4456	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
4457	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
4458	  niter = MIN (niter_a, niter_b);
4459	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
4460	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));
4461
4462	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
4463						   &ova, &ovb,
4464						   last_conflicts, 1);
4465	  *overlaps_a = conflict_fn (1, ova);
4466	  *overlaps_b = conflict_fn (1, ovb);
4467	}
4468
4469      else if (nb_vars_a == 2 && nb_vars_b == 1)
4470	compute_overlap_steps_for_affine_1_2
4471	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
4472
4473      else if (nb_vars_a == 1 && nb_vars_b == 2)
4474	compute_overlap_steps_for_affine_1_2
4475	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
4476
4477      else
4478	{
4479	  if (dump_file && (dump_flags & TDF_DETAILS))
4480	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
4481	  *overlaps_a = conflict_fn_not_known ();
4482	  *overlaps_b = conflict_fn_not_known ();
4483	  *last_conflicts = chrec_dont_know;
4484	}
4485      goto end_analyze_subs_aa;
4486    }
4487
4488  /* U.A = S */
4489  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
4490    {
4491      *overlaps_a = conflict_fn_not_known ();
4492      *overlaps_b = conflict_fn_not_known ();
4493      *last_conflicts = chrec_dont_know;
4494      goto end_analyze_subs_aa;
4495    }
4496
4497  if (S[0][0] < 0)
4498    {
4499      S[0][0] *= -1;
4500      lambda_matrix_row_negate (U, dim, 0);
4501    }
4502  gcd_alpha_beta = S[0][0];
4503
4504  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
4505     but that is a quite strange case.  Instead of ICEing, answer
4506     don't know.  */
4507  if (gcd_alpha_beta == 0)
4508    {
4509      *overlaps_a = conflict_fn_not_known ();
4510      *overlaps_b = conflict_fn_not_known ();
4511      *last_conflicts = chrec_dont_know;
4512      goto end_analyze_subs_aa;
4513    }
4514
4515  /* The classic "gcd-test".  */
4516  if (!int_divides_p (gcd_alpha_beta, gamma))
4517    {
4518      /* The "gcd-test" has determined that there is no integer
4519	 solution, i.e. there is no dependence.  */
4520      *overlaps_a = conflict_fn_no_dependence ();
4521      *overlaps_b = conflict_fn_no_dependence ();
4522      *last_conflicts = integer_zero_node;
4523    }
4524
4525  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
4526  else if (nb_vars_a == 1 && nb_vars_b == 1)
4527    {
4528      /* Both functions should have the same evolution sign.  */
4529      if (((A[0][0] > 0 && -A[1][0] > 0)
4530	   || (A[0][0] < 0 && -A[1][0] < 0)))
4531	{
4532	  /* The solutions are given by:
4533	     |
4534	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
4535	     |                           [u21 u22]    [y0]
4536
4537	     For a given integer t.  Using the following variables,
4538
4539	     | i0 = u11 * gamma / gcd_alpha_beta
4540	     | j0 = u12 * gamma / gcd_alpha_beta
4541	     | i1 = u21
4542	     | j1 = u22
4543
4544	     the solutions are:
4545
4546	     | x0 = i0 + i1 * t,
4547	     | y0 = j0 + j1 * t.  */
4548      	  HOST_WIDE_INT i0, j0, i1, j1;
4549
4550	  i0 = U[0][0] * gamma / gcd_alpha_beta;
4551	  j0 = U[0][1] * gamma / gcd_alpha_beta;
4552	  i1 = U[1][0];
4553	  j1 = U[1][1];
4554
4555	  if ((i1 == 0 && i0 < 0)
4556	      || (j1 == 0 && j0 < 0))
4557	    {
4558	      /* There is no solution.
4559		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
4560		 falls in here, but for the moment we don't look at the
4561		 upper bound of the iteration domain.  */
4562	      *overlaps_a = conflict_fn_no_dependence ();
4563	      *overlaps_b = conflict_fn_no_dependence ();
4564	      *last_conflicts = integer_zero_node;
4565	      goto end_analyze_subs_aa;
4566	    }
4567
4568	  if (i1 > 0 && j1 > 0)
4569	    {
4570	      HOST_WIDE_INT niter_a
4571		= max_stmt_executions_int (get_chrec_loop (chrec_a));
4572	      HOST_WIDE_INT niter_b
4573		= max_stmt_executions_int (get_chrec_loop (chrec_b));
4574	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);
4575
4576	      /* (X0, Y0) is a solution of the Diophantine equation:
4577		 "chrec_a (X0) = chrec_b (Y0)".  */
4578	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
4579					CEIL (-j0, j1));
4580	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
4581	      HOST_WIDE_INT y0 = j1 * tau1 + j0;
4582
4583	      /* (X1, Y1) is the smallest positive solution of the eq
4584		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
4585		 first conflict occurs.  */
4586	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
4587	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
4588	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
4589
4590	      if (niter > 0)
4591		{
4592		  /* If the overlap occurs outside of the bounds of the
4593		     loop, there is no dependence.  */
4594		  if (x1 >= niter_a || y1 >= niter_b)
4595		    {
4596		      *overlaps_a = conflict_fn_no_dependence ();
4597		      *overlaps_b = conflict_fn_no_dependence ();
4598		      *last_conflicts = integer_zero_node;
4599		      goto end_analyze_subs_aa;
4600		    }
4601
4602		  /* max stmt executions can get quite large, avoid
4603		     overflows by using wide ints here.  */
4604		  widest_int tau2
4605		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
4606				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
4607		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
4608		  if (wi::min_precision (last_conflict, SIGNED)
4609		      <= TYPE_PRECISION (integer_type_node))
4610		    *last_conflicts
4611		       = build_int_cst (integer_type_node,
4612					last_conflict.to_shwi ());
4613		  else
4614		    *last_conflicts = chrec_dont_know;
4615		}
4616	      else
4617		*last_conflicts = chrec_dont_know;
4618
4619	      *overlaps_a
4620		= conflict_fn (1,
4621			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
4622						 1,
4623						 build_int_cst (NULL_TREE, i1)));
4624	      *overlaps_b
4625		= conflict_fn (1,
4626			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
4627						 1,
4628						 build_int_cst (NULL_TREE, j1)));
4629	    }
4630	  else
4631	    {
4632	      /* FIXME: For the moment, the upper bound of the
4633		 iteration domain for i and j is not checked.  */
4634	      if (dump_file && (dump_flags & TDF_DETAILS))
4635		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4636	      *overlaps_a = conflict_fn_not_known ();
4637	      *overlaps_b = conflict_fn_not_known ();
4638	      *last_conflicts = chrec_dont_know;
4639	    }
4640	}
4641      else
4642	{
4643	  if (dump_file && (dump_flags & TDF_DETAILS))
4644	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4645	  *overlaps_a = conflict_fn_not_known ();
4646	  *overlaps_b = conflict_fn_not_known ();
4647	  *last_conflicts = chrec_dont_know;
4648	}
4649    }
4650  else
4651    {
4652      if (dump_file && (dump_flags & TDF_DETAILS))
4653	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
4654      *overlaps_a = conflict_fn_not_known ();
4655      *overlaps_b = conflict_fn_not_known ();
4656      *last_conflicts = chrec_dont_know;
4657    }
4658
4659end_analyze_subs_aa:
4660  obstack_free (&scratch_obstack, NULL);
4661  if (dump_file && (dump_flags & TDF_DETAILS))
4662    {
4663      fprintf (dump_file, "  (overlaps_a = ");
4664      dump_conflict_function (dump_file, *overlaps_a);
4665      fprintf (dump_file, ")\n  (overlaps_b = ");
4666      dump_conflict_function (dump_file, *overlaps_b);
4667      fprintf (dump_file, "))\n");
4668    }
4669}
4670
4671/* Returns true when analyze_subscript_affine_affine can be used for
4672   determining the dependence relation between chrec_a and chrec_b,
4673   that contain symbols.  This function modifies chrec_a and chrec_b
4674   such that the analysis result is the same, and such that they don't
4675   contain symbols, and then can safely be passed to the analyzer.
4676
4677   Example: The analysis of the following tuples of evolutions produce
4678   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
4679   vs. {0, +, 1}_1
4680
4681   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
4682   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
4683*/
4684
4685static bool
4686can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
4687{
4688  tree diff, type, left_a, left_b, right_b;
4689
4690  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
4691      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
4692    /* FIXME: For the moment not handled.  Might be refined later.  */
4693    return false;
4694
4695  type = chrec_type (*chrec_a);
4696  left_a = CHREC_LEFT (*chrec_a);
4697  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
4698  diff = chrec_fold_minus (type, left_a, left_b);
4699
4700  if (!evolution_function_is_constant_p (diff))
4701    return false;
4702
4703  if (dump_file && (dump_flags & TDF_DETAILS))
4704    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
4705
4706  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
4707				     diff, CHREC_RIGHT (*chrec_a));
4708  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
4709  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
4710				     build_int_cst (type, 0),
4711				     right_b);
4712  return true;
4713}
4714
4715/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
4716   *OVERLAPS_B are initialized to the functions that describe the
4717   relation between the elements accessed twice by CHREC_A and
4718   CHREC_B.  For k >= 0, the following property is verified:
4719
4720   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4721
4722static void
4723analyze_siv_subscript (tree chrec_a,
4724		       tree chrec_b,
4725		       conflict_function **overlaps_a,
4726		       conflict_function **overlaps_b,
4727		       tree *last_conflicts,
4728		       int loop_nest_num)
4729{
4730  dependence_stats.num_siv++;
4731
4732  if (dump_file && (dump_flags & TDF_DETAILS))
4733    fprintf (dump_file, "(analyze_siv_subscript \n");
4734
4735  if (evolution_function_is_constant_p (chrec_a)
4736      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4737    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
4738				      overlaps_a, overlaps_b, last_conflicts);
4739
4740  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4741	   && evolution_function_is_constant_p (chrec_b))
4742    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
4743				      overlaps_b, overlaps_a, last_conflicts);
4744
4745  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
4746	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
4747    {
4748      if (!chrec_contains_symbols (chrec_a)
4749	  && !chrec_contains_symbols (chrec_b))
4750	{
4751	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4752					   overlaps_a, overlaps_b,
4753					   last_conflicts);
4754
4755	  if (CF_NOT_KNOWN_P (*overlaps_a)
4756	      || CF_NOT_KNOWN_P (*overlaps_b))
4757	    dependence_stats.num_siv_unimplemented++;
4758	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4759		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4760	    dependence_stats.num_siv_independent++;
4761	  else
4762	    dependence_stats.num_siv_dependent++;
4763	}
4764      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
4765							&chrec_b))
4766	{
4767	  analyze_subscript_affine_affine (chrec_a, chrec_b,
4768					   overlaps_a, overlaps_b,
4769					   last_conflicts);
4770
4771	  if (CF_NOT_KNOWN_P (*overlaps_a)
4772	      || CF_NOT_KNOWN_P (*overlaps_b))
4773	    dependence_stats.num_siv_unimplemented++;
4774	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4775		   || CF_NO_DEPENDENCE_P (*overlaps_b))
4776	    dependence_stats.num_siv_independent++;
4777	  else
4778	    dependence_stats.num_siv_dependent++;
4779	}
4780      else
4781	goto siv_subscript_dontknow;
4782    }
4783
4784  else
4785    {
4786    siv_subscript_dontknow:;
4787      if (dump_file && (dump_flags & TDF_DETAILS))
4788	fprintf (dump_file, "  siv test failed: unimplemented");
4789      *overlaps_a = conflict_fn_not_known ();
4790      *overlaps_b = conflict_fn_not_known ();
4791      *last_conflicts = chrec_dont_know;
4792      dependence_stats.num_siv_unimplemented++;
4793    }
4794
4795  if (dump_file && (dump_flags & TDF_DETAILS))
4796    fprintf (dump_file, ")\n");
4797}
4798
4799/* Returns false if we can prove that the greatest common divisor of the steps
4800   of CHREC does not divide CST, false otherwise.  */
4801
4802static bool
4803gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
4804{
4805  HOST_WIDE_INT cd = 0, val;
4806  tree step;
4807
4808  if (!tree_fits_shwi_p (cst))
4809    return true;
4810  val = tree_to_shwi (cst);
4811
4812  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
4813    {
4814      step = CHREC_RIGHT (chrec);
4815      if (!tree_fits_shwi_p (step))
4816	return true;
4817      cd = gcd (cd, tree_to_shwi (step));
4818      chrec = CHREC_LEFT (chrec);
4819    }
4820
4821  return val % cd == 0;
4822}
4823
4824/* Analyze a MIV (Multiple Index Variable) subscript with respect to
4825   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
4826   functions that describe the relation between the elements accessed
4827   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
4828   is verified:
4829
4830   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */
4831
4832static void
4833analyze_miv_subscript (tree chrec_a,
4834		       tree chrec_b,
4835		       conflict_function **overlaps_a,
4836		       conflict_function **overlaps_b,
4837		       tree *last_conflicts,
4838		       class loop *loop_nest)
4839{
4840  tree type, difference;
4841
4842  dependence_stats.num_miv++;
4843  if (dump_file && (dump_flags & TDF_DETAILS))
4844    fprintf (dump_file, "(analyze_miv_subscript \n");
4845
4846  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
4847  chrec_a = chrec_convert (type, chrec_a, NULL);
4848  chrec_b = chrec_convert (type, chrec_b, NULL);
4849  difference = chrec_fold_minus (type, chrec_a, chrec_b);
4850
4851  if (eq_evolutions_p (chrec_a, chrec_b))
4852    {
4853      /* Access functions are the same: all the elements are accessed
4854	 in the same order.  */
4855      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4856      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4857      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
4858      dependence_stats.num_miv_dependent++;
4859    }
4860
4861  else if (evolution_function_is_constant_p (difference)
4862	   && evolution_function_is_affine_multivariate_p (chrec_a,
4863							   loop_nest->num)
4864	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
4865    {
4866      /* testsuite/.../ssa-chrec-33.c
4867	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2
4868
4869	 The difference is 1, and all the evolution steps are multiples
4870	 of 2, consequently there are no overlapping elements.  */
4871      *overlaps_a = conflict_fn_no_dependence ();
4872      *overlaps_b = conflict_fn_no_dependence ();
4873      *last_conflicts = integer_zero_node;
4874      dependence_stats.num_miv_independent++;
4875    }
4876
4877  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
4878	   && !chrec_contains_symbols (chrec_a, loop_nest)
4879	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
4880	   && !chrec_contains_symbols (chrec_b, loop_nest))
4881    {
4882      /* testsuite/.../ssa-chrec-35.c
4883	 {0, +, 1}_2  vs.  {0, +, 1}_3
4884	 the overlapping elements are respectively located at iterations:
4885	 {0, +, 1}_x and {0, +, 1}_x,
4886	 in other words, we have the equality:
4887	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
4888
4889	 Other examples:
4890	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
4891	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
4892
4893	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
4894	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
4895      */
4896      analyze_subscript_affine_affine (chrec_a, chrec_b,
4897				       overlaps_a, overlaps_b, last_conflicts);
4898
4899      if (CF_NOT_KNOWN_P (*overlaps_a)
4900 	  || CF_NOT_KNOWN_P (*overlaps_b))
4901	dependence_stats.num_miv_unimplemented++;
4902      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
4903	       || CF_NO_DEPENDENCE_P (*overlaps_b))
4904	dependence_stats.num_miv_independent++;
4905      else
4906	dependence_stats.num_miv_dependent++;
4907    }
4908
4909  else
4910    {
4911      /* When the analysis is too difficult, answer "don't know".  */
4912      if (dump_file && (dump_flags & TDF_DETAILS))
4913	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
4914
4915      *overlaps_a = conflict_fn_not_known ();
4916      *overlaps_b = conflict_fn_not_known ();
4917      *last_conflicts = chrec_dont_know;
4918      dependence_stats.num_miv_unimplemented++;
4919    }
4920
4921  if (dump_file && (dump_flags & TDF_DETAILS))
4922    fprintf (dump_file, ")\n");
4923}
4924
4925/* Determines the iterations for which CHREC_A is equal to CHREC_B in
4926   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
4927   OVERLAP_ITERATIONS_B are initialized with two functions that
4928   describe the iterations that contain conflicting elements.
4929
4930   Remark: For an integer k >= 0, the following equality is true:
4931
4932   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
4933*/
4934
4935static void
4936analyze_overlapping_iterations (tree chrec_a,
4937				tree chrec_b,
4938				conflict_function **overlap_iterations_a,
4939				conflict_function **overlap_iterations_b,
4940				tree *last_conflicts, class loop *loop_nest)
4941{
4942  unsigned int lnn = loop_nest->num;
4943
4944  dependence_stats.num_subscript_tests++;
4945
4946  if (dump_file && (dump_flags & TDF_DETAILS))
4947    {
4948      fprintf (dump_file, "(analyze_overlapping_iterations \n");
4949      fprintf (dump_file, "  (chrec_a = ");
4950      print_generic_expr (dump_file, chrec_a);
4951      fprintf (dump_file, ")\n  (chrec_b = ");
4952      print_generic_expr (dump_file, chrec_b);
4953      fprintf (dump_file, ")\n");
4954    }
4955
4956  if (chrec_a == NULL_TREE
4957      || chrec_b == NULL_TREE
4958      || chrec_contains_undetermined (chrec_a)
4959      || chrec_contains_undetermined (chrec_b))
4960    {
4961      dependence_stats.num_subscript_undetermined++;
4962
4963      *overlap_iterations_a = conflict_fn_not_known ();
4964      *overlap_iterations_b = conflict_fn_not_known ();
4965    }
4966
4967  /* If they are the same chrec, and are affine, they overlap
4968     on every iteration.  */
4969  else if (eq_evolutions_p (chrec_a, chrec_b)
4970	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4971	       || operand_equal_p (chrec_a, chrec_b, 0)))
4972    {
4973      dependence_stats.num_same_subscript_function++;
4974      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
4975      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
4976      *last_conflicts = chrec_dont_know;
4977    }
4978
4979  /* If they aren't the same, and aren't affine, we can't do anything
4980     yet.  */
4981  else if ((chrec_contains_symbols (chrec_a)
4982	    || chrec_contains_symbols (chrec_b))
4983	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
4984	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
4985    {
4986      dependence_stats.num_subscript_undetermined++;
4987      *overlap_iterations_a = conflict_fn_not_known ();
4988      *overlap_iterations_b = conflict_fn_not_known ();
4989    }
4990
4991  else if (ziv_subscript_p (chrec_a, chrec_b))
4992    analyze_ziv_subscript (chrec_a, chrec_b,
4993			   overlap_iterations_a, overlap_iterations_b,
4994			   last_conflicts);
4995
4996  else if (siv_subscript_p (chrec_a, chrec_b))
4997    analyze_siv_subscript (chrec_a, chrec_b,
4998			   overlap_iterations_a, overlap_iterations_b,
4999			   last_conflicts, lnn);
5000
5001  else
5002    analyze_miv_subscript (chrec_a, chrec_b,
5003			   overlap_iterations_a, overlap_iterations_b,
5004			   last_conflicts, loop_nest);
5005
5006  if (dump_file && (dump_flags & TDF_DETAILS))
5007    {
5008      fprintf (dump_file, "  (overlap_iterations_a = ");
5009      dump_conflict_function (dump_file, *overlap_iterations_a);
5010      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
5011      dump_conflict_function (dump_file, *overlap_iterations_b);
5012      fprintf (dump_file, "))\n");
5013    }
5014}
5015
5016/* Helper function for uniquely inserting distance vectors.  */
5017
5018static void
5019save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
5020{
5021  for (lambda_vector v : DDR_DIST_VECTS (ddr))
5022    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
5023      return;
5024
5025  DDR_DIST_VECTS (ddr).safe_push (dist_v);
5026}
5027
5028/* Helper function for uniquely inserting direction vectors.  */
5029
5030static void
5031save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
5032{
5033  for (lambda_vector v : DDR_DIR_VECTS (ddr))
5034    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
5035      return;
5036
5037  DDR_DIR_VECTS (ddr).safe_push (dir_v);
5038}
5039
5040/* Add a distance of 1 on all the loops outer than INDEX.  If we
5041   haven't yet determined a distance for this outer loop, push a new
5042   distance vector composed of the previous distance, and a distance
5043   of 1 for this outer loop.  Example:
5044
5045   | loop_1
5046   |   loop_2
5047   |     A[10]
5048   |   endloop_2
5049   | endloop_1
5050
5051   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
5052   save (0, 1), then we have to save (1, 0).  */
5053
5054static void
5055add_outer_distances (struct data_dependence_relation *ddr,
5056		     lambda_vector dist_v, int index)
5057{
5058  /* For each outer loop where init_v is not set, the accesses are
5059     in dependence of distance 1 in the loop.  */
5060  while (--index >= 0)
5061    {
5062      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5063      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5064      save_v[index] = 1;
5065      save_dist_v (ddr, save_v);
5066    }
5067}
5068
5069/* Return false when fail to represent the data dependence as a
5070   distance vector.  A_INDEX is the index of the first reference
5071   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
5072   second reference.  INIT_B is set to true when a component has been
5073   added to the distance vector DIST_V.  INDEX_CARRY is then set to
5074   the index in DIST_V that carries the dependence.  */
5075
5076static bool
5077build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
5078			     unsigned int a_index, unsigned int b_index,
5079			     lambda_vector dist_v, bool *init_b,
5080			     int *index_carry)
5081{
5082  unsigned i;
5083  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5084  class loop *loop = DDR_LOOP_NEST (ddr)[0];
5085
5086  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5087    {
5088      tree access_fn_a, access_fn_b;
5089      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
5090
5091      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5092	{
5093	  non_affine_dependence_relation (ddr);
5094	  return false;
5095	}
5096
5097      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
5098      access_fn_b = SUB_ACCESS_FN (subscript, b_index);
5099
5100      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
5101	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
5102	{
5103	  HOST_WIDE_INT dist;
5104	  int index;
5105	  int var_a = CHREC_VARIABLE (access_fn_a);
5106	  int var_b = CHREC_VARIABLE (access_fn_b);
5107
5108	  if (var_a != var_b
5109	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
5110	    {
5111	      non_affine_dependence_relation (ddr);
5112	      return false;
5113	    }
5114
5115	  /* When data references are collected in a loop while data
5116	     dependences are analyzed in loop nest nested in the loop, we
5117	     would have more number of access functions than number of
5118	     loops.  Skip access functions of loops not in the loop nest.
5119
5120	     See PR89725 for more information.  */
5121	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
5122	    continue;
5123
5124	  dist = int_cst_value (SUB_DISTANCE (subscript));
5125	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
5126	  *index_carry = MIN (index, *index_carry);
5127
5128	  /* This is the subscript coupling test.  If we have already
5129	     recorded a distance for this loop (a distance coming from
5130	     another subscript), it should be the same.  For example,
5131	     in the following code, there is no dependence:
5132
5133	     | loop i = 0, N, 1
5134	     |   T[i+1][i] = ...
5135	     |   ... = T[i][i]
5136	     | endloop
5137	  */
5138	  if (init_v[index] != 0 && dist_v[index] != dist)
5139	    {
5140	      finalize_ddr_dependent (ddr, chrec_known);
5141	      return false;
5142	    }
5143
5144	  dist_v[index] = dist;
5145	  init_v[index] = 1;
5146	  *init_b = true;
5147	}
5148      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
5149	{
5150	  /* This can be for example an affine vs. constant dependence
5151	     (T[i] vs. T[3]) that is not an affine dependence and is
5152	     not representable as a distance vector.  */
5153	  non_affine_dependence_relation (ddr);
5154	  return false;
5155	}
5156      else
5157	*init_b = true;
5158    }
5159
5160  return true;
5161}
5162
5163/* Return true when the DDR contains only invariant access functions wrto. loop
5164   number LNUM.  */
5165
5166static bool
5167invariant_access_functions (const struct data_dependence_relation *ddr,
5168			    int lnum)
5169{
5170  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5171    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
5172	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
5173      return false;
5174
5175  return true;
5176}
5177
5178/* Helper function for the case where DDR_A and DDR_B are the same
5179   multivariate access function with a constant step.  For an example
5180   see pr34635-1.c.  */
5181
5182static void
5183add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
5184{
5185  int x_1, x_2;
5186  tree c_1 = CHREC_LEFT (c_2);
5187  tree c_0 = CHREC_LEFT (c_1);
5188  lambda_vector dist_v;
5189  HOST_WIDE_INT v1, v2, cd;
5190
5191  /* Polynomials with more than 2 variables are not handled yet.  When
5192     the evolution steps are parameters, it is not possible to
5193     represent the dependence using classical distance vectors.  */
5194  if (TREE_CODE (c_0) != INTEGER_CST
5195      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
5196      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
5197    {
5198      DDR_AFFINE_P (ddr) = false;
5199      return;
5200    }
5201
5202  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
5203  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
5204
5205  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
5206  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5207  v1 = int_cst_value (CHREC_RIGHT (c_1));
5208  v2 = int_cst_value (CHREC_RIGHT (c_2));
5209  cd = gcd (v1, v2);
5210  v1 /= cd;
5211  v2 /= cd;
5212
5213  if (v2 < 0)
5214    {
5215      v2 = -v2;
5216      v1 = -v1;
5217    }
5218
5219  dist_v[x_1] = v2;
5220  dist_v[x_2] = -v1;
5221  save_dist_v (ddr, dist_v);
5222
5223  add_outer_distances (ddr, dist_v, x_1);
5224}
5225
5226/* Helper function for the case where DDR_A and DDR_B are the same
5227   access functions.  */
5228
5229static void
5230add_other_self_distances (struct data_dependence_relation *ddr)
5231{
5232  lambda_vector dist_v;
5233  unsigned i;
5234  int index_carry = DDR_NB_LOOPS (ddr);
5235  subscript *sub;
5236  class loop *loop = DDR_LOOP_NEST (ddr)[0];
5237
5238  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
5239    {
5240      tree access_fun = SUB_ACCESS_FN (sub, 0);
5241
5242      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
5243	{
5244	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
5245	    {
5246	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
5247		{
5248		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
5249		  return;
5250		}
5251
5252	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);
5253
5254	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
5255		add_multivariate_self_dist (ddr, access_fun);
5256	      else
5257		/* The evolution step is not constant: it varies in
5258		   the outer loop, so this cannot be represented by a
5259		   distance vector.  For example in pr34635.c the
5260		   evolution is {0, +, {0, +, 4}_1}_2.  */
5261		DDR_AFFINE_P (ddr) = false;
5262
5263	      return;
5264	    }
5265
5266	  /* When data references are collected in a loop while data
5267	     dependences are analyzed in loop nest nested in the loop, we
5268	     would have more number of access functions than number of
5269	     loops.  Skip access functions of loops not in the loop nest.
5270
5271	     See PR89725 for more information.  */
5272	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
5273				  loop))
5274	    continue;
5275
5276	  index_carry = MIN (index_carry,
5277			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
5278						 DDR_LOOP_NEST (ddr)));
5279	}
5280    }
5281
5282  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5283  add_outer_distances (ddr, dist_v, index_carry);
5284}
5285
5286static void
5287insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
5288{
5289  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5290
5291  dist_v[0] = 1;
5292  save_dist_v (ddr, dist_v);
5293}
5294
5295/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
5296   is the case for example when access functions are the same and
5297   equal to a constant, as in:
5298
5299   | loop_1
5300   |   A[3] = ...
5301   |   ... = A[3]
5302   | endloop_1
5303
5304   in which case the distance vectors are (0) and (1).  */
5305
5306static void
5307add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
5308{
5309  unsigned i, j;
5310
5311  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
5312    {
5313      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
5314      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
5315      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
5316
5317      for (j = 0; j < ca->n; j++)
5318	if (affine_function_zero_p (ca->fns[j]))
5319	  {
5320	    insert_innermost_unit_dist_vector (ddr);
5321	    return;
5322	  }
5323
5324      for (j = 0; j < cb->n; j++)
5325	if (affine_function_zero_p (cb->fns[j]))
5326	  {
5327	    insert_innermost_unit_dist_vector (ddr);
5328	    return;
5329	  }
5330    }
5331}
5332
5333/* Return true when the DDR contains two data references that have the
5334   same access functions.  */
5335
5336static inline bool
5337same_access_functions (const struct data_dependence_relation *ddr)
5338{
5339  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
5340    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
5341			  SUB_ACCESS_FN (sub, 1)))
5342      return false;
5343
5344  return true;
5345}
5346
5347/* Compute the classic per loop distance vector.  DDR is the data
5348   dependence relation to build a vector from.  Return false when fail
5349   to represent the data dependence as a distance vector.  */
5350
5351static bool
5352build_classic_dist_vector (struct data_dependence_relation *ddr,
5353			   class loop *loop_nest)
5354{
5355  bool init_b = false;
5356  int index_carry = DDR_NB_LOOPS (ddr);
5357  lambda_vector dist_v;
5358
5359  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
5360    return false;
5361
5362  if (same_access_functions (ddr))
5363    {
5364      /* Save the 0 vector.  */
5365      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5366      save_dist_v (ddr, dist_v);
5367
5368      if (invariant_access_functions (ddr, loop_nest->num))
5369	add_distance_for_zero_overlaps (ddr);
5370
5371      if (DDR_NB_LOOPS (ddr) > 1)
5372	add_other_self_distances (ddr);
5373
5374      return true;
5375    }
5376
5377  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5378  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
5379    return false;
5380
5381  /* Save the distance vector if we initialized one.  */
5382  if (init_b)
5383    {
5384      /* Verify a basic constraint: classic distance vectors should
5385	 always be lexicographically positive.
5386
5387	 Data references are collected in the order of execution of
5388	 the program, thus for the following loop
5389
5390	 | for (i = 1; i < 100; i++)
5391	 |   for (j = 1; j < 100; j++)
5392	 |     {
5393	 |       t = T[j+1][i-1];  // A
5394	 |       T[j][i] = t + 2;  // B
5395	 |     }
5396
5397	 references are collected following the direction of the wind:
5398	 A then B.  The data dependence tests are performed also
5399	 following this order, such that we're looking at the distance
5400	 separating the elements accessed by A from the elements later
5401	 accessed by B.  But in this example, the distance returned by
5402	 test_dep (A, B) is lexicographically negative (-1, 1), that
5403	 means that the access A occurs later than B with respect to
5404	 the outer loop, ie. we're actually looking upwind.  In this
5405	 case we solve test_dep (B, A) looking downwind to the
5406	 lexicographically positive solution, that returns the
5407	 distance vector (1, -1).  */
5408      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
5409	{
5410	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5411	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5412	    return false;
5413	  compute_subscript_distance (ddr);
5414	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
5415					    &index_carry))
5416	    return false;
5417	  save_dist_v (ddr, save_v);
5418	  DDR_REVERSED_P (ddr) = true;
5419
5420	  /* In this case there is a dependence forward for all the
5421	     outer loops:
5422
5423	     | for (k = 1; k < 100; k++)
5424	     |  for (i = 1; i < 100; i++)
5425	     |   for (j = 1; j < 100; j++)
5426	     |     {
5427	     |       t = T[j+1][i-1];  // A
5428	     |       T[j][i] = t + 2;  // B
5429	     |     }
5430
5431	     the vectors are:
5432	     (0,  1, -1)
5433	     (1,  1, -1)
5434	     (1, -1,  1)
5435	  */
5436	  if (DDR_NB_LOOPS (ddr) > 1)
5437	    {
5438 	      add_outer_distances (ddr, save_v, index_carry);
5439	      add_outer_distances (ddr, dist_v, index_carry);
5440	    }
5441	}
5442      else
5443	{
5444	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5445	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
5446
5447	  if (DDR_NB_LOOPS (ddr) > 1)
5448	    {
5449	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5450
5451	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
5452		return false;
5453	      compute_subscript_distance (ddr);
5454	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
5455						&index_carry))
5456		return false;
5457
5458	      save_dist_v (ddr, save_v);
5459	      add_outer_distances (ddr, dist_v, index_carry);
5460	      add_outer_distances (ddr, opposite_v, index_carry);
5461	    }
5462	  else
5463	    save_dist_v (ddr, save_v);
5464	}
5465    }
5466  else
5467    {
5468      /* There is a distance of 1 on all the outer loops: Example:
5469	 there is a dependence of distance 1 on loop_1 for the array A.
5470
5471	 | loop_1
5472	 |   A[5] = ...
5473	 | endloop
5474      */
5475      add_outer_distances (ddr, dist_v,
5476			   lambda_vector_first_nz (dist_v,
5477						   DDR_NB_LOOPS (ddr), 0));
5478    }
5479
5480  if (dump_file && (dump_flags & TDF_DETAILS))
5481    {
5482      unsigned i;
5483
5484      fprintf (dump_file, "(build_classic_dist_vector\n");
5485      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
5486	{
5487	  fprintf (dump_file, "  dist_vector = (");
5488	  print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
5489			       DDR_NB_LOOPS (ddr));
5490	  fprintf (dump_file, "  )\n");
5491	}
5492      fprintf (dump_file, ")\n");
5493    }
5494
5495  return true;
5496}
5497
5498/* Return the direction for a given distance.
5499   FIXME: Computing dir this way is suboptimal, since dir can catch
5500   cases that dist is unable to represent.  */
5501
5502static inline enum data_dependence_direction
5503dir_from_dist (int dist)
5504{
5505  if (dist > 0)
5506    return dir_positive;
5507  else if (dist < 0)
5508    return dir_negative;
5509  else
5510    return dir_equal;
5511}
5512
5513/* Compute the classic per loop direction vector.  DDR is the data
5514   dependence relation to build a vector from.  */
5515
5516static void
5517build_classic_dir_vector (struct data_dependence_relation *ddr)
5518{
5519  unsigned i, j;
5520  lambda_vector dist_v;
5521
5522  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
5523    {
5524      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
5525
5526      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
5527	dir_v[j] = dir_from_dist (dist_v[j]);
5528
5529      save_dir_v (ddr, dir_v);
5530    }
5531}
5532
5533/* Helper function.  Returns true when there is a dependence between the
5534   data references.  A_INDEX is the index of the first reference (0 for
5535   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */
5536
5537static bool
5538subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
5539			       unsigned int a_index, unsigned int b_index,
5540			       class loop *loop_nest)
5541{
5542  unsigned int i;
5543  tree last_conflicts;
5544  struct subscript *subscript;
5545  tree res = NULL_TREE;
5546
5547  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
5548    {
5549      conflict_function *overlaps_a, *overlaps_b;
5550
5551      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
5552				      SUB_ACCESS_FN (subscript, b_index),
5553				      &overlaps_a, &overlaps_b,
5554				      &last_conflicts, loop_nest);
5555
5556      if (SUB_CONFLICTS_IN_A (subscript))
5557	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
5558      if (SUB_CONFLICTS_IN_B (subscript))
5559	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
5560
5561      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
5562      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
5563      SUB_LAST_CONFLICT (subscript) = last_conflicts;
5564
5565      /* If there is any undetermined conflict function we have to
5566         give a conservative answer in case we cannot prove that
5567	 no dependence exists when analyzing another subscript.  */
5568      if (CF_NOT_KNOWN_P (overlaps_a)
5569 	  || CF_NOT_KNOWN_P (overlaps_b))
5570 	{
5571	  res = chrec_dont_know;
5572	  continue;
5573 	}
5574
5575      /* When there is a subscript with no dependence we can stop.  */
5576      else if (CF_NO_DEPENDENCE_P (overlaps_a)
5577 	       || CF_NO_DEPENDENCE_P (overlaps_b))
5578 	{
5579	  res = chrec_known;
5580	  break;
5581 	}
5582    }
5583
5584  if (res == NULL_TREE)
5585    return true;
5586
5587  if (res == chrec_known)
5588    dependence_stats.num_dependence_independent++;
5589  else
5590    dependence_stats.num_dependence_undetermined++;
5591  finalize_ddr_dependent (ddr, res);
5592  return false;
5593}
5594
5595/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR.  */
5596
5597static void
5598subscript_dependence_tester (struct data_dependence_relation *ddr,
5599			     class loop *loop_nest)
5600{
5601  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
5602    dependence_stats.num_dependence_dependent++;
5603
5604  compute_subscript_distance (ddr);
5605  if (build_classic_dist_vector (ddr, loop_nest))
5606    build_classic_dir_vector (ddr);
5607}
5608
5609/* Returns true when all the access functions of A are affine or
5610   constant with respect to LOOP_NEST.  */
5611
5612static bool
5613access_functions_are_affine_or_constant_p (const struct data_reference *a,
5614					   const class loop *loop_nest)
5615{
5616  vec<tree> fns = DR_ACCESS_FNS (a);
5617  for (tree t : fns)
5618    if (!evolution_function_is_invariant_p (t, loop_nest->num)
5619	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
5620      return false;
5621
5622  return true;
5623}
5624
5625/* This computes the affine dependence relation between A and B with
5626   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
5627   independence between two accesses, while CHREC_DONT_KNOW is used
5628   for representing the unknown relation.
5629
5630   Note that it is possible to stop the computation of the dependence
5631   relation the first time we detect a CHREC_KNOWN element for a given
5632   subscript.  */
5633
5634void
5635compute_affine_dependence (struct data_dependence_relation *ddr,
5636			   class loop *loop_nest)
5637{
5638  struct data_reference *dra = DDR_A (ddr);
5639  struct data_reference *drb = DDR_B (ddr);
5640
5641  if (dump_file && (dump_flags & TDF_DETAILS))
5642    {
5643      fprintf (dump_file, "(compute_affine_dependence\n");
5644      fprintf (dump_file, "  ref_a: ");
5645      print_generic_expr (dump_file, DR_REF (dra));
5646      fprintf (dump_file, ", stmt_a: ");
5647      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
5648      fprintf (dump_file, "  ref_b: ");
5649      print_generic_expr (dump_file, DR_REF (drb));
5650      fprintf (dump_file, ", stmt_b: ");
5651      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
5652    }
5653
5654  /* Analyze only when the dependence relation is not yet known.  */
5655  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
5656    {
5657      dependence_stats.num_dependence_tests++;
5658
5659      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
5660	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
5661	subscript_dependence_tester (ddr, loop_nest);
5662
5663      /* As a last case, if the dependence cannot be determined, or if
5664	 the dependence is considered too difficult to determine, answer
5665	 "don't know".  */
5666      else
5667	{
5668	  dependence_stats.num_dependence_undetermined++;
5669
5670	  if (dump_file && (dump_flags & TDF_DETAILS))
5671	    {
5672	      fprintf (dump_file, "Data ref a:\n");
5673	      dump_data_reference (dump_file, dra);
5674	      fprintf (dump_file, "Data ref b:\n");
5675	      dump_data_reference (dump_file, drb);
5676	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
5677	    }
5678	  finalize_ddr_dependent (ddr, chrec_dont_know);
5679	}
5680    }
5681
5682  if (dump_file && (dump_flags & TDF_DETAILS))
5683    {
5684      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
5685	fprintf (dump_file, ") -> no dependence\n");
5686      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
5687	fprintf (dump_file, ") -> dependence analysis failed\n");
5688      else
5689	fprintf (dump_file, ")\n");
5690    }
5691}
5692
5693/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
5694   the data references in DATAREFS, in the LOOP_NEST.  When
5695   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
5696   relations.  Return true when successful, i.e. data references number
5697   is small enough to be handled.  */
5698
5699bool
5700compute_all_dependences (const vec<data_reference_p> &datarefs,
5701			 vec<ddr_p> *dependence_relations,
5702			 const vec<loop_p> &loop_nest,
5703			 bool compute_self_and_rr)
5704{
5705  struct data_dependence_relation *ddr;
5706  struct data_reference *a, *b;
5707  unsigned int i, j;
5708
5709  if ((int) datarefs.length ()
5710      > param_loop_max_datarefs_for_datadeps)
5711    {
5712      struct data_dependence_relation *ddr;
5713
5714      /* Insert a single relation into dependence_relations:
5715	 chrec_dont_know.  */
5716      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
5717      dependence_relations->safe_push (ddr);
5718      return false;
5719    }
5720
5721  FOR_EACH_VEC_ELT (datarefs, i, a)
5722    for (j = i + 1; datarefs.iterate (j, &b); j++)
5723      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
5724	{
5725	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
5726	  dependence_relations->safe_push (ddr);
5727          if (loop_nest.exists ())
5728   	    compute_affine_dependence (ddr, loop_nest[0]);
5729	}
5730
5731  if (compute_self_and_rr)
5732    FOR_EACH_VEC_ELT (datarefs, i, a)
5733      {
5734	ddr = initialize_data_dependence_relation (a, a, loop_nest);
5735	dependence_relations->safe_push (ddr);
5736        if (loop_nest.exists ())
5737   	  compute_affine_dependence (ddr, loop_nest[0]);
5738      }
5739
5740  return true;
5741}
5742
5743/* Describes a location of a memory reference.  */
5744
5745struct data_ref_loc
5746{
5747  /* The memory reference.  */
5748  tree ref;
5749
5750  /* True if the memory reference is read.  */
5751  bool is_read;
5752
5753  /* True if the data reference is conditional within the containing
5754     statement, i.e. if it might not occur even when the statement
5755     is executed and runs to completion.  */
5756  bool is_conditional_in_stmt;
5757};
5758
5759
5760/* Stores the locations of memory references in STMT to REFERENCES.  Returns
5761   true if STMT clobbers memory, false otherwise.  */
5762
5763static bool
5764get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
5765{
5766  bool clobbers_memory = false;
5767  data_ref_loc ref;
5768  tree op0, op1;
5769  enum gimple_code stmt_code = gimple_code (stmt);
5770
5771  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
5772     As we cannot model data-references to not spelled out
5773     accesses give up if they may occur.  */
5774  if (stmt_code == GIMPLE_CALL
5775      && !(gimple_call_flags (stmt) & ECF_CONST))
5776    {
5777      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
5778      if (gimple_call_internal_p (stmt))
5779	switch (gimple_call_internal_fn (stmt))
5780	  {
5781	  case IFN_GOMP_SIMD_LANE:
5782	    {
5783	      class loop *loop = gimple_bb (stmt)->loop_father;
5784	      tree uid = gimple_call_arg (stmt, 0);
5785	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
5786	      if (loop == NULL
5787		  || loop->simduid != SSA_NAME_VAR (uid))
5788		clobbers_memory = true;
5789	      break;
5790	    }
5791	  case IFN_MASK_LOAD:
5792	  case IFN_MASK_STORE:
5793	    break;
5794	  default:
5795	    clobbers_memory = true;
5796	    break;
5797	  }
5798      else
5799	clobbers_memory = true;
5800    }
5801  else if (stmt_code == GIMPLE_ASM
5802	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
5803	       || gimple_vuse (stmt)))
5804    clobbers_memory = true;
5805
5806  if (!gimple_vuse (stmt))
5807    return clobbers_memory;
5808
5809  if (stmt_code == GIMPLE_ASSIGN)
5810    {
5811      tree base;
5812      op0 = gimple_assign_lhs (stmt);
5813      op1 = gimple_assign_rhs1 (stmt);
5814
5815      if (DECL_P (op1)
5816	  || (REFERENCE_CLASS_P (op1)
5817	      && (base = get_base_address (op1))
5818	      && TREE_CODE (base) != SSA_NAME
5819	      && !is_gimple_min_invariant (base)))
5820	{
5821	  ref.ref = op1;
5822	  ref.is_read = true;
5823	  ref.is_conditional_in_stmt = false;
5824	  references->safe_push (ref);
5825	}
5826    }
5827  else if (stmt_code == GIMPLE_CALL)
5828    {
5829      unsigned i, n;
5830      tree ptr, type;
5831      unsigned int align;
5832
5833      ref.is_read = false;
5834      if (gimple_call_internal_p (stmt))
5835	switch (gimple_call_internal_fn (stmt))
5836	  {
5837	  case IFN_MASK_LOAD:
5838	    if (gimple_call_lhs (stmt) == NULL_TREE)
5839	      break;
5840	    ref.is_read = true;
5841	    /* FALLTHRU */
5842	  case IFN_MASK_STORE:
5843	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
5844	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
5845	    if (ref.is_read)
5846	      type = TREE_TYPE (gimple_call_lhs (stmt));
5847	    else
5848	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
5849	    if (TYPE_ALIGN (type) != align)
5850	      type = build_aligned_type (type, align);
5851	    ref.is_conditional_in_stmt = true;
5852	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
5853				   ptr);
5854	    references->safe_push (ref);
5855	    return false;
5856	  default:
5857	    break;
5858	  }
5859
5860      op0 = gimple_call_lhs (stmt);
5861      n = gimple_call_num_args (stmt);
5862      for (i = 0; i < n; i++)
5863	{
5864	  op1 = gimple_call_arg (stmt, i);
5865
5866	  if (DECL_P (op1)
5867	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
5868	    {
5869	      ref.ref = op1;
5870	      ref.is_read = true;
5871	      ref.is_conditional_in_stmt = false;
5872	      references->safe_push (ref);
5873	    }
5874	}
5875    }
5876  else
5877    return clobbers_memory;
5878
5879  if (op0
5880      && (DECL_P (op0)
5881	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
5882    {
5883      ref.ref = op0;
5884      ref.is_read = false;
5885      ref.is_conditional_in_stmt = false;
5886      references->safe_push (ref);
5887    }
5888  return clobbers_memory;
5889}
5890
5891
5892/* Returns true if the loop-nest has any data reference.  */
5893
5894bool
5895loop_nest_has_data_refs (loop_p loop)
5896{
5897  basic_block *bbs = get_loop_body (loop);
5898  auto_vec<data_ref_loc, 3> references;
5899
5900  for (unsigned i = 0; i < loop->num_nodes; i++)
5901    {
5902      basic_block bb = bbs[i];
5903      gimple_stmt_iterator bsi;
5904
5905      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5906	{
5907	  gimple *stmt = gsi_stmt (bsi);
5908	  get_references_in_stmt (stmt, &references);
5909	  if (references.length ())
5910	    {
5911	      free (bbs);
5912	      return true;
5913	    }
5914	}
5915    }
5916  free (bbs);
5917  return false;
5918}
5919
5920/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
5921   reference, returns false, otherwise returns true.  NEST is the outermost
5922   loop of the loop nest in which the references should be analyzed.  */
5923
5924opt_result
5925find_data_references_in_stmt (class loop *nest, gimple *stmt,
5926			      vec<data_reference_p> *datarefs)
5927{
5928  auto_vec<data_ref_loc, 2> references;
5929  data_reference_p dr;
5930
5931  if (get_references_in_stmt (stmt, &references))
5932    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
5933				   stmt);
5934
5935  for (const data_ref_loc &ref : references)
5936    {
5937      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
5938			    loop_containing_stmt (stmt), ref.ref,
5939			    stmt, ref.is_read, ref.is_conditional_in_stmt);
5940      gcc_assert (dr != NULL);
5941      datarefs->safe_push (dr);
5942    }
5943
5944  return opt_result::success ();
5945}
5946
5947/* Stores the data references in STMT to DATAREFS.  If there is an
5948   unanalyzable reference, returns false, otherwise returns true.
5949   NEST is the outermost loop of the loop nest in which the references
5950   should be instantiated, LOOP is the loop in which the references
5951   should be analyzed.  */
5952
5953bool
5954graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
5955				       vec<data_reference_p> *datarefs)
5956{
5957  auto_vec<data_ref_loc, 2> references;
5958  bool ret = true;
5959  data_reference_p dr;
5960
5961  if (get_references_in_stmt (stmt, &references))
5962    return false;
5963
5964  for (const data_ref_loc &ref : references)
5965    {
5966      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
5967			    ref.is_conditional_in_stmt);
5968      gcc_assert (dr != NULL);
5969      datarefs->safe_push (dr);
5970    }
5971
5972  return ret;
5973}
5974
5975/* Search the data references in LOOP, and record the information into
5976   DATAREFS.  Returns chrec_dont_know when failing to analyze a
5977   difficult case, returns NULL_TREE otherwise.  */
5978
5979tree
5980find_data_references_in_bb (class loop *loop, basic_block bb,
5981                            vec<data_reference_p> *datarefs)
5982{
5983  gimple_stmt_iterator bsi;
5984
5985  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
5986    {
5987      gimple *stmt = gsi_stmt (bsi);
5988
5989      if (!find_data_references_in_stmt (loop, stmt, datarefs))
5990        {
5991          struct data_reference *res;
5992          res = XCNEW (struct data_reference);
5993          datarefs->safe_push (res);
5994
5995          return chrec_dont_know;
5996        }
5997    }
5998
5999  return NULL_TREE;
6000}
6001
6002/* Search the data references in LOOP, and record the information into
6003   DATAREFS.  Returns chrec_dont_know when failing to analyze a
6004   difficult case, returns NULL_TREE otherwise.
6005
6006   TODO: This function should be made smarter so that it can handle address
6007   arithmetic as if they were array accesses, etc.  */
6008
6009tree
6010find_data_references_in_loop (class loop *loop,
6011			      vec<data_reference_p> *datarefs)
6012{
6013  basic_block bb, *bbs;
6014  unsigned int i;
6015
6016  bbs = get_loop_body_in_dom_order (loop);
6017
6018  for (i = 0; i < loop->num_nodes; i++)
6019    {
6020      bb = bbs[i];
6021
6022      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
6023        {
6024          free (bbs);
6025          return chrec_dont_know;
6026        }
6027    }
6028  free (bbs);
6029
6030  return NULL_TREE;
6031}
6032
6033/* Return the alignment in bytes that DRB is guaranteed to have at all
6034   times.  */
6035
6036unsigned int
6037dr_alignment (innermost_loop_behavior *drb)
6038{
6039  /* Get the alignment of BASE_ADDRESS + INIT.  */
6040  unsigned int alignment = drb->base_alignment;
6041  unsigned int misalignment = (drb->base_misalignment
6042			       + TREE_INT_CST_LOW (drb->init));
6043  if (misalignment != 0)
6044    alignment = MIN (alignment, misalignment & -misalignment);
6045
6046  /* Cap it to the alignment of OFFSET.  */
6047  if (!integer_zerop (drb->offset))
6048    alignment = MIN (alignment, drb->offset_alignment);
6049
6050  /* Cap it to the alignment of STEP.  */
6051  if (!integer_zerop (drb->step))
6052    alignment = MIN (alignment, drb->step_alignment);
6053
6054  return alignment;
6055}
6056
6057/* If BASE is a pointer-typed SSA name, try to find the object that it
6058   is based on.  Return this object X on success and store the alignment
6059   in bytes of BASE - &X in *ALIGNMENT_OUT.  */
6060
6061static tree
6062get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
6063{
6064  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
6065    return NULL_TREE;
6066
6067  gimple *def = SSA_NAME_DEF_STMT (base);
6068  base = analyze_scalar_evolution (loop_containing_stmt (def), base);
6069
6070  /* Peel chrecs and record the minimum alignment preserved by
6071     all steps.  */
6072  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6073  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
6074    {
6075      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
6076      alignment = MIN (alignment, step_alignment);
6077      base = CHREC_LEFT (base);
6078    }
6079
6080  /* Punt if the expression is too complicated to handle.  */
6081  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
6082    return NULL_TREE;
6083
6084  /* The only useful cases are those for which a dereference folds to something
6085     other than an INDIRECT_REF.  */
6086  tree ref_type = TREE_TYPE (TREE_TYPE (base));
6087  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
6088  if (!ref)
6089    return NULL_TREE;
6090
6091  /* Analyze the base to which the steps we peeled were applied.  */
6092  poly_int64 bitsize, bitpos, bytepos;
6093  machine_mode mode;
6094  int unsignedp, reversep, volatilep;
6095  tree offset;
6096  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
6097			      &unsignedp, &reversep, &volatilep);
6098  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
6099    return NULL_TREE;
6100
6101  /* Restrict the alignment to that guaranteed by the offsets.  */
6102  unsigned int bytepos_alignment = known_alignment (bytepos);
6103  if (bytepos_alignment != 0)
6104    alignment = MIN (alignment, bytepos_alignment);
6105  if (offset)
6106    {
6107      unsigned int offset_alignment = highest_pow2_factor (offset);
6108      alignment = MIN (alignment, offset_alignment);
6109    }
6110
6111  *alignment_out = alignment;
6112  return base;
6113}
6114
6115/* Return the object whose alignment would need to be changed in order
6116   to increase the alignment of ADDR.  Store the maximum achievable
6117   alignment in *MAX_ALIGNMENT.  */
6118
6119tree
6120get_base_for_alignment (tree addr, unsigned int *max_alignment)
6121{
6122  tree base = get_base_for_alignment_1 (addr, max_alignment);
6123  if (base)
6124    return base;
6125
6126  if (TREE_CODE (addr) == ADDR_EXPR)
6127    addr = TREE_OPERAND (addr, 0);
6128  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
6129  return addr;
6130}
6131
6132/* Recursive helper function.  */
6133
6134static bool
6135find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
6136{
6137  /* Inner loops of the nest should not contain siblings.  Example:
6138     when there are two consecutive loops,
6139
6140     | loop_0
6141     |   loop_1
6142     |     A[{0, +, 1}_1]
6143     |   endloop_1
6144     |   loop_2
6145     |     A[{0, +, 1}_2]
6146     |   endloop_2
6147     | endloop_0
6148
6149     the dependence relation cannot be captured by the distance
6150     abstraction.  */
6151  if (loop->next)
6152    return false;
6153
6154  loop_nest->safe_push (loop);
6155  if (loop->inner)
6156    return find_loop_nest_1 (loop->inner, loop_nest);
6157  return true;
6158}
6159
6160/* Return false when the LOOP is not well nested.  Otherwise return
6161   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
6162   contain the loops from the outermost to the innermost, as they will
6163   appear in the classic distance vector.  */
6164
6165bool
6166find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
6167{
6168  loop_nest->safe_push (loop);
6169  if (loop->inner)
6170    return find_loop_nest_1 (loop->inner, loop_nest);
6171  return true;
6172}
6173
6174/* Returns true when the data dependences have been computed, false otherwise.
6175   Given a loop nest LOOP, the following vectors are returned:
6176   DATAREFS is initialized to all the array elements contained in this loop,
6177   DEPENDENCE_RELATIONS contains the relations between the data references.
6178   Compute read-read and self relations if
6179   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */
6180
6181bool
6182compute_data_dependences_for_loop (class loop *loop,
6183				   bool compute_self_and_read_read_dependences,
6184				   vec<loop_p> *loop_nest,
6185				   vec<data_reference_p> *datarefs,
6186				   vec<ddr_p> *dependence_relations)
6187{
6188  bool res = true;
6189
6190  memset (&dependence_stats, 0, sizeof (dependence_stats));
6191
6192  /* If the loop nest is not well formed, or one of the data references
6193     is not computable, give up without spending time to compute other
6194     dependences.  */
6195  if (!loop
6196      || !find_loop_nest (loop, loop_nest)
6197      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
6198      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
6199				   compute_self_and_read_read_dependences))
6200    res = false;
6201
6202  if (dump_file && (dump_flags & TDF_STATS))
6203    {
6204      fprintf (dump_file, "Dependence tester statistics:\n");
6205
6206      fprintf (dump_file, "Number of dependence tests: %d\n",
6207	       dependence_stats.num_dependence_tests);
6208      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
6209	       dependence_stats.num_dependence_dependent);
6210      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
6211	       dependence_stats.num_dependence_independent);
6212      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
6213	       dependence_stats.num_dependence_undetermined);
6214
6215      fprintf (dump_file, "Number of subscript tests: %d\n",
6216	       dependence_stats.num_subscript_tests);
6217      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
6218	       dependence_stats.num_subscript_undetermined);
6219      fprintf (dump_file, "Number of same subscript function: %d\n",
6220	       dependence_stats.num_same_subscript_function);
6221
6222      fprintf (dump_file, "Number of ziv tests: %d\n",
6223	       dependence_stats.num_ziv);
6224      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
6225	       dependence_stats.num_ziv_dependent);
6226      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
6227	       dependence_stats.num_ziv_independent);
6228      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
6229	       dependence_stats.num_ziv_unimplemented);
6230
6231      fprintf (dump_file, "Number of siv tests: %d\n",
6232	       dependence_stats.num_siv);
6233      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
6234	       dependence_stats.num_siv_dependent);
6235      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
6236	       dependence_stats.num_siv_independent);
6237      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
6238	       dependence_stats.num_siv_unimplemented);
6239
6240      fprintf (dump_file, "Number of miv tests: %d\n",
6241	       dependence_stats.num_miv);
6242      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
6243	       dependence_stats.num_miv_dependent);
6244      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
6245	       dependence_stats.num_miv_independent);
6246      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
6247	       dependence_stats.num_miv_unimplemented);
6248    }
6249
6250  return res;
6251}
6252
6253/* Free the memory used by a data dependence relation DDR.  */
6254
6255void
6256free_dependence_relation (struct data_dependence_relation *ddr)
6257{
6258  if (ddr == NULL)
6259    return;
6260
6261  if (DDR_SUBSCRIPTS (ddr).exists ())
6262    free_subscripts (DDR_SUBSCRIPTS (ddr));
6263  DDR_DIST_VECTS (ddr).release ();
6264  DDR_DIR_VECTS (ddr).release ();
6265
6266  free (ddr);
6267}
6268
6269/* Free the memory used by the data dependence relations from
6270   DEPENDENCE_RELATIONS.  */
6271
6272void
6273free_dependence_relations (vec<ddr_p>& dependence_relations)
6274{
6275  for (data_dependence_relation *ddr : dependence_relations)
6276    if (ddr)
6277      free_dependence_relation (ddr);
6278
6279  dependence_relations.release ();
6280}
6281
6282/* Free the memory used by the data references from DATAREFS.  */
6283
6284void
6285free_data_refs (vec<data_reference_p>& datarefs)
6286{
6287  for (data_reference *dr : datarefs)
6288    free_data_ref (dr);
6289  datarefs.release ();
6290}
6291
6292/* Common routine implementing both dr_direction_indicator and
6293   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
6294   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
6295   Return the step as the indicator otherwise.  */
6296
6297static tree
6298dr_step_indicator (struct data_reference *dr, int useful_min)
6299{
6300  tree step = DR_STEP (dr);
6301  if (!step)
6302    return NULL_TREE;
6303  STRIP_NOPS (step);
6304  /* Look for cases where the step is scaled by a positive constant
6305     integer, which will often be the access size.  If the multiplication
6306     doesn't change the sign (due to overflow effects) then we can
6307     test the unscaled value instead.  */
6308  if (TREE_CODE (step) == MULT_EXPR
6309      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
6310      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
6311    {
6312      tree factor = TREE_OPERAND (step, 1);
6313      step = TREE_OPERAND (step, 0);
6314
6315      /* Strip widening and truncating conversions as well as nops.  */
6316      if (CONVERT_EXPR_P (step)
6317	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
6318	step = TREE_OPERAND (step, 0);
6319      tree type = TREE_TYPE (step);
6320
6321      /* Get the range of step values that would not cause overflow.  */
6322      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
6323			 / wi::to_widest (factor));
6324      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
6325			 / wi::to_widest (factor));
6326
6327      /* Get the range of values that the unconverted step actually has.  */
6328      wide_int step_min, step_max;
6329      value_range vr;
6330      if (TREE_CODE (step) != SSA_NAME
6331	  || !get_range_query (cfun)->range_of_expr (vr, step)
6332	  || vr.kind () != VR_RANGE)
6333	{
6334	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
6335	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
6336	}
6337      else
6338	{
6339	  step_min = vr.lower_bound ();
6340	  step_max = vr.upper_bound ();
6341	}
6342
6343      /* Check whether the unconverted step has an acceptable range.  */
6344      signop sgn = TYPE_SIGN (type);
6345      if (wi::les_p (minv, widest_int::from (step_min, sgn))
6346	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
6347	{
6348	  if (wi::ge_p (step_min, useful_min, sgn))
6349	    return ssize_int (useful_min);
6350	  else if (wi::lt_p (step_max, 0, sgn))
6351	    return ssize_int (-1);
6352	  else
6353	    return fold_convert (ssizetype, step);
6354	}
6355    }
6356  return DR_STEP (dr);
6357}
6358
6359/* Return a value that is negative iff DR has a negative step.  */
6360
6361tree
6362dr_direction_indicator (struct data_reference *dr)
6363{
6364  return dr_step_indicator (dr, 0);
6365}
6366
6367/* Return a value that is zero iff DR has a zero step.  */
6368
6369tree
6370dr_zero_step_indicator (struct data_reference *dr)
6371{
6372  return dr_step_indicator (dr, 1);
6373}
6374
6375/* Return true if DR is known to have a nonnegative (but possibly zero)
6376   step.  */
6377
6378bool
6379dr_known_forward_stride_p (struct data_reference *dr)
6380{
6381  tree indicator = dr_direction_indicator (dr);
6382  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
6383				   fold_convert (ssizetype, indicator),
6384				   ssize_int (0));
6385  return neg_step_val && integer_zerop (neg_step_val);
6386}
6387