1/* Data references and dependences detectors. 2 Copyright (C) 2003-2015 Free Software Foundation, Inc. 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21/* This pass walks a given loop structure searching for array 22 references. The information about the array accesses is recorded 23 in DATA_REFERENCE structures. 24 25 The basic test for determining the dependences is: 26 given two access functions chrec1 and chrec2 to a same array, and 27 x and y two vectors from the iteration domain, the same element of 28 the array is accessed twice at iterations x and y if and only if: 29 | chrec1 (x) == chrec2 (y). 30 31 The goals of this analysis are: 32 33 - to determine the independence: the relation between two 34 independent accesses is qualified with the chrec_known (this 35 information allows a loop parallelization), 36 37 - when two data references access the same data, to qualify the 38 dependence relation with classic dependence representations: 39 40 - distance vectors 41 - direction vectors 42 - loop carried level dependence 43 - polyhedron dependence 44 or with the chains of recurrences based representation, 45 46 - to define a knowledge base for storing the data dependence 47 information, 48 49 - to define an interface to access this data. 50 51 52 Definitions: 53 54 - subscript: given two array accesses a subscript is the tuple 55 composed of the access functions for a given dimension. Example: 56 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts: 57 (f1, g1), (f2, g2), (f3, g3). 58 59 - Diophantine equation: an equation whose coefficients and 60 solutions are integer constants, for example the equation 61 | 3*x + 2*y = 1 62 has an integer solution x = 1 and y = -1. 63 64 References: 65 66 - "Advanced Compilation for High Performance Computing" by Randy 67 Allen and Ken Kennedy. 68 http://citeseer.ist.psu.edu/goff91practical.html 69 70 - "Loop Transformations for Restructuring Compilers - The Foundations" 71 by Utpal Banerjee. 72 73 74*/ 75 76#include "config.h" 77#include "system.h" 78#include "coretypes.h" 79#include "hash-set.h" 80#include "machmode.h" 81#include "vec.h" 82#include "double-int.h" 83#include "input.h" 84#include "alias.h" 85#include "symtab.h" 86#include "options.h" 87#include "wide-int.h" 88#include "inchash.h" 89#include "tree.h" 90#include "fold-const.h" 91#include "hashtab.h" 92#include "tm.h" 93#include "hard-reg-set.h" 94#include "function.h" 95#include "rtl.h" 96#include "flags.h" 97#include "statistics.h" 98#include "real.h" 99#include "fixed-value.h" 100#include "insn-config.h" 101#include "expmed.h" 102#include "dojump.h" 103#include "explow.h" 104#include "calls.h" 105#include "emit-rtl.h" 106#include "varasm.h" 107#include "stmt.h" 108#include "expr.h" 109#include "gimple-pretty-print.h" 110#include "predict.h" 111#include "dominance.h" 112#include "cfg.h" 113#include "basic-block.h" 114#include "tree-ssa-alias.h" 115#include "internal-fn.h" 116#include "gimple-expr.h" 117#include "is-a.h" 118#include "gimple.h" 119#include "gimple-iterator.h" 120#include "tree-ssa-loop-niter.h" 121#include "tree-ssa-loop.h" 122#include "tree-ssa.h" 123#include "cfgloop.h" 124#include "tree-data-ref.h" 125#include "tree-scalar-evolution.h" 126#include "dumpfile.h" 127#include "langhooks.h" 128#include "tree-affine.h" 129#include "params.h" 130 131static struct datadep_stats 132{ 133 int num_dependence_tests; 134 int num_dependence_dependent; 135 int num_dependence_independent; 136 int num_dependence_undetermined; 137 138 int num_subscript_tests; 139 int num_subscript_undetermined; 140 int num_same_subscript_function; 141 142 int num_ziv; 143 int num_ziv_independent; 144 int num_ziv_dependent; 145 int num_ziv_unimplemented; 146 147 int num_siv; 148 int num_siv_independent; 149 int num_siv_dependent; 150 int num_siv_unimplemented; 151 152 int num_miv; 153 int num_miv_independent; 154 int num_miv_dependent; 155 int num_miv_unimplemented; 156} dependence_stats; 157 158static bool subscript_dependence_tester_1 (struct data_dependence_relation *, 159 struct data_reference *, 160 struct data_reference *, 161 struct loop *); 162/* Returns true iff A divides B. */ 163 164static inline bool 165tree_fold_divides_p (const_tree a, const_tree b) 166{ 167 gcc_assert (TREE_CODE (a) == INTEGER_CST); 168 gcc_assert (TREE_CODE (b) == INTEGER_CST); 169 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a)); 170} 171 172/* Returns true iff A divides B. */ 173 174static inline bool 175int_divides_p (int a, int b) 176{ 177 return ((b % a) == 0); 178} 179 180 181 182/* Dump into FILE all the data references from DATAREFS. */ 183 184static void 185dump_data_references (FILE *file, vec<data_reference_p> datarefs) 186{ 187 unsigned int i; 188 struct data_reference *dr; 189 190 FOR_EACH_VEC_ELT (datarefs, i, dr) 191 dump_data_reference (file, dr); 192} 193 194/* Unified dump into FILE all the data references from DATAREFS. */ 195 196DEBUG_FUNCTION void 197debug (vec<data_reference_p> &ref) 198{ 199 dump_data_references (stderr, ref); 200} 201 202DEBUG_FUNCTION void 203debug (vec<data_reference_p> *ptr) 204{ 205 if (ptr) 206 debug (*ptr); 207 else 208 fprintf (stderr, "<nil>\n"); 209} 210 211 212/* Dump into STDERR all the data references from DATAREFS. */ 213 214DEBUG_FUNCTION void 215debug_data_references (vec<data_reference_p> datarefs) 216{ 217 dump_data_references (stderr, datarefs); 218} 219 220/* Print to STDERR the data_reference DR. */ 221 222DEBUG_FUNCTION void 223debug_data_reference (struct data_reference *dr) 224{ 225 dump_data_reference (stderr, dr); 226} 227 228/* Dump function for a DATA_REFERENCE structure. */ 229 230void 231dump_data_reference (FILE *outf, 232 struct data_reference *dr) 233{ 234 unsigned int i; 235 236 fprintf (outf, "#(Data Ref: \n"); 237 fprintf (outf, "# bb: %d \n", gimple_bb (DR_STMT (dr))->index); 238 fprintf (outf, "# stmt: "); 239 print_gimple_stmt (outf, DR_STMT (dr), 0, 0); 240 fprintf (outf, "# ref: "); 241 print_generic_stmt (outf, DR_REF (dr), 0); 242 fprintf (outf, "# base_object: "); 243 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0); 244 245 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 246 { 247 fprintf (outf, "# Access function %d: ", i); 248 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0); 249 } 250 fprintf (outf, "#)\n"); 251} 252 253/* Unified dump function for a DATA_REFERENCE structure. */ 254 255DEBUG_FUNCTION void 256debug (data_reference &ref) 257{ 258 dump_data_reference (stderr, &ref); 259} 260 261DEBUG_FUNCTION void 262debug (data_reference *ptr) 263{ 264 if (ptr) 265 debug (*ptr); 266 else 267 fprintf (stderr, "<nil>\n"); 268} 269 270 271/* Dumps the affine function described by FN to the file OUTF. */ 272 273static void 274dump_affine_function (FILE *outf, affine_fn fn) 275{ 276 unsigned i; 277 tree coef; 278 279 print_generic_expr (outf, fn[0], TDF_SLIM); 280 for (i = 1; fn.iterate (i, &coef); i++) 281 { 282 fprintf (outf, " + "); 283 print_generic_expr (outf, coef, TDF_SLIM); 284 fprintf (outf, " * x_%u", i); 285 } 286} 287 288/* Dumps the conflict function CF to the file OUTF. */ 289 290static void 291dump_conflict_function (FILE *outf, conflict_function *cf) 292{ 293 unsigned i; 294 295 if (cf->n == NO_DEPENDENCE) 296 fprintf (outf, "no dependence"); 297 else if (cf->n == NOT_KNOWN) 298 fprintf (outf, "not known"); 299 else 300 { 301 for (i = 0; i < cf->n; i++) 302 { 303 if (i != 0) 304 fprintf (outf, " "); 305 fprintf (outf, "["); 306 dump_affine_function (outf, cf->fns[i]); 307 fprintf (outf, "]"); 308 } 309 } 310} 311 312/* Dump function for a SUBSCRIPT structure. */ 313 314static void 315dump_subscript (FILE *outf, struct subscript *subscript) 316{ 317 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript); 318 319 fprintf (outf, "\n (subscript \n"); 320 fprintf (outf, " iterations_that_access_an_element_twice_in_A: "); 321 dump_conflict_function (outf, cf); 322 if (CF_NONTRIVIAL_P (cf)) 323 { 324 tree last_iteration = SUB_LAST_CONFLICT (subscript); 325 fprintf (outf, "\n last_conflict: "); 326 print_generic_expr (outf, last_iteration, 0); 327 } 328 329 cf = SUB_CONFLICTS_IN_B (subscript); 330 fprintf (outf, "\n iterations_that_access_an_element_twice_in_B: "); 331 dump_conflict_function (outf, cf); 332 if (CF_NONTRIVIAL_P (cf)) 333 { 334 tree last_iteration = SUB_LAST_CONFLICT (subscript); 335 fprintf (outf, "\n last_conflict: "); 336 print_generic_expr (outf, last_iteration, 0); 337 } 338 339 fprintf (outf, "\n (Subscript distance: "); 340 print_generic_expr (outf, SUB_DISTANCE (subscript), 0); 341 fprintf (outf, " ))\n"); 342} 343 344/* Print the classic direction vector DIRV to OUTF. */ 345 346static void 347print_direction_vector (FILE *outf, 348 lambda_vector dirv, 349 int length) 350{ 351 int eq; 352 353 for (eq = 0; eq < length; eq++) 354 { 355 enum data_dependence_direction dir = ((enum data_dependence_direction) 356 dirv[eq]); 357 358 switch (dir) 359 { 360 case dir_positive: 361 fprintf (outf, " +"); 362 break; 363 case dir_negative: 364 fprintf (outf, " -"); 365 break; 366 case dir_equal: 367 fprintf (outf, " ="); 368 break; 369 case dir_positive_or_equal: 370 fprintf (outf, " +="); 371 break; 372 case dir_positive_or_negative: 373 fprintf (outf, " +-"); 374 break; 375 case dir_negative_or_equal: 376 fprintf (outf, " -="); 377 break; 378 case dir_star: 379 fprintf (outf, " *"); 380 break; 381 default: 382 fprintf (outf, "indep"); 383 break; 384 } 385 } 386 fprintf (outf, "\n"); 387} 388 389/* Print a vector of direction vectors. */ 390 391static void 392print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects, 393 int length) 394{ 395 unsigned j; 396 lambda_vector v; 397 398 FOR_EACH_VEC_ELT (dir_vects, j, v) 399 print_direction_vector (outf, v, length); 400} 401 402/* Print out a vector VEC of length N to OUTFILE. */ 403 404static inline void 405print_lambda_vector (FILE * outfile, lambda_vector vector, int n) 406{ 407 int i; 408 409 for (i = 0; i < n; i++) 410 fprintf (outfile, "%3d ", vector[i]); 411 fprintf (outfile, "\n"); 412} 413 414/* Print a vector of distance vectors. */ 415 416static void 417print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects, 418 int length) 419{ 420 unsigned j; 421 lambda_vector v; 422 423 FOR_EACH_VEC_ELT (dist_vects, j, v) 424 print_lambda_vector (outf, v, length); 425} 426 427/* Dump function for a DATA_DEPENDENCE_RELATION structure. */ 428 429static void 430dump_data_dependence_relation (FILE *outf, 431 struct data_dependence_relation *ddr) 432{ 433 struct data_reference *dra, *drb; 434 435 fprintf (outf, "(Data Dep: \n"); 436 437 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 438 { 439 if (ddr) 440 { 441 dra = DDR_A (ddr); 442 drb = DDR_B (ddr); 443 if (dra) 444 dump_data_reference (outf, dra); 445 else 446 fprintf (outf, " (nil)\n"); 447 if (drb) 448 dump_data_reference (outf, drb); 449 else 450 fprintf (outf, " (nil)\n"); 451 } 452 fprintf (outf, " (don't know)\n)\n"); 453 return; 454 } 455 456 dra = DDR_A (ddr); 457 drb = DDR_B (ddr); 458 dump_data_reference (outf, dra); 459 dump_data_reference (outf, drb); 460 461 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 462 fprintf (outf, " (no dependence)\n"); 463 464 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 465 { 466 unsigned int i; 467 struct loop *loopi; 468 469 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 470 { 471 fprintf (outf, " access_fn_A: "); 472 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0); 473 fprintf (outf, " access_fn_B: "); 474 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0); 475 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i)); 476 } 477 478 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr)); 479 fprintf (outf, " loop nest: ("); 480 FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi) 481 fprintf (outf, "%d ", loopi->num); 482 fprintf (outf, ")\n"); 483 484 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 485 { 486 fprintf (outf, " distance_vector: "); 487 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i), 488 DDR_NB_LOOPS (ddr)); 489 } 490 491 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 492 { 493 fprintf (outf, " direction_vector: "); 494 print_direction_vector (outf, DDR_DIR_VECT (ddr, i), 495 DDR_NB_LOOPS (ddr)); 496 } 497 } 498 499 fprintf (outf, ")\n"); 500} 501 502/* Debug version. */ 503 504DEBUG_FUNCTION void 505debug_data_dependence_relation (struct data_dependence_relation *ddr) 506{ 507 dump_data_dependence_relation (stderr, ddr); 508} 509 510/* Dump into FILE all the dependence relations from DDRS. */ 511 512void 513dump_data_dependence_relations (FILE *file, 514 vec<ddr_p> ddrs) 515{ 516 unsigned int i; 517 struct data_dependence_relation *ddr; 518 519 FOR_EACH_VEC_ELT (ddrs, i, ddr) 520 dump_data_dependence_relation (file, ddr); 521} 522 523DEBUG_FUNCTION void 524debug (vec<ddr_p> &ref) 525{ 526 dump_data_dependence_relations (stderr, ref); 527} 528 529DEBUG_FUNCTION void 530debug (vec<ddr_p> *ptr) 531{ 532 if (ptr) 533 debug (*ptr); 534 else 535 fprintf (stderr, "<nil>\n"); 536} 537 538 539/* Dump to STDERR all the dependence relations from DDRS. */ 540 541DEBUG_FUNCTION void 542debug_data_dependence_relations (vec<ddr_p> ddrs) 543{ 544 dump_data_dependence_relations (stderr, ddrs); 545} 546 547/* Dumps the distance and direction vectors in FILE. DDRS contains 548 the dependence relations, and VECT_SIZE is the size of the 549 dependence vectors, or in other words the number of loops in the 550 considered nest. */ 551 552static void 553dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs) 554{ 555 unsigned int i, j; 556 struct data_dependence_relation *ddr; 557 lambda_vector v; 558 559 FOR_EACH_VEC_ELT (ddrs, i, ddr) 560 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr)) 561 { 562 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), j, v) 563 { 564 fprintf (file, "DISTANCE_V ("); 565 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr)); 566 fprintf (file, ")\n"); 567 } 568 569 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), j, v) 570 { 571 fprintf (file, "DIRECTION_V ("); 572 print_direction_vector (file, v, DDR_NB_LOOPS (ddr)); 573 fprintf (file, ")\n"); 574 } 575 } 576 577 fprintf (file, "\n\n"); 578} 579 580/* Dumps the data dependence relations DDRS in FILE. */ 581 582static void 583dump_ddrs (FILE *file, vec<ddr_p> ddrs) 584{ 585 unsigned int i; 586 struct data_dependence_relation *ddr; 587 588 FOR_EACH_VEC_ELT (ddrs, i, ddr) 589 dump_data_dependence_relation (file, ddr); 590 591 fprintf (file, "\n\n"); 592} 593 594DEBUG_FUNCTION void 595debug_ddrs (vec<ddr_p> ddrs) 596{ 597 dump_ddrs (stderr, ddrs); 598} 599 600/* Helper function for split_constant_offset. Expresses OP0 CODE OP1 601 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero 602 constant of type ssizetype, and returns true. If we cannot do this 603 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false 604 is returned. */ 605 606static bool 607split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1, 608 tree *var, tree *off) 609{ 610 tree var0, var1; 611 tree off0, off1; 612 enum tree_code ocode = code; 613 614 *var = NULL_TREE; 615 *off = NULL_TREE; 616 617 switch (code) 618 { 619 case INTEGER_CST: 620 *var = build_int_cst (type, 0); 621 *off = fold_convert (ssizetype, op0); 622 return true; 623 624 case POINTER_PLUS_EXPR: 625 ocode = PLUS_EXPR; 626 /* FALLTHROUGH */ 627 case PLUS_EXPR: 628 case MINUS_EXPR: 629 split_constant_offset (op0, &var0, &off0); 630 split_constant_offset (op1, &var1, &off1); 631 *var = fold_build2 (code, type, var0, var1); 632 *off = size_binop (ocode, off0, off1); 633 return true; 634 635 case MULT_EXPR: 636 if (TREE_CODE (op1) != INTEGER_CST) 637 return false; 638 639 split_constant_offset (op0, &var0, &off0); 640 *var = fold_build2 (MULT_EXPR, type, var0, op1); 641 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1)); 642 return true; 643 644 case ADDR_EXPR: 645 { 646 tree base, poffset; 647 HOST_WIDE_INT pbitsize, pbitpos; 648 machine_mode pmode; 649 int punsignedp, pvolatilep; 650 651 op0 = TREE_OPERAND (op0, 0); 652 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, 653 &pmode, &punsignedp, &pvolatilep, false); 654 655 if (pbitpos % BITS_PER_UNIT != 0) 656 return false; 657 base = build_fold_addr_expr (base); 658 off0 = ssize_int (pbitpos / BITS_PER_UNIT); 659 660 if (poffset) 661 { 662 split_constant_offset (poffset, &poffset, &off1); 663 off0 = size_binop (PLUS_EXPR, off0, off1); 664 if (POINTER_TYPE_P (TREE_TYPE (base))) 665 base = fold_build_pointer_plus (base, poffset); 666 else 667 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, 668 fold_convert (TREE_TYPE (base), poffset)); 669 } 670 671 var0 = fold_convert (type, base); 672 673 /* If variable length types are involved, punt, otherwise casts 674 might be converted into ARRAY_REFs in gimplify_conversion. 675 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which 676 possibly no longer appears in current GIMPLE, might resurface. 677 This perhaps could run 678 if (CONVERT_EXPR_P (var0)) 679 { 680 gimplify_conversion (&var0); 681 // Attempt to fill in any within var0 found ARRAY_REF's 682 // element size from corresponding op embedded ARRAY_REF, 683 // if unsuccessful, just punt. 684 } */ 685 while (POINTER_TYPE_P (type)) 686 type = TREE_TYPE (type); 687 if (int_size_in_bytes (type) < 0) 688 return false; 689 690 *var = var0; 691 *off = off0; 692 return true; 693 } 694 695 case SSA_NAME: 696 { 697 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0)) 698 return false; 699 700 gimple def_stmt = SSA_NAME_DEF_STMT (op0); 701 enum tree_code subcode; 702 703 if (gimple_code (def_stmt) != GIMPLE_ASSIGN) 704 return false; 705 706 var0 = gimple_assign_rhs1 (def_stmt); 707 subcode = gimple_assign_rhs_code (def_stmt); 708 var1 = gimple_assign_rhs2 (def_stmt); 709 710 return split_constant_offset_1 (type, var0, subcode, var1, var, off); 711 } 712 CASE_CONVERT: 713 { 714 /* We must not introduce undefined overflow, and we must not change the value. 715 Hence we're okay if the inner type doesn't overflow to start with 716 (pointer or signed), the outer type also is an integer or pointer 717 and the outer precision is at least as large as the inner. */ 718 tree itype = TREE_TYPE (op0); 719 if ((POINTER_TYPE_P (itype) 720 || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype))) 721 && TYPE_PRECISION (type) >= TYPE_PRECISION (itype) 722 && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))) 723 { 724 split_constant_offset (op0, &var0, off); 725 *var = fold_convert (type, var0); 726 return true; 727 } 728 return false; 729 } 730 731 default: 732 return false; 733 } 734} 735 736/* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF 737 will be ssizetype. */ 738 739void 740split_constant_offset (tree exp, tree *var, tree *off) 741{ 742 tree type = TREE_TYPE (exp), otype, op0, op1, e, o; 743 enum tree_code code; 744 745 *var = exp; 746 *off = ssize_int (0); 747 STRIP_NOPS (exp); 748 749 if (tree_is_chrec (exp) 750 || get_gimple_rhs_class (TREE_CODE (exp)) == GIMPLE_TERNARY_RHS) 751 return; 752 753 otype = TREE_TYPE (exp); 754 code = TREE_CODE (exp); 755 extract_ops_from_tree (exp, &code, &op0, &op1); 756 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o)) 757 { 758 *var = fold_convert (type, e); 759 *off = o; 760 } 761} 762 763/* Returns the address ADDR of an object in a canonical shape (without nop 764 casts, and with type of pointer to the object). */ 765 766static tree 767canonicalize_base_object_address (tree addr) 768{ 769 tree orig = addr; 770 771 STRIP_NOPS (addr); 772 773 /* The base address may be obtained by casting from integer, in that case 774 keep the cast. */ 775 if (!POINTER_TYPE_P (TREE_TYPE (addr))) 776 return orig; 777 778 if (TREE_CODE (addr) != ADDR_EXPR) 779 return addr; 780 781 return build_fold_addr_expr (TREE_OPERAND (addr, 0)); 782} 783 784/* Analyzes the behavior of the memory reference DR in the innermost loop or 785 basic block that contains it. Returns true if analysis succeed or false 786 otherwise. */ 787 788bool 789dr_analyze_innermost (struct data_reference *dr, struct loop *nest) 790{ 791 gimple stmt = DR_STMT (dr); 792 struct loop *loop = loop_containing_stmt (stmt); 793 tree ref = DR_REF (dr); 794 HOST_WIDE_INT pbitsize, pbitpos; 795 tree base, poffset; 796 machine_mode pmode; 797 int punsignedp, pvolatilep; 798 affine_iv base_iv, offset_iv; 799 tree init, dinit, step; 800 bool in_loop = (loop && loop->num); 801 802 if (dump_file && (dump_flags & TDF_DETAILS)) 803 fprintf (dump_file, "analyze_innermost: "); 804 805 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, 806 &pmode, &punsignedp, &pvolatilep, false); 807 gcc_assert (base != NULL_TREE); 808 809 if (pbitpos % BITS_PER_UNIT != 0) 810 { 811 if (dump_file && (dump_flags & TDF_DETAILS)) 812 fprintf (dump_file, "failed: bit offset alignment.\n"); 813 return false; 814 } 815 816 if (TREE_CODE (base) == MEM_REF) 817 { 818 if (!integer_zerop (TREE_OPERAND (base, 1))) 819 { 820 offset_int moff = mem_ref_offset (base); 821 tree mofft = wide_int_to_tree (sizetype, moff); 822 if (!poffset) 823 poffset = mofft; 824 else 825 poffset = size_binop (PLUS_EXPR, poffset, mofft); 826 } 827 base = TREE_OPERAND (base, 0); 828 } 829 else 830 base = build_fold_addr_expr (base); 831 832 if (in_loop) 833 { 834 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, 835 nest ? true : false)) 836 { 837 if (nest) 838 { 839 if (dump_file && (dump_flags & TDF_DETAILS)) 840 fprintf (dump_file, "failed: evolution of base is not" 841 " affine.\n"); 842 return false; 843 } 844 else 845 { 846 base_iv.base = base; 847 base_iv.step = ssize_int (0); 848 base_iv.no_overflow = true; 849 } 850 } 851 } 852 else 853 { 854 base_iv.base = base; 855 base_iv.step = ssize_int (0); 856 base_iv.no_overflow = true; 857 } 858 859 if (!poffset) 860 { 861 offset_iv.base = ssize_int (0); 862 offset_iv.step = ssize_int (0); 863 } 864 else 865 { 866 if (!in_loop) 867 { 868 offset_iv.base = poffset; 869 offset_iv.step = ssize_int (0); 870 } 871 else if (!simple_iv (loop, loop_containing_stmt (stmt), 872 poffset, &offset_iv, 873 nest ? true : false)) 874 { 875 if (nest) 876 { 877 if (dump_file && (dump_flags & TDF_DETAILS)) 878 fprintf (dump_file, "failed: evolution of offset is not" 879 " affine.\n"); 880 return false; 881 } 882 else 883 { 884 offset_iv.base = poffset; 885 offset_iv.step = ssize_int (0); 886 } 887 } 888 } 889 890 init = ssize_int (pbitpos / BITS_PER_UNIT); 891 split_constant_offset (base_iv.base, &base_iv.base, &dinit); 892 init = size_binop (PLUS_EXPR, init, dinit); 893 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); 894 init = size_binop (PLUS_EXPR, init, dinit); 895 896 step = size_binop (PLUS_EXPR, 897 fold_convert (ssizetype, base_iv.step), 898 fold_convert (ssizetype, offset_iv.step)); 899 900 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base); 901 902 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base); 903 DR_INIT (dr) = init; 904 DR_STEP (dr) = step; 905 906 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base)); 907 908 if (dump_file && (dump_flags & TDF_DETAILS)) 909 fprintf (dump_file, "success.\n"); 910 911 return true; 912} 913 914/* Determines the base object and the list of indices of memory reference 915 DR, analyzed in LOOP and instantiated in loop nest NEST. */ 916 917static void 918dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop) 919{ 920 vec<tree> access_fns = vNULL; 921 tree ref, op; 922 tree base, off, access_fn; 923 basic_block before_loop; 924 925 /* If analyzing a basic-block there are no indices to analyze 926 and thus no access functions. */ 927 if (!nest) 928 { 929 DR_BASE_OBJECT (dr) = DR_REF (dr); 930 DR_ACCESS_FNS (dr).create (0); 931 return; 932 } 933 934 ref = DR_REF (dr); 935 before_loop = block_before_loop (nest); 936 937 /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses 938 into a two element array with a constant index. The base is 939 then just the immediate underlying object. */ 940 if (TREE_CODE (ref) == REALPART_EXPR) 941 { 942 ref = TREE_OPERAND (ref, 0); 943 access_fns.safe_push (integer_zero_node); 944 } 945 else if (TREE_CODE (ref) == IMAGPART_EXPR) 946 { 947 ref = TREE_OPERAND (ref, 0); 948 access_fns.safe_push (integer_one_node); 949 } 950 951 /* Analyze access functions of dimensions we know to be independent. */ 952 while (handled_component_p (ref)) 953 { 954 if (TREE_CODE (ref) == ARRAY_REF) 955 { 956 op = TREE_OPERAND (ref, 1); 957 access_fn = analyze_scalar_evolution (loop, op); 958 access_fn = instantiate_scev (before_loop, loop, access_fn); 959 access_fns.safe_push (access_fn); 960 } 961 else if (TREE_CODE (ref) == COMPONENT_REF 962 && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE) 963 { 964 /* For COMPONENT_REFs of records (but not unions!) use the 965 FIELD_DECL offset as constant access function so we can 966 disambiguate a[i].f1 and a[i].f2. */ 967 tree off = component_ref_field_offset (ref); 968 off = size_binop (PLUS_EXPR, 969 size_binop (MULT_EXPR, 970 fold_convert (bitsizetype, off), 971 bitsize_int (BITS_PER_UNIT)), 972 DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1))); 973 access_fns.safe_push (off); 974 } 975 else 976 /* If we have an unhandled component we could not translate 977 to an access function stop analyzing. We have determined 978 our base object in this case. */ 979 break; 980 981 ref = TREE_OPERAND (ref, 0); 982 } 983 984 /* If the address operand of a MEM_REF base has an evolution in the 985 analyzed nest, add it as an additional independent access-function. */ 986 if (TREE_CODE (ref) == MEM_REF) 987 { 988 op = TREE_OPERAND (ref, 0); 989 access_fn = analyze_scalar_evolution (loop, op); 990 access_fn = instantiate_scev (before_loop, loop, access_fn); 991 if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) 992 { 993 tree orig_type; 994 tree memoff = TREE_OPERAND (ref, 1); 995 base = initial_condition (access_fn); 996 orig_type = TREE_TYPE (base); 997 STRIP_USELESS_TYPE_CONVERSION (base); 998 split_constant_offset (base, &base, &off); 999 STRIP_USELESS_TYPE_CONVERSION (base); 1000 /* Fold the MEM_REF offset into the evolutions initial 1001 value to make more bases comparable. */ 1002 if (!integer_zerop (memoff)) 1003 { 1004 off = size_binop (PLUS_EXPR, off, 1005 fold_convert (ssizetype, memoff)); 1006 memoff = build_int_cst (TREE_TYPE (memoff), 0); 1007 } 1008 /* Adjust the offset so it is a multiple of the access type 1009 size and thus we separate bases that can possibly be used 1010 to produce partial overlaps (which the access_fn machinery 1011 cannot handle). */ 1012 wide_int rem; 1013 if (TYPE_SIZE_UNIT (TREE_TYPE (ref)) 1014 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST 1015 && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref)))) 1016 rem = wi::mod_trunc (off, TYPE_SIZE_UNIT (TREE_TYPE (ref)), SIGNED); 1017 else 1018 /* If we can't compute the remainder simply force the initial 1019 condition to zero. */ 1020 rem = off; 1021 off = wide_int_to_tree (ssizetype, wi::sub (off, rem)); 1022 memoff = wide_int_to_tree (TREE_TYPE (memoff), rem); 1023 /* And finally replace the initial condition. */ 1024 access_fn = chrec_replace_initial_condition 1025 (access_fn, fold_convert (orig_type, off)); 1026 /* ??? This is still not a suitable base object for 1027 dr_may_alias_p - the base object needs to be an 1028 access that covers the object as whole. With 1029 an evolution in the pointer this cannot be 1030 guaranteed. 1031 As a band-aid, mark the access so we can special-case 1032 it in dr_may_alias_p. */ 1033 tree old = ref; 1034 ref = fold_build2_loc (EXPR_LOCATION (ref), 1035 MEM_REF, TREE_TYPE (ref), 1036 base, memoff); 1037 MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old); 1038 MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old); 1039 DR_UNCONSTRAINED_BASE (dr) = true; 1040 access_fns.safe_push (access_fn); 1041 } 1042 } 1043 else if (DECL_P (ref)) 1044 { 1045 /* Canonicalize DR_BASE_OBJECT to MEM_REF form. */ 1046 ref = build2 (MEM_REF, TREE_TYPE (ref), 1047 build_fold_addr_expr (ref), 1048 build_int_cst (reference_alias_ptr_type (ref), 0)); 1049 } 1050 1051 DR_BASE_OBJECT (dr) = ref; 1052 DR_ACCESS_FNS (dr) = access_fns; 1053} 1054 1055/* Extracts the alias analysis information from the memory reference DR. */ 1056 1057static void 1058dr_analyze_alias (struct data_reference *dr) 1059{ 1060 tree ref = DR_REF (dr); 1061 tree base = get_base_address (ref), addr; 1062 1063 if (INDIRECT_REF_P (base) 1064 || TREE_CODE (base) == MEM_REF) 1065 { 1066 addr = TREE_OPERAND (base, 0); 1067 if (TREE_CODE (addr) == SSA_NAME) 1068 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr); 1069 } 1070} 1071 1072/* Frees data reference DR. */ 1073 1074void 1075free_data_ref (data_reference_p dr) 1076{ 1077 DR_ACCESS_FNS (dr).release (); 1078 free (dr); 1079} 1080 1081/* Analyzes memory reference MEMREF accessed in STMT. The reference 1082 is read if IS_READ is true, write otherwise. Returns the 1083 data_reference description of MEMREF. NEST is the outermost loop 1084 in which the reference should be instantiated, LOOP is the loop in 1085 which the data reference should be analyzed. */ 1086 1087struct data_reference * 1088create_data_ref (loop_p nest, loop_p loop, tree memref, gimple stmt, 1089 bool is_read) 1090{ 1091 struct data_reference *dr; 1092 1093 if (dump_file && (dump_flags & TDF_DETAILS)) 1094 { 1095 fprintf (dump_file, "Creating dr for "); 1096 print_generic_expr (dump_file, memref, TDF_SLIM); 1097 fprintf (dump_file, "\n"); 1098 } 1099 1100 dr = XCNEW (struct data_reference); 1101 DR_STMT (dr) = stmt; 1102 DR_REF (dr) = memref; 1103 DR_IS_READ (dr) = is_read; 1104 1105 dr_analyze_innermost (dr, nest); 1106 dr_analyze_indices (dr, nest, loop); 1107 dr_analyze_alias (dr); 1108 1109 if (dump_file && (dump_flags & TDF_DETAILS)) 1110 { 1111 unsigned i; 1112 fprintf (dump_file, "\tbase_address: "); 1113 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM); 1114 fprintf (dump_file, "\n\toffset from base address: "); 1115 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM); 1116 fprintf (dump_file, "\n\tconstant offset from base address: "); 1117 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM); 1118 fprintf (dump_file, "\n\tstep: "); 1119 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM); 1120 fprintf (dump_file, "\n\taligned to: "); 1121 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM); 1122 fprintf (dump_file, "\n\tbase_object: "); 1123 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM); 1124 fprintf (dump_file, "\n"); 1125 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++) 1126 { 1127 fprintf (dump_file, "\tAccess function %d: ", i); 1128 print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM); 1129 } 1130 } 1131 1132 return dr; 1133} 1134 1135/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical 1136 expressions. */ 1137static bool 1138dr_equal_offsets_p1 (tree offset1, tree offset2) 1139{ 1140 bool res; 1141 1142 STRIP_NOPS (offset1); 1143 STRIP_NOPS (offset2); 1144 1145 if (offset1 == offset2) 1146 return true; 1147 1148 if (TREE_CODE (offset1) != TREE_CODE (offset2) 1149 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1))) 1150 return false; 1151 1152 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0), 1153 TREE_OPERAND (offset2, 0)); 1154 1155 if (!res || !BINARY_CLASS_P (offset1)) 1156 return res; 1157 1158 res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1), 1159 TREE_OPERAND (offset2, 1)); 1160 1161 return res; 1162} 1163 1164/* Check if DRA and DRB have equal offsets. */ 1165bool 1166dr_equal_offsets_p (struct data_reference *dra, 1167 struct data_reference *drb) 1168{ 1169 tree offset1, offset2; 1170 1171 offset1 = DR_OFFSET (dra); 1172 offset2 = DR_OFFSET (drb); 1173 1174 return dr_equal_offsets_p1 (offset1, offset2); 1175} 1176 1177/* Returns true if FNA == FNB. */ 1178 1179static bool 1180affine_function_equal_p (affine_fn fna, affine_fn fnb) 1181{ 1182 unsigned i, n = fna.length (); 1183 1184 if (n != fnb.length ()) 1185 return false; 1186 1187 for (i = 0; i < n; i++) 1188 if (!operand_equal_p (fna[i], fnb[i], 0)) 1189 return false; 1190 1191 return true; 1192} 1193 1194/* If all the functions in CF are the same, returns one of them, 1195 otherwise returns NULL. */ 1196 1197static affine_fn 1198common_affine_function (conflict_function *cf) 1199{ 1200 unsigned i; 1201 affine_fn comm; 1202 1203 if (!CF_NONTRIVIAL_P (cf)) 1204 return affine_fn (); 1205 1206 comm = cf->fns[0]; 1207 1208 for (i = 1; i < cf->n; i++) 1209 if (!affine_function_equal_p (comm, cf->fns[i])) 1210 return affine_fn (); 1211 1212 return comm; 1213} 1214 1215/* Returns the base of the affine function FN. */ 1216 1217static tree 1218affine_function_base (affine_fn fn) 1219{ 1220 return fn[0]; 1221} 1222 1223/* Returns true if FN is a constant. */ 1224 1225static bool 1226affine_function_constant_p (affine_fn fn) 1227{ 1228 unsigned i; 1229 tree coef; 1230 1231 for (i = 1; fn.iterate (i, &coef); i++) 1232 if (!integer_zerop (coef)) 1233 return false; 1234 1235 return true; 1236} 1237 1238/* Returns true if FN is the zero constant function. */ 1239 1240static bool 1241affine_function_zero_p (affine_fn fn) 1242{ 1243 return (integer_zerop (affine_function_base (fn)) 1244 && affine_function_constant_p (fn)); 1245} 1246 1247/* Returns a signed integer type with the largest precision from TA 1248 and TB. */ 1249 1250static tree 1251signed_type_for_types (tree ta, tree tb) 1252{ 1253 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb)) 1254 return signed_type_for (ta); 1255 else 1256 return signed_type_for (tb); 1257} 1258 1259/* Applies operation OP on affine functions FNA and FNB, and returns the 1260 result. */ 1261 1262static affine_fn 1263affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb) 1264{ 1265 unsigned i, n, m; 1266 affine_fn ret; 1267 tree coef; 1268 1269 if (fnb.length () > fna.length ()) 1270 { 1271 n = fna.length (); 1272 m = fnb.length (); 1273 } 1274 else 1275 { 1276 n = fnb.length (); 1277 m = fna.length (); 1278 } 1279 1280 ret.create (m); 1281 for (i = 0; i < n; i++) 1282 { 1283 tree type = signed_type_for_types (TREE_TYPE (fna[i]), 1284 TREE_TYPE (fnb[i])); 1285 ret.quick_push (fold_build2 (op, type, fna[i], fnb[i])); 1286 } 1287 1288 for (; fna.iterate (i, &coef); i++) 1289 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 1290 coef, integer_zero_node)); 1291 for (; fnb.iterate (i, &coef); i++) 1292 ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)), 1293 integer_zero_node, coef)); 1294 1295 return ret; 1296} 1297 1298/* Returns the sum of affine functions FNA and FNB. */ 1299 1300static affine_fn 1301affine_fn_plus (affine_fn fna, affine_fn fnb) 1302{ 1303 return affine_fn_op (PLUS_EXPR, fna, fnb); 1304} 1305 1306/* Returns the difference of affine functions FNA and FNB. */ 1307 1308static affine_fn 1309affine_fn_minus (affine_fn fna, affine_fn fnb) 1310{ 1311 return affine_fn_op (MINUS_EXPR, fna, fnb); 1312} 1313 1314/* Frees affine function FN. */ 1315 1316static void 1317affine_fn_free (affine_fn fn) 1318{ 1319 fn.release (); 1320} 1321 1322/* Determine for each subscript in the data dependence relation DDR 1323 the distance. */ 1324 1325static void 1326compute_subscript_distance (struct data_dependence_relation *ddr) 1327{ 1328 conflict_function *cf_a, *cf_b; 1329 affine_fn fn_a, fn_b, diff; 1330 1331 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 1332 { 1333 unsigned int i; 1334 1335 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 1336 { 1337 struct subscript *subscript; 1338 1339 subscript = DDR_SUBSCRIPT (ddr, i); 1340 cf_a = SUB_CONFLICTS_IN_A (subscript); 1341 cf_b = SUB_CONFLICTS_IN_B (subscript); 1342 1343 fn_a = common_affine_function (cf_a); 1344 fn_b = common_affine_function (cf_b); 1345 if (!fn_a.exists () || !fn_b.exists ()) 1346 { 1347 SUB_DISTANCE (subscript) = chrec_dont_know; 1348 return; 1349 } 1350 diff = affine_fn_minus (fn_a, fn_b); 1351 1352 if (affine_function_constant_p (diff)) 1353 SUB_DISTANCE (subscript) = affine_function_base (diff); 1354 else 1355 SUB_DISTANCE (subscript) = chrec_dont_know; 1356 1357 affine_fn_free (diff); 1358 } 1359 } 1360} 1361 1362/* Returns the conflict function for "unknown". */ 1363 1364static conflict_function * 1365conflict_fn_not_known (void) 1366{ 1367 conflict_function *fn = XCNEW (conflict_function); 1368 fn->n = NOT_KNOWN; 1369 1370 return fn; 1371} 1372 1373/* Returns the conflict function for "independent". */ 1374 1375static conflict_function * 1376conflict_fn_no_dependence (void) 1377{ 1378 conflict_function *fn = XCNEW (conflict_function); 1379 fn->n = NO_DEPENDENCE; 1380 1381 return fn; 1382} 1383 1384/* Returns true if the address of OBJ is invariant in LOOP. */ 1385 1386static bool 1387object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj) 1388{ 1389 while (handled_component_p (obj)) 1390 { 1391 if (TREE_CODE (obj) == ARRAY_REF) 1392 { 1393 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only 1394 need to check the stride and the lower bound of the reference. */ 1395 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 1396 loop->num) 1397 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3), 1398 loop->num)) 1399 return false; 1400 } 1401 else if (TREE_CODE (obj) == COMPONENT_REF) 1402 { 1403 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2), 1404 loop->num)) 1405 return false; 1406 } 1407 obj = TREE_OPERAND (obj, 0); 1408 } 1409 1410 if (!INDIRECT_REF_P (obj) 1411 && TREE_CODE (obj) != MEM_REF) 1412 return true; 1413 1414 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0), 1415 loop->num); 1416} 1417 1418/* Returns false if we can prove that data references A and B do not alias, 1419 true otherwise. If LOOP_NEST is false no cross-iteration aliases are 1420 considered. */ 1421 1422bool 1423dr_may_alias_p (const struct data_reference *a, const struct data_reference *b, 1424 bool loop_nest) 1425{ 1426 tree addr_a = DR_BASE_OBJECT (a); 1427 tree addr_b = DR_BASE_OBJECT (b); 1428 1429 /* If we are not processing a loop nest but scalar code we 1430 do not need to care about possible cross-iteration dependences 1431 and thus can process the full original reference. Do so, 1432 similar to how loop invariant motion applies extra offset-based 1433 disambiguation. */ 1434 if (!loop_nest) 1435 { 1436 aff_tree off1, off2; 1437 widest_int size1, size2; 1438 get_inner_reference_aff (DR_REF (a), &off1, &size1); 1439 get_inner_reference_aff (DR_REF (b), &off2, &size2); 1440 aff_combination_scale (&off1, -1); 1441 aff_combination_add (&off2, &off1); 1442 if (aff_comb_cannot_overlap_p (&off2, size1, size2)) 1443 return false; 1444 } 1445 1446 if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF) 1447 && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF) 1448 && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b) 1449 && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b)) 1450 return false; 1451 1452 /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we 1453 do not know the size of the base-object. So we cannot do any 1454 offset/overlap based analysis but have to rely on points-to 1455 information only. */ 1456 if (TREE_CODE (addr_a) == MEM_REF 1457 && (DR_UNCONSTRAINED_BASE (a) 1458 || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)) 1459 { 1460 /* For true dependences we can apply TBAA. */ 1461 if (flag_strict_aliasing 1462 && DR_IS_WRITE (a) && DR_IS_READ (b) 1463 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 1464 get_alias_set (DR_REF (b)))) 1465 return false; 1466 if (TREE_CODE (addr_b) == MEM_REF) 1467 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1468 TREE_OPERAND (addr_b, 0)); 1469 else 1470 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1471 build_fold_addr_expr (addr_b)); 1472 } 1473 else if (TREE_CODE (addr_b) == MEM_REF 1474 && (DR_UNCONSTRAINED_BASE (b) 1475 || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)) 1476 { 1477 /* For true dependences we can apply TBAA. */ 1478 if (flag_strict_aliasing 1479 && DR_IS_WRITE (a) && DR_IS_READ (b) 1480 && !alias_sets_conflict_p (get_alias_set (DR_REF (a)), 1481 get_alias_set (DR_REF (b)))) 1482 return false; 1483 if (TREE_CODE (addr_a) == MEM_REF) 1484 return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0), 1485 TREE_OPERAND (addr_b, 0)); 1486 else 1487 return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a), 1488 TREE_OPERAND (addr_b, 0)); 1489 } 1490 1491 /* Otherwise DR_BASE_OBJECT is an access that covers the whole object 1492 that is being subsetted in the loop nest. */ 1493 if (DR_IS_WRITE (a) && DR_IS_WRITE (b)) 1494 return refs_output_dependent_p (addr_a, addr_b); 1495 else if (DR_IS_READ (a) && DR_IS_WRITE (b)) 1496 return refs_anti_dependent_p (addr_a, addr_b); 1497 return refs_may_alias_p (addr_a, addr_b); 1498} 1499 1500/* Initialize a data dependence relation between data accesses A and 1501 B. NB_LOOPS is the number of loops surrounding the references: the 1502 size of the classic distance/direction vectors. */ 1503 1504struct data_dependence_relation * 1505initialize_data_dependence_relation (struct data_reference *a, 1506 struct data_reference *b, 1507 vec<loop_p> loop_nest) 1508{ 1509 struct data_dependence_relation *res; 1510 unsigned int i; 1511 1512 res = XNEW (struct data_dependence_relation); 1513 DDR_A (res) = a; 1514 DDR_B (res) = b; 1515 DDR_LOOP_NEST (res).create (0); 1516 DDR_REVERSED_P (res) = false; 1517 DDR_SUBSCRIPTS (res).create (0); 1518 DDR_DIR_VECTS (res).create (0); 1519 DDR_DIST_VECTS (res).create (0); 1520 1521 if (a == NULL || b == NULL) 1522 { 1523 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1524 return res; 1525 } 1526 1527 /* If the data references do not alias, then they are independent. */ 1528 if (!dr_may_alias_p (a, b, loop_nest.exists ())) 1529 { 1530 DDR_ARE_DEPENDENT (res) = chrec_known; 1531 return res; 1532 } 1533 1534 /* The case where the references are exactly the same. */ 1535 if (operand_equal_p (DR_REF (a), DR_REF (b), 0)) 1536 { 1537 if ((loop_nest.exists () 1538 && !object_address_invariant_in_loop_p (loop_nest[0], 1539 DR_BASE_OBJECT (a))) 1540 || DR_NUM_DIMENSIONS (a) == 0) 1541 { 1542 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1543 return res; 1544 } 1545 DDR_AFFINE_P (res) = true; 1546 DDR_ARE_DEPENDENT (res) = NULL_TREE; 1547 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); 1548 DDR_LOOP_NEST (res) = loop_nest; 1549 DDR_INNER_LOOP (res) = 0; 1550 DDR_SELF_REFERENCE (res) = true; 1551 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) 1552 { 1553 struct subscript *subscript; 1554 1555 subscript = XNEW (struct subscript); 1556 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 1557 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 1558 SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 1559 SUB_DISTANCE (subscript) = chrec_dont_know; 1560 DDR_SUBSCRIPTS (res).safe_push (subscript); 1561 } 1562 return res; 1563 } 1564 1565 /* If the references do not access the same object, we do not know 1566 whether they alias or not. */ 1567 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0)) 1568 { 1569 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1570 return res; 1571 } 1572 1573 /* If the base of the object is not invariant in the loop nest, we cannot 1574 analyze it. TODO -- in fact, it would suffice to record that there may 1575 be arbitrary dependences in the loops where the base object varies. */ 1576 if ((loop_nest.exists () 1577 && !object_address_invariant_in_loop_p (loop_nest[0], DR_BASE_OBJECT (a))) 1578 || DR_NUM_DIMENSIONS (a) == 0) 1579 { 1580 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1581 return res; 1582 } 1583 1584 /* If the number of dimensions of the access to not agree we can have 1585 a pointer access to a component of the array element type and an 1586 array access while the base-objects are still the same. Punt. */ 1587 if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b)) 1588 { 1589 DDR_ARE_DEPENDENT (res) = chrec_dont_know; 1590 return res; 1591 } 1592 1593 DDR_AFFINE_P (res) = true; 1594 DDR_ARE_DEPENDENT (res) = NULL_TREE; 1595 DDR_SUBSCRIPTS (res).create (DR_NUM_DIMENSIONS (a)); 1596 DDR_LOOP_NEST (res) = loop_nest; 1597 DDR_INNER_LOOP (res) = 0; 1598 DDR_SELF_REFERENCE (res) = false; 1599 1600 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++) 1601 { 1602 struct subscript *subscript; 1603 1604 subscript = XNEW (struct subscript); 1605 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known (); 1606 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known (); 1607 SUB_LAST_CONFLICT (subscript) = chrec_dont_know; 1608 SUB_DISTANCE (subscript) = chrec_dont_know; 1609 DDR_SUBSCRIPTS (res).safe_push (subscript); 1610 } 1611 1612 return res; 1613} 1614 1615/* Frees memory used by the conflict function F. */ 1616 1617static void 1618free_conflict_function (conflict_function *f) 1619{ 1620 unsigned i; 1621 1622 if (CF_NONTRIVIAL_P (f)) 1623 { 1624 for (i = 0; i < f->n; i++) 1625 affine_fn_free (f->fns[i]); 1626 } 1627 free (f); 1628} 1629 1630/* Frees memory used by SUBSCRIPTS. */ 1631 1632static void 1633free_subscripts (vec<subscript_p> subscripts) 1634{ 1635 unsigned i; 1636 subscript_p s; 1637 1638 FOR_EACH_VEC_ELT (subscripts, i, s) 1639 { 1640 free_conflict_function (s->conflicting_iterations_in_a); 1641 free_conflict_function (s->conflicting_iterations_in_b); 1642 free (s); 1643 } 1644 subscripts.release (); 1645} 1646 1647/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap 1648 description. */ 1649 1650static inline void 1651finalize_ddr_dependent (struct data_dependence_relation *ddr, 1652 tree chrec) 1653{ 1654 DDR_ARE_DEPENDENT (ddr) = chrec; 1655 free_subscripts (DDR_SUBSCRIPTS (ddr)); 1656 DDR_SUBSCRIPTS (ddr).create (0); 1657} 1658 1659/* The dependence relation DDR cannot be represented by a distance 1660 vector. */ 1661 1662static inline void 1663non_affine_dependence_relation (struct data_dependence_relation *ddr) 1664{ 1665 if (dump_file && (dump_flags & TDF_DETAILS)) 1666 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n"); 1667 1668 DDR_AFFINE_P (ddr) = false; 1669} 1670 1671 1672 1673/* This section contains the classic Banerjee tests. */ 1674 1675/* Returns true iff CHREC_A and CHREC_B are not dependent on any index 1676 variables, i.e., if the ZIV (Zero Index Variable) test is true. */ 1677 1678static inline bool 1679ziv_subscript_p (const_tree chrec_a, const_tree chrec_b) 1680{ 1681 return (evolution_function_is_constant_p (chrec_a) 1682 && evolution_function_is_constant_p (chrec_b)); 1683} 1684 1685/* Returns true iff CHREC_A and CHREC_B are dependent on an index 1686 variable, i.e., if the SIV (Single Index Variable) test is true. */ 1687 1688static bool 1689siv_subscript_p (const_tree chrec_a, const_tree chrec_b) 1690{ 1691 if ((evolution_function_is_constant_p (chrec_a) 1692 && evolution_function_is_univariate_p (chrec_b)) 1693 || (evolution_function_is_constant_p (chrec_b) 1694 && evolution_function_is_univariate_p (chrec_a))) 1695 return true; 1696 1697 if (evolution_function_is_univariate_p (chrec_a) 1698 && evolution_function_is_univariate_p (chrec_b)) 1699 { 1700 switch (TREE_CODE (chrec_a)) 1701 { 1702 case POLYNOMIAL_CHREC: 1703 switch (TREE_CODE (chrec_b)) 1704 { 1705 case POLYNOMIAL_CHREC: 1706 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b)) 1707 return false; 1708 1709 default: 1710 return true; 1711 } 1712 1713 default: 1714 return true; 1715 } 1716 } 1717 1718 return false; 1719} 1720 1721/* Creates a conflict function with N dimensions. The affine functions 1722 in each dimension follow. */ 1723 1724static conflict_function * 1725conflict_fn (unsigned n, ...) 1726{ 1727 unsigned i; 1728 conflict_function *ret = XCNEW (conflict_function); 1729 va_list ap; 1730 1731 gcc_assert (0 < n && n <= MAX_DIM); 1732 va_start (ap, n); 1733 1734 ret->n = n; 1735 for (i = 0; i < n; i++) 1736 ret->fns[i] = va_arg (ap, affine_fn); 1737 va_end (ap); 1738 1739 return ret; 1740} 1741 1742/* Returns constant affine function with value CST. */ 1743 1744static affine_fn 1745affine_fn_cst (tree cst) 1746{ 1747 affine_fn fn; 1748 fn.create (1); 1749 fn.quick_push (cst); 1750 return fn; 1751} 1752 1753/* Returns affine function with single variable, CST + COEF * x_DIM. */ 1754 1755static affine_fn 1756affine_fn_univar (tree cst, unsigned dim, tree coef) 1757{ 1758 affine_fn fn; 1759 fn.create (dim + 1); 1760 unsigned i; 1761 1762 gcc_assert (dim > 0); 1763 fn.quick_push (cst); 1764 for (i = 1; i < dim; i++) 1765 fn.quick_push (integer_zero_node); 1766 fn.quick_push (coef); 1767 return fn; 1768} 1769 1770/* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and 1771 *OVERLAPS_B are initialized to the functions that describe the 1772 relation between the elements accessed twice by CHREC_A and 1773 CHREC_B. For k >= 0, the following property is verified: 1774 1775 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 1776 1777static void 1778analyze_ziv_subscript (tree chrec_a, 1779 tree chrec_b, 1780 conflict_function **overlaps_a, 1781 conflict_function **overlaps_b, 1782 tree *last_conflicts) 1783{ 1784 tree type, difference; 1785 dependence_stats.num_ziv++; 1786 1787 if (dump_file && (dump_flags & TDF_DETAILS)) 1788 fprintf (dump_file, "(analyze_ziv_subscript \n"); 1789 1790 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 1791 chrec_a = chrec_convert (type, chrec_a, NULL); 1792 chrec_b = chrec_convert (type, chrec_b, NULL); 1793 difference = chrec_fold_minus (type, chrec_a, chrec_b); 1794 1795 switch (TREE_CODE (difference)) 1796 { 1797 case INTEGER_CST: 1798 if (integer_zerop (difference)) 1799 { 1800 /* The difference is equal to zero: the accessed index 1801 overlaps for each iteration in the loop. */ 1802 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1803 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1804 *last_conflicts = chrec_dont_know; 1805 dependence_stats.num_ziv_dependent++; 1806 } 1807 else 1808 { 1809 /* The accesses do not overlap. */ 1810 *overlaps_a = conflict_fn_no_dependence (); 1811 *overlaps_b = conflict_fn_no_dependence (); 1812 *last_conflicts = integer_zero_node; 1813 dependence_stats.num_ziv_independent++; 1814 } 1815 break; 1816 1817 default: 1818 /* We're not sure whether the indexes overlap. For the moment, 1819 conservatively answer "don't know". */ 1820 if (dump_file && (dump_flags & TDF_DETAILS)) 1821 fprintf (dump_file, "ziv test failed: difference is non-integer.\n"); 1822 1823 *overlaps_a = conflict_fn_not_known (); 1824 *overlaps_b = conflict_fn_not_known (); 1825 *last_conflicts = chrec_dont_know; 1826 dependence_stats.num_ziv_unimplemented++; 1827 break; 1828 } 1829 1830 if (dump_file && (dump_flags & TDF_DETAILS)) 1831 fprintf (dump_file, ")\n"); 1832} 1833 1834/* Similar to max_stmt_executions_int, but returns the bound as a tree, 1835 and only if it fits to the int type. If this is not the case, or the 1836 bound on the number of iterations of LOOP could not be derived, returns 1837 chrec_dont_know. */ 1838 1839static tree 1840max_stmt_executions_tree (struct loop *loop) 1841{ 1842 widest_int nit; 1843 1844 if (!max_stmt_executions (loop, &nit)) 1845 return chrec_dont_know; 1846 1847 if (!wi::fits_to_tree_p (nit, unsigned_type_node)) 1848 return chrec_dont_know; 1849 1850 return wide_int_to_tree (unsigned_type_node, nit); 1851} 1852 1853/* Determine whether the CHREC is always positive/negative. If the expression 1854 cannot be statically analyzed, return false, otherwise set the answer into 1855 VALUE. */ 1856 1857static bool 1858chrec_is_positive (tree chrec, bool *value) 1859{ 1860 bool value0, value1, value2; 1861 tree end_value, nb_iter; 1862 1863 switch (TREE_CODE (chrec)) 1864 { 1865 case POLYNOMIAL_CHREC: 1866 if (!chrec_is_positive (CHREC_LEFT (chrec), &value0) 1867 || !chrec_is_positive (CHREC_RIGHT (chrec), &value1)) 1868 return false; 1869 1870 /* FIXME -- overflows. */ 1871 if (value0 == value1) 1872 { 1873 *value = value0; 1874 return true; 1875 } 1876 1877 /* Otherwise the chrec is under the form: "{-197, +, 2}_1", 1878 and the proof consists in showing that the sign never 1879 changes during the execution of the loop, from 0 to 1880 loop->nb_iterations. */ 1881 if (!evolution_function_is_affine_p (chrec)) 1882 return false; 1883 1884 nb_iter = number_of_latch_executions (get_chrec_loop (chrec)); 1885 if (chrec_contains_undetermined (nb_iter)) 1886 return false; 1887 1888#if 0 1889 /* TODO -- If the test is after the exit, we may decrease the number of 1890 iterations by one. */ 1891 if (after_exit) 1892 nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1)); 1893#endif 1894 1895 end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter); 1896 1897 if (!chrec_is_positive (end_value, &value2)) 1898 return false; 1899 1900 *value = value0; 1901 return value0 == value1; 1902 1903 case INTEGER_CST: 1904 switch (tree_int_cst_sgn (chrec)) 1905 { 1906 case -1: 1907 *value = false; 1908 break; 1909 case 1: 1910 *value = true; 1911 break; 1912 default: 1913 return false; 1914 } 1915 return true; 1916 1917 default: 1918 return false; 1919 } 1920} 1921 1922 1923/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a 1924 constant, and CHREC_B is an affine function. *OVERLAPS_A and 1925 *OVERLAPS_B are initialized to the functions that describe the 1926 relation between the elements accessed twice by CHREC_A and 1927 CHREC_B. For k >= 0, the following property is verified: 1928 1929 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 1930 1931static void 1932analyze_siv_subscript_cst_affine (tree chrec_a, 1933 tree chrec_b, 1934 conflict_function **overlaps_a, 1935 conflict_function **overlaps_b, 1936 tree *last_conflicts) 1937{ 1938 bool value0, value1, value2; 1939 tree type, difference, tmp; 1940 1941 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 1942 chrec_a = chrec_convert (type, chrec_a, NULL); 1943 chrec_b = chrec_convert (type, chrec_b, NULL); 1944 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a); 1945 1946 /* Special case overlap in the first iteration. */ 1947 if (integer_zerop (difference)) 1948 { 1949 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1950 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1951 *last_conflicts = integer_one_node; 1952 return; 1953 } 1954 1955 if (!chrec_is_positive (initial_condition (difference), &value0)) 1956 { 1957 if (dump_file && (dump_flags & TDF_DETAILS)) 1958 fprintf (dump_file, "siv test failed: chrec is not positive.\n"); 1959 1960 dependence_stats.num_siv_unimplemented++; 1961 *overlaps_a = conflict_fn_not_known (); 1962 *overlaps_b = conflict_fn_not_known (); 1963 *last_conflicts = chrec_dont_know; 1964 return; 1965 } 1966 else 1967 { 1968 if (value0 == false) 1969 { 1970 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1)) 1971 { 1972 if (dump_file && (dump_flags & TDF_DETAILS)) 1973 fprintf (dump_file, "siv test failed: chrec not positive.\n"); 1974 1975 *overlaps_a = conflict_fn_not_known (); 1976 *overlaps_b = conflict_fn_not_known (); 1977 *last_conflicts = chrec_dont_know; 1978 dependence_stats.num_siv_unimplemented++; 1979 return; 1980 } 1981 else 1982 { 1983 if (value1 == true) 1984 { 1985 /* Example: 1986 chrec_a = 12 1987 chrec_b = {10, +, 1} 1988 */ 1989 1990 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 1991 { 1992 HOST_WIDE_INT numiter; 1993 struct loop *loop = get_chrec_loop (chrec_b); 1994 1995 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 1996 tmp = fold_build2 (EXACT_DIV_EXPR, type, 1997 fold_build1 (ABS_EXPR, type, difference), 1998 CHREC_RIGHT (chrec_b)); 1999 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 2000 *last_conflicts = integer_one_node; 2001 2002 2003 /* Perform weak-zero siv test to see if overlap is 2004 outside the loop bounds. */ 2005 numiter = max_stmt_executions_int (loop); 2006 2007 if (numiter >= 0 2008 && compare_tree_int (tmp, numiter) > 0) 2009 { 2010 free_conflict_function (*overlaps_a); 2011 free_conflict_function (*overlaps_b); 2012 *overlaps_a = conflict_fn_no_dependence (); 2013 *overlaps_b = conflict_fn_no_dependence (); 2014 *last_conflicts = integer_zero_node; 2015 dependence_stats.num_siv_independent++; 2016 return; 2017 } 2018 dependence_stats.num_siv_dependent++; 2019 return; 2020 } 2021 2022 /* When the step does not divide the difference, there are 2023 no overlaps. */ 2024 else 2025 { 2026 *overlaps_a = conflict_fn_no_dependence (); 2027 *overlaps_b = conflict_fn_no_dependence (); 2028 *last_conflicts = integer_zero_node; 2029 dependence_stats.num_siv_independent++; 2030 return; 2031 } 2032 } 2033 2034 else 2035 { 2036 /* Example: 2037 chrec_a = 12 2038 chrec_b = {10, +, -1} 2039 2040 In this case, chrec_a will not overlap with chrec_b. */ 2041 *overlaps_a = conflict_fn_no_dependence (); 2042 *overlaps_b = conflict_fn_no_dependence (); 2043 *last_conflicts = integer_zero_node; 2044 dependence_stats.num_siv_independent++; 2045 return; 2046 } 2047 } 2048 } 2049 else 2050 { 2051 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2)) 2052 { 2053 if (dump_file && (dump_flags & TDF_DETAILS)) 2054 fprintf (dump_file, "siv test failed: chrec not positive.\n"); 2055 2056 *overlaps_a = conflict_fn_not_known (); 2057 *overlaps_b = conflict_fn_not_known (); 2058 *last_conflicts = chrec_dont_know; 2059 dependence_stats.num_siv_unimplemented++; 2060 return; 2061 } 2062 else 2063 { 2064 if (value2 == false) 2065 { 2066 /* Example: 2067 chrec_a = 3 2068 chrec_b = {10, +, -1} 2069 */ 2070 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference)) 2071 { 2072 HOST_WIDE_INT numiter; 2073 struct loop *loop = get_chrec_loop (chrec_b); 2074 2075 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2076 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference, 2077 CHREC_RIGHT (chrec_b)); 2078 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp)); 2079 *last_conflicts = integer_one_node; 2080 2081 /* Perform weak-zero siv test to see if overlap is 2082 outside the loop bounds. */ 2083 numiter = max_stmt_executions_int (loop); 2084 2085 if (numiter >= 0 2086 && compare_tree_int (tmp, numiter) > 0) 2087 { 2088 free_conflict_function (*overlaps_a); 2089 free_conflict_function (*overlaps_b); 2090 *overlaps_a = conflict_fn_no_dependence (); 2091 *overlaps_b = conflict_fn_no_dependence (); 2092 *last_conflicts = integer_zero_node; 2093 dependence_stats.num_siv_independent++; 2094 return; 2095 } 2096 dependence_stats.num_siv_dependent++; 2097 return; 2098 } 2099 2100 /* When the step does not divide the difference, there 2101 are no overlaps. */ 2102 else 2103 { 2104 *overlaps_a = conflict_fn_no_dependence (); 2105 *overlaps_b = conflict_fn_no_dependence (); 2106 *last_conflicts = integer_zero_node; 2107 dependence_stats.num_siv_independent++; 2108 return; 2109 } 2110 } 2111 else 2112 { 2113 /* Example: 2114 chrec_a = 3 2115 chrec_b = {4, +, 1} 2116 2117 In this case, chrec_a will not overlap with chrec_b. */ 2118 *overlaps_a = conflict_fn_no_dependence (); 2119 *overlaps_b = conflict_fn_no_dependence (); 2120 *last_conflicts = integer_zero_node; 2121 dependence_stats.num_siv_independent++; 2122 return; 2123 } 2124 } 2125 } 2126 } 2127} 2128 2129/* Helper recursive function for initializing the matrix A. Returns 2130 the initial value of CHREC. */ 2131 2132static tree 2133initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult) 2134{ 2135 gcc_assert (chrec); 2136 2137 switch (TREE_CODE (chrec)) 2138 { 2139 case POLYNOMIAL_CHREC: 2140 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST); 2141 2142 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec)); 2143 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult); 2144 2145 case PLUS_EXPR: 2146 case MULT_EXPR: 2147 case MINUS_EXPR: 2148 { 2149 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2150 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult); 2151 2152 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1); 2153 } 2154 2155 CASE_CONVERT: 2156 { 2157 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2158 return chrec_convert (chrec_type (chrec), op, NULL); 2159 } 2160 2161 case BIT_NOT_EXPR: 2162 { 2163 /* Handle ~X as -1 - X. */ 2164 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult); 2165 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec), 2166 build_int_cst (TREE_TYPE (chrec), -1), op); 2167 } 2168 2169 case INTEGER_CST: 2170 return chrec; 2171 2172 default: 2173 gcc_unreachable (); 2174 return NULL_TREE; 2175 } 2176} 2177 2178#define FLOOR_DIV(x,y) ((x) / (y)) 2179 2180/* Solves the special case of the Diophantine equation: 2181 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B) 2182 2183 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the 2184 number of iterations that loops X and Y run. The overlaps will be 2185 constructed as evolutions in dimension DIM. */ 2186 2187static void 2188compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b, 2189 affine_fn *overlaps_a, 2190 affine_fn *overlaps_b, 2191 tree *last_conflicts, int dim) 2192{ 2193 if (((step_a > 0 && step_b > 0) 2194 || (step_a < 0 && step_b < 0))) 2195 { 2196 int step_overlaps_a, step_overlaps_b; 2197 int gcd_steps_a_b, last_conflict, tau2; 2198 2199 gcd_steps_a_b = gcd (step_a, step_b); 2200 step_overlaps_a = step_b / gcd_steps_a_b; 2201 step_overlaps_b = step_a / gcd_steps_a_b; 2202 2203 if (niter > 0) 2204 { 2205 tau2 = FLOOR_DIV (niter, step_overlaps_a); 2206 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b)); 2207 last_conflict = tau2; 2208 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2209 } 2210 else 2211 *last_conflicts = chrec_dont_know; 2212 2213 *overlaps_a = affine_fn_univar (integer_zero_node, dim, 2214 build_int_cst (NULL_TREE, 2215 step_overlaps_a)); 2216 *overlaps_b = affine_fn_univar (integer_zero_node, dim, 2217 build_int_cst (NULL_TREE, 2218 step_overlaps_b)); 2219 } 2220 2221 else 2222 { 2223 *overlaps_a = affine_fn_cst (integer_zero_node); 2224 *overlaps_b = affine_fn_cst (integer_zero_node); 2225 *last_conflicts = integer_zero_node; 2226 } 2227} 2228 2229/* Solves the special case of a Diophantine equation where CHREC_A is 2230 an affine bivariate function, and CHREC_B is an affine univariate 2231 function. For example, 2232 2233 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z 2234 2235 has the following overlapping functions: 2236 2237 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v 2238 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v 2239 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v 2240 2241 FORNOW: This is a specialized implementation for a case occurring in 2242 a common benchmark. Implement the general algorithm. */ 2243 2244static void 2245compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b, 2246 conflict_function **overlaps_a, 2247 conflict_function **overlaps_b, 2248 tree *last_conflicts) 2249{ 2250 bool xz_p, yz_p, xyz_p; 2251 int step_x, step_y, step_z; 2252 HOST_WIDE_INT niter_x, niter_y, niter_z, niter; 2253 affine_fn overlaps_a_xz, overlaps_b_xz; 2254 affine_fn overlaps_a_yz, overlaps_b_yz; 2255 affine_fn overlaps_a_xyz, overlaps_b_xyz; 2256 affine_fn ova1, ova2, ovb; 2257 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz; 2258 2259 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a))); 2260 step_y = int_cst_value (CHREC_RIGHT (chrec_a)); 2261 step_z = int_cst_value (CHREC_RIGHT (chrec_b)); 2262 2263 niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a))); 2264 niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2265 niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2266 2267 if (niter_x < 0 || niter_y < 0 || niter_z < 0) 2268 { 2269 if (dump_file && (dump_flags & TDF_DETAILS)) 2270 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n"); 2271 2272 *overlaps_a = conflict_fn_not_known (); 2273 *overlaps_b = conflict_fn_not_known (); 2274 *last_conflicts = chrec_dont_know; 2275 return; 2276 } 2277 2278 niter = MIN (niter_x, niter_z); 2279 compute_overlap_steps_for_affine_univar (niter, step_x, step_z, 2280 &overlaps_a_xz, 2281 &overlaps_b_xz, 2282 &last_conflicts_xz, 1); 2283 niter = MIN (niter_y, niter_z); 2284 compute_overlap_steps_for_affine_univar (niter, step_y, step_z, 2285 &overlaps_a_yz, 2286 &overlaps_b_yz, 2287 &last_conflicts_yz, 2); 2288 niter = MIN (niter_x, niter_z); 2289 niter = MIN (niter_y, niter); 2290 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z, 2291 &overlaps_a_xyz, 2292 &overlaps_b_xyz, 2293 &last_conflicts_xyz, 3); 2294 2295 xz_p = !integer_zerop (last_conflicts_xz); 2296 yz_p = !integer_zerop (last_conflicts_yz); 2297 xyz_p = !integer_zerop (last_conflicts_xyz); 2298 2299 if (xz_p || yz_p || xyz_p) 2300 { 2301 ova1 = affine_fn_cst (integer_zero_node); 2302 ova2 = affine_fn_cst (integer_zero_node); 2303 ovb = affine_fn_cst (integer_zero_node); 2304 if (xz_p) 2305 { 2306 affine_fn t0 = ova1; 2307 affine_fn t2 = ovb; 2308 2309 ova1 = affine_fn_plus (ova1, overlaps_a_xz); 2310 ovb = affine_fn_plus (ovb, overlaps_b_xz); 2311 affine_fn_free (t0); 2312 affine_fn_free (t2); 2313 *last_conflicts = last_conflicts_xz; 2314 } 2315 if (yz_p) 2316 { 2317 affine_fn t0 = ova2; 2318 affine_fn t2 = ovb; 2319 2320 ova2 = affine_fn_plus (ova2, overlaps_a_yz); 2321 ovb = affine_fn_plus (ovb, overlaps_b_yz); 2322 affine_fn_free (t0); 2323 affine_fn_free (t2); 2324 *last_conflicts = last_conflicts_yz; 2325 } 2326 if (xyz_p) 2327 { 2328 affine_fn t0 = ova1; 2329 affine_fn t2 = ova2; 2330 affine_fn t4 = ovb; 2331 2332 ova1 = affine_fn_plus (ova1, overlaps_a_xyz); 2333 ova2 = affine_fn_plus (ova2, overlaps_a_xyz); 2334 ovb = affine_fn_plus (ovb, overlaps_b_xyz); 2335 affine_fn_free (t0); 2336 affine_fn_free (t2); 2337 affine_fn_free (t4); 2338 *last_conflicts = last_conflicts_xyz; 2339 } 2340 *overlaps_a = conflict_fn (2, ova1, ova2); 2341 *overlaps_b = conflict_fn (1, ovb); 2342 } 2343 else 2344 { 2345 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2346 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2347 *last_conflicts = integer_zero_node; 2348 } 2349 2350 affine_fn_free (overlaps_a_xz); 2351 affine_fn_free (overlaps_b_xz); 2352 affine_fn_free (overlaps_a_yz); 2353 affine_fn_free (overlaps_b_yz); 2354 affine_fn_free (overlaps_a_xyz); 2355 affine_fn_free (overlaps_b_xyz); 2356} 2357 2358/* Copy the elements of vector VEC1 with length SIZE to VEC2. */ 2359 2360static void 2361lambda_vector_copy (lambda_vector vec1, lambda_vector vec2, 2362 int size) 2363{ 2364 memcpy (vec2, vec1, size * sizeof (*vec1)); 2365} 2366 2367/* Copy the elements of M x N matrix MAT1 to MAT2. */ 2368 2369static void 2370lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2, 2371 int m, int n) 2372{ 2373 int i; 2374 2375 for (i = 0; i < m; i++) 2376 lambda_vector_copy (mat1[i], mat2[i], n); 2377} 2378 2379/* Store the N x N identity matrix in MAT. */ 2380 2381static void 2382lambda_matrix_id (lambda_matrix mat, int size) 2383{ 2384 int i, j; 2385 2386 for (i = 0; i < size; i++) 2387 for (j = 0; j < size; j++) 2388 mat[i][j] = (i == j) ? 1 : 0; 2389} 2390 2391/* Return the first nonzero element of vector VEC1 between START and N. 2392 We must have START <= N. Returns N if VEC1 is the zero vector. */ 2393 2394static int 2395lambda_vector_first_nz (lambda_vector vec1, int n, int start) 2396{ 2397 int j = start; 2398 while (j < n && vec1[j] == 0) 2399 j++; 2400 return j; 2401} 2402 2403/* Add a multiple of row R1 of matrix MAT with N columns to row R2: 2404 R2 = R2 + CONST1 * R1. */ 2405 2406static void 2407lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2, int const1) 2408{ 2409 int i; 2410 2411 if (const1 == 0) 2412 return; 2413 2414 for (i = 0; i < n; i++) 2415 mat[r2][i] += const1 * mat[r1][i]; 2416} 2417 2418/* Swap rows R1 and R2 in matrix MAT. */ 2419 2420static void 2421lambda_matrix_row_exchange (lambda_matrix mat, int r1, int r2) 2422{ 2423 lambda_vector row; 2424 2425 row = mat[r1]; 2426 mat[r1] = mat[r2]; 2427 mat[r2] = row; 2428} 2429 2430/* Multiply vector VEC1 of length SIZE by a constant CONST1, 2431 and store the result in VEC2. */ 2432 2433static void 2434lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2, 2435 int size, int const1) 2436{ 2437 int i; 2438 2439 if (const1 == 0) 2440 lambda_vector_clear (vec2, size); 2441 else 2442 for (i = 0; i < size; i++) 2443 vec2[i] = const1 * vec1[i]; 2444} 2445 2446/* Negate vector VEC1 with length SIZE and store it in VEC2. */ 2447 2448static void 2449lambda_vector_negate (lambda_vector vec1, lambda_vector vec2, 2450 int size) 2451{ 2452 lambda_vector_mult_const (vec1, vec2, size, -1); 2453} 2454 2455/* Negate row R1 of matrix MAT which has N columns. */ 2456 2457static void 2458lambda_matrix_row_negate (lambda_matrix mat, int n, int r1) 2459{ 2460 lambda_vector_negate (mat[r1], mat[r1], n); 2461} 2462 2463/* Return true if two vectors are equal. */ 2464 2465static bool 2466lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size) 2467{ 2468 int i; 2469 for (i = 0; i < size; i++) 2470 if (vec1[i] != vec2[i]) 2471 return false; 2472 return true; 2473} 2474 2475/* Given an M x N integer matrix A, this function determines an M x 2476 M unimodular matrix U, and an M x N echelon matrix S such that 2477 "U.A = S". This decomposition is also known as "right Hermite". 2478 2479 Ref: Algorithm 2.1 page 33 in "Loop Transformations for 2480 Restructuring Compilers" Utpal Banerjee. */ 2481 2482static void 2483lambda_matrix_right_hermite (lambda_matrix A, int m, int n, 2484 lambda_matrix S, lambda_matrix U) 2485{ 2486 int i, j, i0 = 0; 2487 2488 lambda_matrix_copy (A, S, m, n); 2489 lambda_matrix_id (U, m); 2490 2491 for (j = 0; j < n; j++) 2492 { 2493 if (lambda_vector_first_nz (S[j], m, i0) < m) 2494 { 2495 ++i0; 2496 for (i = m - 1; i >= i0; i--) 2497 { 2498 while (S[i][j] != 0) 2499 { 2500 int sigma, factor, a, b; 2501 2502 a = S[i-1][j]; 2503 b = S[i][j]; 2504 sigma = (a * b < 0) ? -1: 1; 2505 a = abs (a); 2506 b = abs (b); 2507 factor = sigma * (a / b); 2508 2509 lambda_matrix_row_add (S, n, i, i-1, -factor); 2510 lambda_matrix_row_exchange (S, i, i-1); 2511 2512 lambda_matrix_row_add (U, m, i, i-1, -factor); 2513 lambda_matrix_row_exchange (U, i, i-1); 2514 } 2515 } 2516 } 2517 } 2518} 2519 2520/* Determines the overlapping elements due to accesses CHREC_A and 2521 CHREC_B, that are affine functions. This function cannot handle 2522 symbolic evolution functions, ie. when initial conditions are 2523 parameters, because it uses lambda matrices of integers. */ 2524 2525static void 2526analyze_subscript_affine_affine (tree chrec_a, 2527 tree chrec_b, 2528 conflict_function **overlaps_a, 2529 conflict_function **overlaps_b, 2530 tree *last_conflicts) 2531{ 2532 unsigned nb_vars_a, nb_vars_b, dim; 2533 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta; 2534 lambda_matrix A, U, S; 2535 struct obstack scratch_obstack; 2536 2537 if (eq_evolutions_p (chrec_a, chrec_b)) 2538 { 2539 /* The accessed index overlaps for each iteration in the 2540 loop. */ 2541 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2542 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2543 *last_conflicts = chrec_dont_know; 2544 return; 2545 } 2546 if (dump_file && (dump_flags & TDF_DETAILS)) 2547 fprintf (dump_file, "(analyze_subscript_affine_affine \n"); 2548 2549 /* For determining the initial intersection, we have to solve a 2550 Diophantine equation. This is the most time consuming part. 2551 2552 For answering to the question: "Is there a dependence?" we have 2553 to prove that there exists a solution to the Diophantine 2554 equation, and that the solution is in the iteration domain, 2555 i.e. the solution is positive or zero, and that the solution 2556 happens before the upper bound loop.nb_iterations. Otherwise 2557 there is no dependence. This function outputs a description of 2558 the iterations that hold the intersections. */ 2559 2560 nb_vars_a = nb_vars_in_chrec (chrec_a); 2561 nb_vars_b = nb_vars_in_chrec (chrec_b); 2562 2563 gcc_obstack_init (&scratch_obstack); 2564 2565 dim = nb_vars_a + nb_vars_b; 2566 U = lambda_matrix_new (dim, dim, &scratch_obstack); 2567 A = lambda_matrix_new (dim, 1, &scratch_obstack); 2568 S = lambda_matrix_new (dim, 1, &scratch_obstack); 2569 2570 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1)); 2571 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1)); 2572 gamma = init_b - init_a; 2573 2574 /* Don't do all the hard work of solving the Diophantine equation 2575 when we already know the solution: for example, 2576 | {3, +, 1}_1 2577 | {3, +, 4}_2 2578 | gamma = 3 - 3 = 0. 2579 Then the first overlap occurs during the first iterations: 2580 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x) 2581 */ 2582 if (gamma == 0) 2583 { 2584 if (nb_vars_a == 1 && nb_vars_b == 1) 2585 { 2586 HOST_WIDE_INT step_a, step_b; 2587 HOST_WIDE_INT niter, niter_a, niter_b; 2588 affine_fn ova, ovb; 2589 2590 niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2591 niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2592 niter = MIN (niter_a, niter_b); 2593 step_a = int_cst_value (CHREC_RIGHT (chrec_a)); 2594 step_b = int_cst_value (CHREC_RIGHT (chrec_b)); 2595 2596 compute_overlap_steps_for_affine_univar (niter, step_a, step_b, 2597 &ova, &ovb, 2598 last_conflicts, 1); 2599 *overlaps_a = conflict_fn (1, ova); 2600 *overlaps_b = conflict_fn (1, ovb); 2601 } 2602 2603 else if (nb_vars_a == 2 && nb_vars_b == 1) 2604 compute_overlap_steps_for_affine_1_2 2605 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts); 2606 2607 else if (nb_vars_a == 1 && nb_vars_b == 2) 2608 compute_overlap_steps_for_affine_1_2 2609 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts); 2610 2611 else 2612 { 2613 if (dump_file && (dump_flags & TDF_DETAILS)) 2614 fprintf (dump_file, "affine-affine test failed: too many variables.\n"); 2615 *overlaps_a = conflict_fn_not_known (); 2616 *overlaps_b = conflict_fn_not_known (); 2617 *last_conflicts = chrec_dont_know; 2618 } 2619 goto end_analyze_subs_aa; 2620 } 2621 2622 /* U.A = S */ 2623 lambda_matrix_right_hermite (A, dim, 1, S, U); 2624 2625 if (S[0][0] < 0) 2626 { 2627 S[0][0] *= -1; 2628 lambda_matrix_row_negate (U, dim, 0); 2629 } 2630 gcd_alpha_beta = S[0][0]; 2631 2632 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5, 2633 but that is a quite strange case. Instead of ICEing, answer 2634 don't know. */ 2635 if (gcd_alpha_beta == 0) 2636 { 2637 *overlaps_a = conflict_fn_not_known (); 2638 *overlaps_b = conflict_fn_not_known (); 2639 *last_conflicts = chrec_dont_know; 2640 goto end_analyze_subs_aa; 2641 } 2642 2643 /* The classic "gcd-test". */ 2644 if (!int_divides_p (gcd_alpha_beta, gamma)) 2645 { 2646 /* The "gcd-test" has determined that there is no integer 2647 solution, i.e. there is no dependence. */ 2648 *overlaps_a = conflict_fn_no_dependence (); 2649 *overlaps_b = conflict_fn_no_dependence (); 2650 *last_conflicts = integer_zero_node; 2651 } 2652 2653 /* Both access functions are univariate. This includes SIV and MIV cases. */ 2654 else if (nb_vars_a == 1 && nb_vars_b == 1) 2655 { 2656 /* Both functions should have the same evolution sign. */ 2657 if (((A[0][0] > 0 && -A[1][0] > 0) 2658 || (A[0][0] < 0 && -A[1][0] < 0))) 2659 { 2660 /* The solutions are given by: 2661 | 2662 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0] 2663 | [u21 u22] [y0] 2664 2665 For a given integer t. Using the following variables, 2666 2667 | i0 = u11 * gamma / gcd_alpha_beta 2668 | j0 = u12 * gamma / gcd_alpha_beta 2669 | i1 = u21 2670 | j1 = u22 2671 2672 the solutions are: 2673 2674 | x0 = i0 + i1 * t, 2675 | y0 = j0 + j1 * t. */ 2676 HOST_WIDE_INT i0, j0, i1, j1; 2677 2678 i0 = U[0][0] * gamma / gcd_alpha_beta; 2679 j0 = U[0][1] * gamma / gcd_alpha_beta; 2680 i1 = U[1][0]; 2681 j1 = U[1][1]; 2682 2683 if ((i1 == 0 && i0 < 0) 2684 || (j1 == 0 && j0 < 0)) 2685 { 2686 /* There is no solution. 2687 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations" 2688 falls in here, but for the moment we don't look at the 2689 upper bound of the iteration domain. */ 2690 *overlaps_a = conflict_fn_no_dependence (); 2691 *overlaps_b = conflict_fn_no_dependence (); 2692 *last_conflicts = integer_zero_node; 2693 goto end_analyze_subs_aa; 2694 } 2695 2696 if (i1 > 0 && j1 > 0) 2697 { 2698 HOST_WIDE_INT niter_a 2699 = max_stmt_executions_int (get_chrec_loop (chrec_a)); 2700 HOST_WIDE_INT niter_b 2701 = max_stmt_executions_int (get_chrec_loop (chrec_b)); 2702 HOST_WIDE_INT niter = MIN (niter_a, niter_b); 2703 2704 /* (X0, Y0) is a solution of the Diophantine equation: 2705 "chrec_a (X0) = chrec_b (Y0)". */ 2706 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1), 2707 CEIL (-j0, j1)); 2708 HOST_WIDE_INT x0 = i1 * tau1 + i0; 2709 HOST_WIDE_INT y0 = j1 * tau1 + j0; 2710 2711 /* (X1, Y1) is the smallest positive solution of the eq 2712 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the 2713 first conflict occurs. */ 2714 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1); 2715 HOST_WIDE_INT x1 = x0 - i1 * min_multiple; 2716 HOST_WIDE_INT y1 = y0 - j1 * min_multiple; 2717 2718 if (niter > 0) 2719 { 2720 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1), 2721 FLOOR_DIV (niter - j0, j1)); 2722 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1; 2723 2724 /* If the overlap occurs outside of the bounds of the 2725 loop, there is no dependence. */ 2726 if (x1 >= niter || y1 >= niter) 2727 { 2728 *overlaps_a = conflict_fn_no_dependence (); 2729 *overlaps_b = conflict_fn_no_dependence (); 2730 *last_conflicts = integer_zero_node; 2731 goto end_analyze_subs_aa; 2732 } 2733 else 2734 *last_conflicts = build_int_cst (NULL_TREE, last_conflict); 2735 } 2736 else 2737 *last_conflicts = chrec_dont_know; 2738 2739 *overlaps_a 2740 = conflict_fn (1, 2741 affine_fn_univar (build_int_cst (NULL_TREE, x1), 2742 1, 2743 build_int_cst (NULL_TREE, i1))); 2744 *overlaps_b 2745 = conflict_fn (1, 2746 affine_fn_univar (build_int_cst (NULL_TREE, y1), 2747 1, 2748 build_int_cst (NULL_TREE, j1))); 2749 } 2750 else 2751 { 2752 /* FIXME: For the moment, the upper bound of the 2753 iteration domain for i and j is not checked. */ 2754 if (dump_file && (dump_flags & TDF_DETAILS)) 2755 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2756 *overlaps_a = conflict_fn_not_known (); 2757 *overlaps_b = conflict_fn_not_known (); 2758 *last_conflicts = chrec_dont_know; 2759 } 2760 } 2761 else 2762 { 2763 if (dump_file && (dump_flags & TDF_DETAILS)) 2764 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2765 *overlaps_a = conflict_fn_not_known (); 2766 *overlaps_b = conflict_fn_not_known (); 2767 *last_conflicts = chrec_dont_know; 2768 } 2769 } 2770 else 2771 { 2772 if (dump_file && (dump_flags & TDF_DETAILS)) 2773 fprintf (dump_file, "affine-affine test failed: unimplemented.\n"); 2774 *overlaps_a = conflict_fn_not_known (); 2775 *overlaps_b = conflict_fn_not_known (); 2776 *last_conflicts = chrec_dont_know; 2777 } 2778 2779end_analyze_subs_aa: 2780 obstack_free (&scratch_obstack, NULL); 2781 if (dump_file && (dump_flags & TDF_DETAILS)) 2782 { 2783 fprintf (dump_file, " (overlaps_a = "); 2784 dump_conflict_function (dump_file, *overlaps_a); 2785 fprintf (dump_file, ")\n (overlaps_b = "); 2786 dump_conflict_function (dump_file, *overlaps_b); 2787 fprintf (dump_file, "))\n"); 2788 } 2789} 2790 2791/* Returns true when analyze_subscript_affine_affine can be used for 2792 determining the dependence relation between chrec_a and chrec_b, 2793 that contain symbols. This function modifies chrec_a and chrec_b 2794 such that the analysis result is the same, and such that they don't 2795 contain symbols, and then can safely be passed to the analyzer. 2796 2797 Example: The analysis of the following tuples of evolutions produce 2798 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1 2799 vs. {0, +, 1}_1 2800 2801 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1) 2802 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1) 2803*/ 2804 2805static bool 2806can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b) 2807{ 2808 tree diff, type, left_a, left_b, right_b; 2809 2810 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a)) 2811 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b))) 2812 /* FIXME: For the moment not handled. Might be refined later. */ 2813 return false; 2814 2815 type = chrec_type (*chrec_a); 2816 left_a = CHREC_LEFT (*chrec_a); 2817 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL); 2818 diff = chrec_fold_minus (type, left_a, left_b); 2819 2820 if (!evolution_function_is_constant_p (diff)) 2821 return false; 2822 2823 if (dump_file && (dump_flags & TDF_DETAILS)) 2824 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n"); 2825 2826 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a), 2827 diff, CHREC_RIGHT (*chrec_a)); 2828 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL); 2829 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b), 2830 build_int_cst (type, 0), 2831 right_b); 2832 return true; 2833} 2834 2835/* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and 2836 *OVERLAPS_B are initialized to the functions that describe the 2837 relation between the elements accessed twice by CHREC_A and 2838 CHREC_B. For k >= 0, the following property is verified: 2839 2840 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 2841 2842static void 2843analyze_siv_subscript (tree chrec_a, 2844 tree chrec_b, 2845 conflict_function **overlaps_a, 2846 conflict_function **overlaps_b, 2847 tree *last_conflicts, 2848 int loop_nest_num) 2849{ 2850 dependence_stats.num_siv++; 2851 2852 if (dump_file && (dump_flags & TDF_DETAILS)) 2853 fprintf (dump_file, "(analyze_siv_subscript \n"); 2854 2855 if (evolution_function_is_constant_p (chrec_a) 2856 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2857 analyze_siv_subscript_cst_affine (chrec_a, chrec_b, 2858 overlaps_a, overlaps_b, last_conflicts); 2859 2860 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2861 && evolution_function_is_constant_p (chrec_b)) 2862 analyze_siv_subscript_cst_affine (chrec_b, chrec_a, 2863 overlaps_b, overlaps_a, last_conflicts); 2864 2865 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num) 2866 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)) 2867 { 2868 if (!chrec_contains_symbols (chrec_a) 2869 && !chrec_contains_symbols (chrec_b)) 2870 { 2871 analyze_subscript_affine_affine (chrec_a, chrec_b, 2872 overlaps_a, overlaps_b, 2873 last_conflicts); 2874 2875 if (CF_NOT_KNOWN_P (*overlaps_a) 2876 || CF_NOT_KNOWN_P (*overlaps_b)) 2877 dependence_stats.num_siv_unimplemented++; 2878 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2879 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2880 dependence_stats.num_siv_independent++; 2881 else 2882 dependence_stats.num_siv_dependent++; 2883 } 2884 else if (can_use_analyze_subscript_affine_affine (&chrec_a, 2885 &chrec_b)) 2886 { 2887 analyze_subscript_affine_affine (chrec_a, chrec_b, 2888 overlaps_a, overlaps_b, 2889 last_conflicts); 2890 2891 if (CF_NOT_KNOWN_P (*overlaps_a) 2892 || CF_NOT_KNOWN_P (*overlaps_b)) 2893 dependence_stats.num_siv_unimplemented++; 2894 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 2895 || CF_NO_DEPENDENCE_P (*overlaps_b)) 2896 dependence_stats.num_siv_independent++; 2897 else 2898 dependence_stats.num_siv_dependent++; 2899 } 2900 else 2901 goto siv_subscript_dontknow; 2902 } 2903 2904 else 2905 { 2906 siv_subscript_dontknow:; 2907 if (dump_file && (dump_flags & TDF_DETAILS)) 2908 fprintf (dump_file, " siv test failed: unimplemented"); 2909 *overlaps_a = conflict_fn_not_known (); 2910 *overlaps_b = conflict_fn_not_known (); 2911 *last_conflicts = chrec_dont_know; 2912 dependence_stats.num_siv_unimplemented++; 2913 } 2914 2915 if (dump_file && (dump_flags & TDF_DETAILS)) 2916 fprintf (dump_file, ")\n"); 2917} 2918 2919/* Returns false if we can prove that the greatest common divisor of the steps 2920 of CHREC does not divide CST, false otherwise. */ 2921 2922static bool 2923gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst) 2924{ 2925 HOST_WIDE_INT cd = 0, val; 2926 tree step; 2927 2928 if (!tree_fits_shwi_p (cst)) 2929 return true; 2930 val = tree_to_shwi (cst); 2931 2932 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC) 2933 { 2934 step = CHREC_RIGHT (chrec); 2935 if (!tree_fits_shwi_p (step)) 2936 return true; 2937 cd = gcd (cd, tree_to_shwi (step)); 2938 chrec = CHREC_LEFT (chrec); 2939 } 2940 2941 return val % cd == 0; 2942} 2943 2944/* Analyze a MIV (Multiple Index Variable) subscript with respect to 2945 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the 2946 functions that describe the relation between the elements accessed 2947 twice by CHREC_A and CHREC_B. For k >= 0, the following property 2948 is verified: 2949 2950 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */ 2951 2952static void 2953analyze_miv_subscript (tree chrec_a, 2954 tree chrec_b, 2955 conflict_function **overlaps_a, 2956 conflict_function **overlaps_b, 2957 tree *last_conflicts, 2958 struct loop *loop_nest) 2959{ 2960 tree type, difference; 2961 2962 dependence_stats.num_miv++; 2963 if (dump_file && (dump_flags & TDF_DETAILS)) 2964 fprintf (dump_file, "(analyze_miv_subscript \n"); 2965 2966 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b)); 2967 chrec_a = chrec_convert (type, chrec_a, NULL); 2968 chrec_b = chrec_convert (type, chrec_b, NULL); 2969 difference = chrec_fold_minus (type, chrec_a, chrec_b); 2970 2971 if (eq_evolutions_p (chrec_a, chrec_b)) 2972 { 2973 /* Access functions are the same: all the elements are accessed 2974 in the same order. */ 2975 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2976 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 2977 *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a)); 2978 dependence_stats.num_miv_dependent++; 2979 } 2980 2981 else if (evolution_function_is_constant_p (difference) 2982 /* For the moment, the following is verified: 2983 evolution_function_is_affine_multivariate_p (chrec_a, 2984 loop_nest->num) */ 2985 && !gcd_of_steps_may_divide_p (chrec_a, difference)) 2986 { 2987 /* testsuite/.../ssa-chrec-33.c 2988 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2 2989 2990 The difference is 1, and all the evolution steps are multiples 2991 of 2, consequently there are no overlapping elements. */ 2992 *overlaps_a = conflict_fn_no_dependence (); 2993 *overlaps_b = conflict_fn_no_dependence (); 2994 *last_conflicts = integer_zero_node; 2995 dependence_stats.num_miv_independent++; 2996 } 2997 2998 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num) 2999 && !chrec_contains_symbols (chrec_a) 3000 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num) 3001 && !chrec_contains_symbols (chrec_b)) 3002 { 3003 /* testsuite/.../ssa-chrec-35.c 3004 {0, +, 1}_2 vs. {0, +, 1}_3 3005 the overlapping elements are respectively located at iterations: 3006 {0, +, 1}_x and {0, +, 1}_x, 3007 in other words, we have the equality: 3008 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x) 3009 3010 Other examples: 3011 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) = 3012 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y) 3013 3014 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) = 3015 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) 3016 */ 3017 analyze_subscript_affine_affine (chrec_a, chrec_b, 3018 overlaps_a, overlaps_b, last_conflicts); 3019 3020 if (CF_NOT_KNOWN_P (*overlaps_a) 3021 || CF_NOT_KNOWN_P (*overlaps_b)) 3022 dependence_stats.num_miv_unimplemented++; 3023 else if (CF_NO_DEPENDENCE_P (*overlaps_a) 3024 || CF_NO_DEPENDENCE_P (*overlaps_b)) 3025 dependence_stats.num_miv_independent++; 3026 else 3027 dependence_stats.num_miv_dependent++; 3028 } 3029 3030 else 3031 { 3032 /* When the analysis is too difficult, answer "don't know". */ 3033 if (dump_file && (dump_flags & TDF_DETAILS)) 3034 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n"); 3035 3036 *overlaps_a = conflict_fn_not_known (); 3037 *overlaps_b = conflict_fn_not_known (); 3038 *last_conflicts = chrec_dont_know; 3039 dependence_stats.num_miv_unimplemented++; 3040 } 3041 3042 if (dump_file && (dump_flags & TDF_DETAILS)) 3043 fprintf (dump_file, ")\n"); 3044} 3045 3046/* Determines the iterations for which CHREC_A is equal to CHREC_B in 3047 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and 3048 OVERLAP_ITERATIONS_B are initialized with two functions that 3049 describe the iterations that contain conflicting elements. 3050 3051 Remark: For an integer k >= 0, the following equality is true: 3052 3053 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)). 3054*/ 3055 3056static void 3057analyze_overlapping_iterations (tree chrec_a, 3058 tree chrec_b, 3059 conflict_function **overlap_iterations_a, 3060 conflict_function **overlap_iterations_b, 3061 tree *last_conflicts, struct loop *loop_nest) 3062{ 3063 unsigned int lnn = loop_nest->num; 3064 3065 dependence_stats.num_subscript_tests++; 3066 3067 if (dump_file && (dump_flags & TDF_DETAILS)) 3068 { 3069 fprintf (dump_file, "(analyze_overlapping_iterations \n"); 3070 fprintf (dump_file, " (chrec_a = "); 3071 print_generic_expr (dump_file, chrec_a, 0); 3072 fprintf (dump_file, ")\n (chrec_b = "); 3073 print_generic_expr (dump_file, chrec_b, 0); 3074 fprintf (dump_file, ")\n"); 3075 } 3076 3077 if (chrec_a == NULL_TREE 3078 || chrec_b == NULL_TREE 3079 || chrec_contains_undetermined (chrec_a) 3080 || chrec_contains_undetermined (chrec_b)) 3081 { 3082 dependence_stats.num_subscript_undetermined++; 3083 3084 *overlap_iterations_a = conflict_fn_not_known (); 3085 *overlap_iterations_b = conflict_fn_not_known (); 3086 } 3087 3088 /* If they are the same chrec, and are affine, they overlap 3089 on every iteration. */ 3090 else if (eq_evolutions_p (chrec_a, chrec_b) 3091 && (evolution_function_is_affine_multivariate_p (chrec_a, lnn) 3092 || operand_equal_p (chrec_a, chrec_b, 0))) 3093 { 3094 dependence_stats.num_same_subscript_function++; 3095 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3096 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node)); 3097 *last_conflicts = chrec_dont_know; 3098 } 3099 3100 /* If they aren't the same, and aren't affine, we can't do anything 3101 yet. */ 3102 else if ((chrec_contains_symbols (chrec_a) 3103 || chrec_contains_symbols (chrec_b)) 3104 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn) 3105 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn))) 3106 { 3107 dependence_stats.num_subscript_undetermined++; 3108 *overlap_iterations_a = conflict_fn_not_known (); 3109 *overlap_iterations_b = conflict_fn_not_known (); 3110 } 3111 3112 else if (ziv_subscript_p (chrec_a, chrec_b)) 3113 analyze_ziv_subscript (chrec_a, chrec_b, 3114 overlap_iterations_a, overlap_iterations_b, 3115 last_conflicts); 3116 3117 else if (siv_subscript_p (chrec_a, chrec_b)) 3118 analyze_siv_subscript (chrec_a, chrec_b, 3119 overlap_iterations_a, overlap_iterations_b, 3120 last_conflicts, lnn); 3121 3122 else 3123 analyze_miv_subscript (chrec_a, chrec_b, 3124 overlap_iterations_a, overlap_iterations_b, 3125 last_conflicts, loop_nest); 3126 3127 if (dump_file && (dump_flags & TDF_DETAILS)) 3128 { 3129 fprintf (dump_file, " (overlap_iterations_a = "); 3130 dump_conflict_function (dump_file, *overlap_iterations_a); 3131 fprintf (dump_file, ")\n (overlap_iterations_b = "); 3132 dump_conflict_function (dump_file, *overlap_iterations_b); 3133 fprintf (dump_file, "))\n"); 3134 } 3135} 3136 3137/* Helper function for uniquely inserting distance vectors. */ 3138 3139static void 3140save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v) 3141{ 3142 unsigned i; 3143 lambda_vector v; 3144 3145 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, v) 3146 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr))) 3147 return; 3148 3149 DDR_DIST_VECTS (ddr).safe_push (dist_v); 3150} 3151 3152/* Helper function for uniquely inserting direction vectors. */ 3153 3154static void 3155save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v) 3156{ 3157 unsigned i; 3158 lambda_vector v; 3159 3160 FOR_EACH_VEC_ELT (DDR_DIR_VECTS (ddr), i, v) 3161 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr))) 3162 return; 3163 3164 DDR_DIR_VECTS (ddr).safe_push (dir_v); 3165} 3166 3167/* Add a distance of 1 on all the loops outer than INDEX. If we 3168 haven't yet determined a distance for this outer loop, push a new 3169 distance vector composed of the previous distance, and a distance 3170 of 1 for this outer loop. Example: 3171 3172 | loop_1 3173 | loop_2 3174 | A[10] 3175 | endloop_2 3176 | endloop_1 3177 3178 Saved vectors are of the form (dist_in_1, dist_in_2). First, we 3179 save (0, 1), then we have to save (1, 0). */ 3180 3181static void 3182add_outer_distances (struct data_dependence_relation *ddr, 3183 lambda_vector dist_v, int index) 3184{ 3185 /* For each outer loop where init_v is not set, the accesses are 3186 in dependence of distance 1 in the loop. */ 3187 while (--index >= 0) 3188 { 3189 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3190 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3191 save_v[index] = 1; 3192 save_dist_v (ddr, save_v); 3193 } 3194} 3195 3196/* Return false when fail to represent the data dependence as a 3197 distance vector. INIT_B is set to true when a component has been 3198 added to the distance vector DIST_V. INDEX_CARRY is then set to 3199 the index in DIST_V that carries the dependence. */ 3200 3201static bool 3202build_classic_dist_vector_1 (struct data_dependence_relation *ddr, 3203 struct data_reference *ddr_a, 3204 struct data_reference *ddr_b, 3205 lambda_vector dist_v, bool *init_b, 3206 int *index_carry) 3207{ 3208 unsigned i; 3209 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3210 3211 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3212 { 3213 tree access_fn_a, access_fn_b; 3214 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i); 3215 3216 if (chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3217 { 3218 non_affine_dependence_relation (ddr); 3219 return false; 3220 } 3221 3222 access_fn_a = DR_ACCESS_FN (ddr_a, i); 3223 access_fn_b = DR_ACCESS_FN (ddr_b, i); 3224 3225 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC 3226 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC) 3227 { 3228 int dist, index; 3229 int var_a = CHREC_VARIABLE (access_fn_a); 3230 int var_b = CHREC_VARIABLE (access_fn_b); 3231 3232 if (var_a != var_b 3233 || chrec_contains_undetermined (SUB_DISTANCE (subscript))) 3234 { 3235 non_affine_dependence_relation (ddr); 3236 return false; 3237 } 3238 3239 dist = int_cst_value (SUB_DISTANCE (subscript)); 3240 index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr)); 3241 *index_carry = MIN (index, *index_carry); 3242 3243 /* This is the subscript coupling test. If we have already 3244 recorded a distance for this loop (a distance coming from 3245 another subscript), it should be the same. For example, 3246 in the following code, there is no dependence: 3247 3248 | loop i = 0, N, 1 3249 | T[i+1][i] = ... 3250 | ... = T[i][i] 3251 | endloop 3252 */ 3253 if (init_v[index] != 0 && dist_v[index] != dist) 3254 { 3255 finalize_ddr_dependent (ddr, chrec_known); 3256 return false; 3257 } 3258 3259 dist_v[index] = dist; 3260 init_v[index] = 1; 3261 *init_b = true; 3262 } 3263 else if (!operand_equal_p (access_fn_a, access_fn_b, 0)) 3264 { 3265 /* This can be for example an affine vs. constant dependence 3266 (T[i] vs. T[3]) that is not an affine dependence and is 3267 not representable as a distance vector. */ 3268 non_affine_dependence_relation (ddr); 3269 return false; 3270 } 3271 } 3272 3273 return true; 3274} 3275 3276/* Return true when the DDR contains only constant access functions. */ 3277 3278static bool 3279constant_access_functions (const struct data_dependence_relation *ddr) 3280{ 3281 unsigned i; 3282 3283 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3284 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i)) 3285 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i))) 3286 return false; 3287 3288 return true; 3289} 3290 3291/* Helper function for the case where DDR_A and DDR_B are the same 3292 multivariate access function with a constant step. For an example 3293 see pr34635-1.c. */ 3294 3295static void 3296add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2) 3297{ 3298 int x_1, x_2; 3299 tree c_1 = CHREC_LEFT (c_2); 3300 tree c_0 = CHREC_LEFT (c_1); 3301 lambda_vector dist_v; 3302 int v1, v2, cd; 3303 3304 /* Polynomials with more than 2 variables are not handled yet. When 3305 the evolution steps are parameters, it is not possible to 3306 represent the dependence using classical distance vectors. */ 3307 if (TREE_CODE (c_0) != INTEGER_CST 3308 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST 3309 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST) 3310 { 3311 DDR_AFFINE_P (ddr) = false; 3312 return; 3313 } 3314 3315 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr)); 3316 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr)); 3317 3318 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */ 3319 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3320 v1 = int_cst_value (CHREC_RIGHT (c_1)); 3321 v2 = int_cst_value (CHREC_RIGHT (c_2)); 3322 cd = gcd (v1, v2); 3323 v1 /= cd; 3324 v2 /= cd; 3325 3326 if (v2 < 0) 3327 { 3328 v2 = -v2; 3329 v1 = -v1; 3330 } 3331 3332 dist_v[x_1] = v2; 3333 dist_v[x_2] = -v1; 3334 save_dist_v (ddr, dist_v); 3335 3336 add_outer_distances (ddr, dist_v, x_1); 3337} 3338 3339/* Helper function for the case where DDR_A and DDR_B are the same 3340 access functions. */ 3341 3342static void 3343add_other_self_distances (struct data_dependence_relation *ddr) 3344{ 3345 lambda_vector dist_v; 3346 unsigned i; 3347 int index_carry = DDR_NB_LOOPS (ddr); 3348 3349 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3350 { 3351 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i); 3352 3353 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC) 3354 { 3355 if (!evolution_function_is_univariate_p (access_fun)) 3356 { 3357 if (DDR_NUM_SUBSCRIPTS (ddr) != 1) 3358 { 3359 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know; 3360 return; 3361 } 3362 3363 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0); 3364 3365 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC) 3366 add_multivariate_self_dist (ddr, access_fun); 3367 else 3368 /* The evolution step is not constant: it varies in 3369 the outer loop, so this cannot be represented by a 3370 distance vector. For example in pr34635.c the 3371 evolution is {0, +, {0, +, 4}_1}_2. */ 3372 DDR_AFFINE_P (ddr) = false; 3373 3374 return; 3375 } 3376 3377 index_carry = MIN (index_carry, 3378 index_in_loop_nest (CHREC_VARIABLE (access_fun), 3379 DDR_LOOP_NEST (ddr))); 3380 } 3381 } 3382 3383 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3384 add_outer_distances (ddr, dist_v, index_carry); 3385} 3386 3387static void 3388insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr) 3389{ 3390 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3391 3392 dist_v[DDR_INNER_LOOP (ddr)] = 1; 3393 save_dist_v (ddr, dist_v); 3394} 3395 3396/* Adds a unit distance vector to DDR when there is a 0 overlap. This 3397 is the case for example when access functions are the same and 3398 equal to a constant, as in: 3399 3400 | loop_1 3401 | A[3] = ... 3402 | ... = A[3] 3403 | endloop_1 3404 3405 in which case the distance vectors are (0) and (1). */ 3406 3407static void 3408add_distance_for_zero_overlaps (struct data_dependence_relation *ddr) 3409{ 3410 unsigned i, j; 3411 3412 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3413 { 3414 subscript_p sub = DDR_SUBSCRIPT (ddr, i); 3415 conflict_function *ca = SUB_CONFLICTS_IN_A (sub); 3416 conflict_function *cb = SUB_CONFLICTS_IN_B (sub); 3417 3418 for (j = 0; j < ca->n; j++) 3419 if (affine_function_zero_p (ca->fns[j])) 3420 { 3421 insert_innermost_unit_dist_vector (ddr); 3422 return; 3423 } 3424 3425 for (j = 0; j < cb->n; j++) 3426 if (affine_function_zero_p (cb->fns[j])) 3427 { 3428 insert_innermost_unit_dist_vector (ddr); 3429 return; 3430 } 3431 } 3432} 3433 3434/* Compute the classic per loop distance vector. DDR is the data 3435 dependence relation to build a vector from. Return false when fail 3436 to represent the data dependence as a distance vector. */ 3437 3438static bool 3439build_classic_dist_vector (struct data_dependence_relation *ddr, 3440 struct loop *loop_nest) 3441{ 3442 bool init_b = false; 3443 int index_carry = DDR_NB_LOOPS (ddr); 3444 lambda_vector dist_v; 3445 3446 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE) 3447 return false; 3448 3449 if (same_access_functions (ddr)) 3450 { 3451 /* Save the 0 vector. */ 3452 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3453 save_dist_v (ddr, dist_v); 3454 3455 if (constant_access_functions (ddr)) 3456 add_distance_for_zero_overlaps (ddr); 3457 3458 if (DDR_NB_LOOPS (ddr) > 1) 3459 add_other_self_distances (ddr); 3460 3461 return true; 3462 } 3463 3464 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3465 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr), 3466 dist_v, &init_b, &index_carry)) 3467 return false; 3468 3469 /* Save the distance vector if we initialized one. */ 3470 if (init_b) 3471 { 3472 /* Verify a basic constraint: classic distance vectors should 3473 always be lexicographically positive. 3474 3475 Data references are collected in the order of execution of 3476 the program, thus for the following loop 3477 3478 | for (i = 1; i < 100; i++) 3479 | for (j = 1; j < 100; j++) 3480 | { 3481 | t = T[j+1][i-1]; // A 3482 | T[j][i] = t + 2; // B 3483 | } 3484 3485 references are collected following the direction of the wind: 3486 A then B. The data dependence tests are performed also 3487 following this order, such that we're looking at the distance 3488 separating the elements accessed by A from the elements later 3489 accessed by B. But in this example, the distance returned by 3490 test_dep (A, B) is lexicographically negative (-1, 1), that 3491 means that the access A occurs later than B with respect to 3492 the outer loop, ie. we're actually looking upwind. In this 3493 case we solve test_dep (B, A) looking downwind to the 3494 lexicographically positive solution, that returns the 3495 distance vector (1, -1). */ 3496 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr))) 3497 { 3498 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3499 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3500 loop_nest)) 3501 return false; 3502 compute_subscript_distance (ddr); 3503 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3504 save_v, &init_b, &index_carry)) 3505 return false; 3506 save_dist_v (ddr, save_v); 3507 DDR_REVERSED_P (ddr) = true; 3508 3509 /* In this case there is a dependence forward for all the 3510 outer loops: 3511 3512 | for (k = 1; k < 100; k++) 3513 | for (i = 1; i < 100; i++) 3514 | for (j = 1; j < 100; j++) 3515 | { 3516 | t = T[j+1][i-1]; // A 3517 | T[j][i] = t + 2; // B 3518 | } 3519 3520 the vectors are: 3521 (0, 1, -1) 3522 (1, 1, -1) 3523 (1, -1, 1) 3524 */ 3525 if (DDR_NB_LOOPS (ddr) > 1) 3526 { 3527 add_outer_distances (ddr, save_v, index_carry); 3528 add_outer_distances (ddr, dist_v, index_carry); 3529 } 3530 } 3531 else 3532 { 3533 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3534 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr)); 3535 3536 if (DDR_NB_LOOPS (ddr) > 1) 3537 { 3538 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3539 3540 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), 3541 DDR_A (ddr), loop_nest)) 3542 return false; 3543 compute_subscript_distance (ddr); 3544 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr), 3545 opposite_v, &init_b, 3546 &index_carry)) 3547 return false; 3548 3549 save_dist_v (ddr, save_v); 3550 add_outer_distances (ddr, dist_v, index_carry); 3551 add_outer_distances (ddr, opposite_v, index_carry); 3552 } 3553 else 3554 save_dist_v (ddr, save_v); 3555 } 3556 } 3557 else 3558 { 3559 /* There is a distance of 1 on all the outer loops: Example: 3560 there is a dependence of distance 1 on loop_1 for the array A. 3561 3562 | loop_1 3563 | A[5] = ... 3564 | endloop 3565 */ 3566 add_outer_distances (ddr, dist_v, 3567 lambda_vector_first_nz (dist_v, 3568 DDR_NB_LOOPS (ddr), 0)); 3569 } 3570 3571 if (dump_file && (dump_flags & TDF_DETAILS)) 3572 { 3573 unsigned i; 3574 3575 fprintf (dump_file, "(build_classic_dist_vector\n"); 3576 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 3577 { 3578 fprintf (dump_file, " dist_vector = ("); 3579 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i), 3580 DDR_NB_LOOPS (ddr)); 3581 fprintf (dump_file, " )\n"); 3582 } 3583 fprintf (dump_file, ")\n"); 3584 } 3585 3586 return true; 3587} 3588 3589/* Return the direction for a given distance. 3590 FIXME: Computing dir this way is suboptimal, since dir can catch 3591 cases that dist is unable to represent. */ 3592 3593static inline enum data_dependence_direction 3594dir_from_dist (int dist) 3595{ 3596 if (dist > 0) 3597 return dir_positive; 3598 else if (dist < 0) 3599 return dir_negative; 3600 else 3601 return dir_equal; 3602} 3603 3604/* Compute the classic per loop direction vector. DDR is the data 3605 dependence relation to build a vector from. */ 3606 3607static void 3608build_classic_dir_vector (struct data_dependence_relation *ddr) 3609{ 3610 unsigned i, j; 3611 lambda_vector dist_v; 3612 3613 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) 3614 { 3615 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3616 3617 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3618 dir_v[j] = dir_from_dist (dist_v[j]); 3619 3620 save_dir_v (ddr, dir_v); 3621 } 3622} 3623 3624/* Helper function. Returns true when there is a dependence between 3625 data references DRA and DRB. */ 3626 3627static bool 3628subscript_dependence_tester_1 (struct data_dependence_relation *ddr, 3629 struct data_reference *dra, 3630 struct data_reference *drb, 3631 struct loop *loop_nest) 3632{ 3633 unsigned int i; 3634 tree last_conflicts; 3635 struct subscript *subscript; 3636 tree res = NULL_TREE; 3637 3638 for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++) 3639 { 3640 conflict_function *overlaps_a, *overlaps_b; 3641 3642 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i), 3643 DR_ACCESS_FN (drb, i), 3644 &overlaps_a, &overlaps_b, 3645 &last_conflicts, loop_nest); 3646 3647 if (SUB_CONFLICTS_IN_A (subscript)) 3648 free_conflict_function (SUB_CONFLICTS_IN_A (subscript)); 3649 if (SUB_CONFLICTS_IN_B (subscript)) 3650 free_conflict_function (SUB_CONFLICTS_IN_B (subscript)); 3651 3652 SUB_CONFLICTS_IN_A (subscript) = overlaps_a; 3653 SUB_CONFLICTS_IN_B (subscript) = overlaps_b; 3654 SUB_LAST_CONFLICT (subscript) = last_conflicts; 3655 3656 /* If there is any undetermined conflict function we have to 3657 give a conservative answer in case we cannot prove that 3658 no dependence exists when analyzing another subscript. */ 3659 if (CF_NOT_KNOWN_P (overlaps_a) 3660 || CF_NOT_KNOWN_P (overlaps_b)) 3661 { 3662 res = chrec_dont_know; 3663 continue; 3664 } 3665 3666 /* When there is a subscript with no dependence we can stop. */ 3667 else if (CF_NO_DEPENDENCE_P (overlaps_a) 3668 || CF_NO_DEPENDENCE_P (overlaps_b)) 3669 { 3670 res = chrec_known; 3671 break; 3672 } 3673 } 3674 3675 if (res == NULL_TREE) 3676 return true; 3677 3678 if (res == chrec_known) 3679 dependence_stats.num_dependence_independent++; 3680 else 3681 dependence_stats.num_dependence_undetermined++; 3682 finalize_ddr_dependent (ddr, res); 3683 return false; 3684} 3685 3686/* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */ 3687 3688static void 3689subscript_dependence_tester (struct data_dependence_relation *ddr, 3690 struct loop *loop_nest) 3691{ 3692 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest)) 3693 dependence_stats.num_dependence_dependent++; 3694 3695 compute_subscript_distance (ddr); 3696 if (build_classic_dist_vector (ddr, loop_nest)) 3697 build_classic_dir_vector (ddr); 3698} 3699 3700/* Returns true when all the access functions of A are affine or 3701 constant with respect to LOOP_NEST. */ 3702 3703static bool 3704access_functions_are_affine_or_constant_p (const struct data_reference *a, 3705 const struct loop *loop_nest) 3706{ 3707 unsigned int i; 3708 vec<tree> fns = DR_ACCESS_FNS (a); 3709 tree t; 3710 3711 FOR_EACH_VEC_ELT (fns, i, t) 3712 if (!evolution_function_is_invariant_p (t, loop_nest->num) 3713 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num)) 3714 return false; 3715 3716 return true; 3717} 3718 3719/* Initializes an equation for an OMEGA problem using the information 3720 contained in the ACCESS_FUN. Returns true when the operation 3721 succeeded. 3722 3723 PB is the omega constraint system. 3724 EQ is the number of the equation to be initialized. 3725 OFFSET is used for shifting the variables names in the constraints: 3726 a constrain is composed of 2 * the number of variables surrounding 3727 dependence accesses. OFFSET is set either to 0 for the first n variables, 3728 then it is set to n. 3729 ACCESS_FUN is expected to be an affine chrec. */ 3730 3731static bool 3732init_omega_eq_with_af (omega_pb pb, unsigned eq, 3733 unsigned int offset, tree access_fun, 3734 struct data_dependence_relation *ddr) 3735{ 3736 switch (TREE_CODE (access_fun)) 3737 { 3738 case POLYNOMIAL_CHREC: 3739 { 3740 tree left = CHREC_LEFT (access_fun); 3741 tree right = CHREC_RIGHT (access_fun); 3742 int var = CHREC_VARIABLE (access_fun); 3743 unsigned var_idx; 3744 3745 if (TREE_CODE (right) != INTEGER_CST) 3746 return false; 3747 3748 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr)); 3749 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right); 3750 3751 /* Compute the innermost loop index. */ 3752 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx); 3753 3754 if (offset == 0) 3755 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1] 3756 += int_cst_value (right); 3757 3758 switch (TREE_CODE (left)) 3759 { 3760 case POLYNOMIAL_CHREC: 3761 return init_omega_eq_with_af (pb, eq, offset, left, ddr); 3762 3763 case INTEGER_CST: 3764 pb->eqs[eq].coef[0] += int_cst_value (left); 3765 return true; 3766 3767 default: 3768 return false; 3769 } 3770 } 3771 3772 case INTEGER_CST: 3773 pb->eqs[eq].coef[0] += int_cst_value (access_fun); 3774 return true; 3775 3776 default: 3777 return false; 3778 } 3779} 3780 3781/* As explained in the comments preceding init_omega_for_ddr, we have 3782 to set up a system for each loop level, setting outer loops 3783 variation to zero, and current loop variation to positive or zero. 3784 Save each lexico positive distance vector. */ 3785 3786static void 3787omega_extract_distance_vectors (omega_pb pb, 3788 struct data_dependence_relation *ddr) 3789{ 3790 int eq, geq; 3791 unsigned i, j; 3792 struct loop *loopi, *loopj; 3793 enum omega_result res; 3794 3795 /* Set a new problem for each loop in the nest. The basis is the 3796 problem that we have initialized until now. On top of this we 3797 add new constraints. */ 3798 for (i = 0; i <= DDR_INNER_LOOP (ddr) 3799 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++) 3800 { 3801 int dist = 0; 3802 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), 3803 DDR_NB_LOOPS (ddr)); 3804 3805 omega_copy_problem (copy, pb); 3806 3807 /* For all the outer loops "loop_j", add "dj = 0". */ 3808 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++) 3809 { 3810 eq = omega_add_zero_eq (copy, omega_black); 3811 copy->eqs[eq].coef[j + 1] = 1; 3812 } 3813 3814 /* For "loop_i", add "0 <= di". */ 3815 geq = omega_add_zero_geq (copy, omega_black); 3816 copy->geqs[geq].coef[i + 1] = 1; 3817 3818 /* Reduce the constraint system, and test that the current 3819 problem is feasible. */ 3820 res = omega_simplify_problem (copy); 3821 if (res == omega_false 3822 || res == omega_unknown 3823 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr)) 3824 goto next_problem; 3825 3826 for (eq = 0; eq < copy->num_subs; eq++) 3827 if (copy->subs[eq].key == (int) i + 1) 3828 { 3829 dist = copy->subs[eq].coef[0]; 3830 goto found_dist; 3831 } 3832 3833 if (dist == 0) 3834 { 3835 /* Reinitialize problem... */ 3836 omega_copy_problem (copy, pb); 3837 for (j = 0; j < i && DDR_LOOP_NEST (ddr).iterate (j, &loopj); j++) 3838 { 3839 eq = omega_add_zero_eq (copy, omega_black); 3840 copy->eqs[eq].coef[j + 1] = 1; 3841 } 3842 3843 /* ..., but this time "di = 1". */ 3844 eq = omega_add_zero_eq (copy, omega_black); 3845 copy->eqs[eq].coef[i + 1] = 1; 3846 copy->eqs[eq].coef[0] = -1; 3847 3848 res = omega_simplify_problem (copy); 3849 if (res == omega_false 3850 || res == omega_unknown 3851 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr)) 3852 goto next_problem; 3853 3854 for (eq = 0; eq < copy->num_subs; eq++) 3855 if (copy->subs[eq].key == (int) i + 1) 3856 { 3857 dist = copy->subs[eq].coef[0]; 3858 goto found_dist; 3859 } 3860 } 3861 3862 found_dist:; 3863 /* Save the lexicographically positive distance vector. */ 3864 if (dist >= 0) 3865 { 3866 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3867 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 3868 3869 dist_v[i] = dist; 3870 3871 for (eq = 0; eq < copy->num_subs; eq++) 3872 if (copy->subs[eq].key > 0) 3873 { 3874 dist = copy->subs[eq].coef[0]; 3875 dist_v[copy->subs[eq].key - 1] = dist; 3876 } 3877 3878 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 3879 dir_v[j] = dir_from_dist (dist_v[j]); 3880 3881 save_dist_v (ddr, dist_v); 3882 save_dir_v (ddr, dir_v); 3883 } 3884 3885 next_problem:; 3886 omega_free_problem (copy); 3887 } 3888} 3889 3890/* This is called for each subscript of a tuple of data references: 3891 insert an equality for representing the conflicts. */ 3892 3893static bool 3894omega_setup_subscript (tree access_fun_a, tree access_fun_b, 3895 struct data_dependence_relation *ddr, 3896 omega_pb pb, bool *maybe_dependent) 3897{ 3898 int eq; 3899 tree type = signed_type_for_types (TREE_TYPE (access_fun_a), 3900 TREE_TYPE (access_fun_b)); 3901 tree fun_a = chrec_convert (type, access_fun_a, NULL); 3902 tree fun_b = chrec_convert (type, access_fun_b, NULL); 3903 tree difference = chrec_fold_minus (type, fun_a, fun_b); 3904 tree minus_one; 3905 3906 /* When the fun_a - fun_b is not constant, the dependence is not 3907 captured by the classic distance vector representation. */ 3908 if (TREE_CODE (difference) != INTEGER_CST) 3909 return false; 3910 3911 /* ZIV test. */ 3912 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference)) 3913 { 3914 /* There is no dependence. */ 3915 *maybe_dependent = false; 3916 return true; 3917 } 3918 3919 minus_one = build_int_cst (type, -1); 3920 fun_b = chrec_fold_multiply (type, fun_b, minus_one); 3921 3922 eq = omega_add_zero_eq (pb, omega_black); 3923 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr) 3924 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr)) 3925 /* There is probably a dependence, but the system of 3926 constraints cannot be built: answer "don't know". */ 3927 return false; 3928 3929 /* GCD test. */ 3930 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0] 3931 && !int_divides_p (lambda_vector_gcd 3932 ((lambda_vector) &(pb->eqs[eq].coef[1]), 3933 2 * DDR_NB_LOOPS (ddr)), 3934 pb->eqs[eq].coef[0])) 3935 { 3936 /* There is no dependence. */ 3937 *maybe_dependent = false; 3938 return true; 3939 } 3940 3941 return true; 3942} 3943 3944/* Helper function, same as init_omega_for_ddr but specialized for 3945 data references A and B. */ 3946 3947static bool 3948init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb, 3949 struct data_dependence_relation *ddr, 3950 omega_pb pb, bool *maybe_dependent) 3951{ 3952 unsigned i; 3953 int ineq; 3954 struct loop *loopi; 3955 unsigned nb_loops = DDR_NB_LOOPS (ddr); 3956 3957 /* Insert an equality per subscript. */ 3958 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++) 3959 { 3960 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i), 3961 ddr, pb, maybe_dependent)) 3962 return false; 3963 else if (*maybe_dependent == false) 3964 { 3965 /* There is no dependence. */ 3966 DDR_ARE_DEPENDENT (ddr) = chrec_known; 3967 return true; 3968 } 3969 } 3970 3971 /* Insert inequalities: constraints corresponding to the iteration 3972 domain, i.e. the loops surrounding the references "loop_x" and 3973 the distance variables "dx". The layout of the OMEGA 3974 representation is as follows: 3975 - coef[0] is the constant 3976 - coef[1..nb_loops] are the protected variables that will not be 3977 removed by the solver: the "dx" 3978 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x". 3979 */ 3980 for (i = 0; i <= DDR_INNER_LOOP (ddr) 3981 && DDR_LOOP_NEST (ddr).iterate (i, &loopi); i++) 3982 { 3983 HOST_WIDE_INT nbi = max_stmt_executions_int (loopi); 3984 3985 /* 0 <= loop_x */ 3986 ineq = omega_add_zero_geq (pb, omega_black); 3987 pb->geqs[ineq].coef[i + nb_loops + 1] = 1; 3988 3989 /* 0 <= loop_x + dx */ 3990 ineq = omega_add_zero_geq (pb, omega_black); 3991 pb->geqs[ineq].coef[i + nb_loops + 1] = 1; 3992 pb->geqs[ineq].coef[i + 1] = 1; 3993 3994 if (nbi != -1) 3995 { 3996 /* loop_x <= nb_iters */ 3997 ineq = omega_add_zero_geq (pb, omega_black); 3998 pb->geqs[ineq].coef[i + nb_loops + 1] = -1; 3999 pb->geqs[ineq].coef[0] = nbi; 4000 4001 /* loop_x + dx <= nb_iters */ 4002 ineq = omega_add_zero_geq (pb, omega_black); 4003 pb->geqs[ineq].coef[i + nb_loops + 1] = -1; 4004 pb->geqs[ineq].coef[i + 1] = -1; 4005 pb->geqs[ineq].coef[0] = nbi; 4006 4007 /* A step "dx" bigger than nb_iters is not feasible, so 4008 add "0 <= nb_iters + dx", */ 4009 ineq = omega_add_zero_geq (pb, omega_black); 4010 pb->geqs[ineq].coef[i + 1] = 1; 4011 pb->geqs[ineq].coef[0] = nbi; 4012 /* and "dx <= nb_iters". */ 4013 ineq = omega_add_zero_geq (pb, omega_black); 4014 pb->geqs[ineq].coef[i + 1] = -1; 4015 pb->geqs[ineq].coef[0] = nbi; 4016 } 4017 } 4018 4019 omega_extract_distance_vectors (pb, ddr); 4020 4021 return true; 4022} 4023 4024/* Sets up the Omega dependence problem for the data dependence 4025 relation DDR. Returns false when the constraint system cannot be 4026 built, ie. when the test answers "don't know". Returns true 4027 otherwise, and when independence has been proved (using one of the 4028 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise 4029 set MAYBE_DEPENDENT to true. 4030 4031 Example: for setting up the dependence system corresponding to the 4032 conflicting accesses 4033 4034 | loop_i 4035 | loop_j 4036 | A[i, i+1] = ... 4037 | ... A[2*j, 2*(i + j)] 4038 | endloop_j 4039 | endloop_i 4040 4041 the following constraints come from the iteration domain: 4042 4043 0 <= i <= Ni 4044 0 <= i + di <= Ni 4045 0 <= j <= Nj 4046 0 <= j + dj <= Nj 4047 4048 where di, dj are the distance variables. The constraints 4049 representing the conflicting elements are: 4050 4051 i = 2 * (j + dj) 4052 i + 1 = 2 * (i + di + j + dj) 4053 4054 For asking that the resulting distance vector (di, dj) be 4055 lexicographically positive, we insert the constraint "di >= 0". If 4056 "di = 0" in the solution, we fix that component to zero, and we 4057 look at the inner loops: we set a new problem where all the outer 4058 loop distances are zero, and fix this inner component to be 4059 positive. When one of the components is positive, we save that 4060 distance, and set a new problem where the distance on this loop is 4061 zero, searching for other distances in the inner loops. Here is 4062 the classic example that illustrates that we have to set for each 4063 inner loop a new problem: 4064 4065 | loop_1 4066 | loop_2 4067 | A[10] 4068 | endloop_2 4069 | endloop_1 4070 4071 we have to save two distances (1, 0) and (0, 1). 4072 4073 Given two array references, refA and refB, we have to set the 4074 dependence problem twice, refA vs. refB and refB vs. refA, and we 4075 cannot do a single test, as refB might occur before refA in the 4076 inner loops, and the contrary when considering outer loops: ex. 4077 4078 | loop_0 4079 | loop_1 4080 | loop_2 4081 | T[{1,+,1}_2][{1,+,1}_1] // refA 4082 | T[{2,+,1}_2][{0,+,1}_1] // refB 4083 | endloop_2 4084 | endloop_1 4085 | endloop_0 4086 4087 refB touches the elements in T before refA, and thus for the same 4088 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1) 4089 but for successive loop_0 iterations, we have (1, -1, 1) 4090 4091 The Omega solver expects the distance variables ("di" in the 4092 previous example) to come first in the constraint system (as 4093 variables to be protected, or "safe" variables), the constraint 4094 system is built using the following layout: 4095 4096 "cst | distance vars | index vars". 4097*/ 4098 4099static bool 4100init_omega_for_ddr (struct data_dependence_relation *ddr, 4101 bool *maybe_dependent) 4102{ 4103 omega_pb pb; 4104 bool res = false; 4105 4106 *maybe_dependent = true; 4107 4108 if (same_access_functions (ddr)) 4109 { 4110 unsigned j; 4111 lambda_vector dir_v; 4112 4113 /* Save the 0 vector. */ 4114 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr))); 4115 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr)); 4116 for (j = 0; j < DDR_NB_LOOPS (ddr); j++) 4117 dir_v[j] = dir_equal; 4118 save_dir_v (ddr, dir_v); 4119 4120 /* Save the dependences carried by outer loops. */ 4121 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 4122 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, 4123 maybe_dependent); 4124 omega_free_problem (pb); 4125 return res; 4126 } 4127 4128 /* Omega expects the protected variables (those that have to be kept 4129 after elimination) to appear first in the constraint system. 4130 These variables are the distance variables. In the following 4131 initialization we declare NB_LOOPS safe variables, and the total 4132 number of variables for the constraint system is 2*NB_LOOPS. */ 4133 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 4134 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb, 4135 maybe_dependent); 4136 omega_free_problem (pb); 4137 4138 /* Stop computation if not decidable, or no dependence. */ 4139 if (res == false || *maybe_dependent == false) 4140 return res; 4141 4142 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr)); 4143 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb, 4144 maybe_dependent); 4145 omega_free_problem (pb); 4146 4147 return res; 4148} 4149 4150/* Return true when DDR contains the same information as that stored 4151 in DIR_VECTS and in DIST_VECTS, return false otherwise. */ 4152 4153static bool 4154ddr_consistent_p (FILE *file, 4155 struct data_dependence_relation *ddr, 4156 vec<lambda_vector> dist_vects, 4157 vec<lambda_vector> dir_vects) 4158{ 4159 unsigned int i, j; 4160 4161 /* If dump_file is set, output there. */ 4162 if (dump_file && (dump_flags & TDF_DETAILS)) 4163 file = dump_file; 4164 4165 if (dist_vects.length () != DDR_NUM_DIST_VECTS (ddr)) 4166 { 4167 lambda_vector b_dist_v; 4168 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n", 4169 dist_vects.length (), 4170 DDR_NUM_DIST_VECTS (ddr)); 4171 4172 fprintf (file, "Banerjee dist vectors:\n"); 4173 FOR_EACH_VEC_ELT (dist_vects, i, b_dist_v) 4174 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr)); 4175 4176 fprintf (file, "Omega dist vectors:\n"); 4177 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 4178 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr)); 4179 4180 fprintf (file, "data dependence relation:\n"); 4181 dump_data_dependence_relation (file, ddr); 4182 4183 fprintf (file, ")\n"); 4184 return false; 4185 } 4186 4187 if (dir_vects.length () != DDR_NUM_DIR_VECTS (ddr)) 4188 { 4189 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n", 4190 dir_vects.length (), 4191 DDR_NUM_DIR_VECTS (ddr)); 4192 return false; 4193 } 4194 4195 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++) 4196 { 4197 lambda_vector a_dist_v; 4198 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i); 4199 4200 /* Distance vectors are not ordered in the same way in the DDR 4201 and in the DIST_VECTS: search for a matching vector. */ 4202 FOR_EACH_VEC_ELT (dist_vects, j, a_dist_v) 4203 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr))) 4204 break; 4205 4206 if (j == dist_vects.length ()) 4207 { 4208 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n"); 4209 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr)); 4210 fprintf (file, "not found in Omega dist vectors:\n"); 4211 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr)); 4212 fprintf (file, "data dependence relation:\n"); 4213 dump_data_dependence_relation (file, ddr); 4214 fprintf (file, ")\n"); 4215 } 4216 } 4217 4218 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++) 4219 { 4220 lambda_vector a_dir_v; 4221 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i); 4222 4223 /* Direction vectors are not ordered in the same way in the DDR 4224 and in the DIR_VECTS: search for a matching vector. */ 4225 FOR_EACH_VEC_ELT (dir_vects, j, a_dir_v) 4226 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr))) 4227 break; 4228 4229 if (j == dist_vects.length ()) 4230 { 4231 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n"); 4232 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr)); 4233 fprintf (file, "not found in Omega dir vectors:\n"); 4234 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr)); 4235 fprintf (file, "data dependence relation:\n"); 4236 dump_data_dependence_relation (file, ddr); 4237 fprintf (file, ")\n"); 4238 } 4239 } 4240 4241 return true; 4242} 4243 4244/* This computes the affine dependence relation between A and B with 4245 respect to LOOP_NEST. CHREC_KNOWN is used for representing the 4246 independence between two accesses, while CHREC_DONT_KNOW is used 4247 for representing the unknown relation. 4248 4249 Note that it is possible to stop the computation of the dependence 4250 relation the first time we detect a CHREC_KNOWN element for a given 4251 subscript. */ 4252 4253void 4254compute_affine_dependence (struct data_dependence_relation *ddr, 4255 struct loop *loop_nest) 4256{ 4257 struct data_reference *dra = DDR_A (ddr); 4258 struct data_reference *drb = DDR_B (ddr); 4259 4260 if (dump_file && (dump_flags & TDF_DETAILS)) 4261 { 4262 fprintf (dump_file, "(compute_affine_dependence\n"); 4263 fprintf (dump_file, " stmt_a: "); 4264 print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM); 4265 fprintf (dump_file, " stmt_b: "); 4266 print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM); 4267 } 4268 4269 /* Analyze only when the dependence relation is not yet known. */ 4270 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 4271 { 4272 dependence_stats.num_dependence_tests++; 4273 4274 if (access_functions_are_affine_or_constant_p (dra, loop_nest) 4275 && access_functions_are_affine_or_constant_p (drb, loop_nest)) 4276 { 4277 subscript_dependence_tester (ddr, loop_nest); 4278 4279 if (flag_check_data_deps) 4280 { 4281 /* Dump the dependences from the first algorithm. */ 4282 if (dump_file && (dump_flags & TDF_DETAILS)) 4283 { 4284 fprintf (dump_file, "\n\nBanerjee Analyzer\n"); 4285 dump_data_dependence_relation (dump_file, ddr); 4286 } 4287 4288 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) 4289 { 4290 bool maybe_dependent; 4291 vec<lambda_vector> dir_vects, dist_vects; 4292 4293 /* Save the result of the first DD analyzer. */ 4294 dist_vects = DDR_DIST_VECTS (ddr); 4295 dir_vects = DDR_DIR_VECTS (ddr); 4296 4297 /* Reset the information. */ 4298 DDR_DIST_VECTS (ddr).create (0); 4299 DDR_DIR_VECTS (ddr).create (0); 4300 4301 /* Compute the same information using Omega. */ 4302 if (!init_omega_for_ddr (ddr, &maybe_dependent)) 4303 goto csys_dont_know; 4304 4305 if (dump_file && (dump_flags & TDF_DETAILS)) 4306 { 4307 fprintf (dump_file, "Omega Analyzer\n"); 4308 dump_data_dependence_relation (dump_file, ddr); 4309 } 4310 4311 /* Check that we get the same information. */ 4312 if (maybe_dependent) 4313 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects, 4314 dir_vects)); 4315 } 4316 } 4317 } 4318 4319 /* As a last case, if the dependence cannot be determined, or if 4320 the dependence is considered too difficult to determine, answer 4321 "don't know". */ 4322 else 4323 { 4324 csys_dont_know:; 4325 dependence_stats.num_dependence_undetermined++; 4326 4327 if (dump_file && (dump_flags & TDF_DETAILS)) 4328 { 4329 fprintf (dump_file, "Data ref a:\n"); 4330 dump_data_reference (dump_file, dra); 4331 fprintf (dump_file, "Data ref b:\n"); 4332 dump_data_reference (dump_file, drb); 4333 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n"); 4334 } 4335 finalize_ddr_dependent (ddr, chrec_dont_know); 4336 } 4337 } 4338 4339 if (dump_file && (dump_flags & TDF_DETAILS)) 4340 { 4341 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 4342 fprintf (dump_file, ") -> no dependence\n"); 4343 else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) 4344 fprintf (dump_file, ") -> dependence analysis failed\n"); 4345 else 4346 fprintf (dump_file, ")\n"); 4347 } 4348} 4349 4350/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all 4351 the data references in DATAREFS, in the LOOP_NEST. When 4352 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self 4353 relations. Return true when successful, i.e. data references number 4354 is small enough to be handled. */ 4355 4356bool 4357compute_all_dependences (vec<data_reference_p> datarefs, 4358 vec<ddr_p> *dependence_relations, 4359 vec<loop_p> loop_nest, 4360 bool compute_self_and_rr) 4361{ 4362 struct data_dependence_relation *ddr; 4363 struct data_reference *a, *b; 4364 unsigned int i, j; 4365 4366 if ((int) datarefs.length () 4367 > PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) 4368 { 4369 struct data_dependence_relation *ddr; 4370 4371 /* Insert a single relation into dependence_relations: 4372 chrec_dont_know. */ 4373 ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest); 4374 dependence_relations->safe_push (ddr); 4375 return false; 4376 } 4377 4378 FOR_EACH_VEC_ELT (datarefs, i, a) 4379 for (j = i + 1; datarefs.iterate (j, &b); j++) 4380 if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr) 4381 { 4382 ddr = initialize_data_dependence_relation (a, b, loop_nest); 4383 dependence_relations->safe_push (ddr); 4384 if (loop_nest.exists ()) 4385 compute_affine_dependence (ddr, loop_nest[0]); 4386 } 4387 4388 if (compute_self_and_rr) 4389 FOR_EACH_VEC_ELT (datarefs, i, a) 4390 { 4391 ddr = initialize_data_dependence_relation (a, a, loop_nest); 4392 dependence_relations->safe_push (ddr); 4393 if (loop_nest.exists ()) 4394 compute_affine_dependence (ddr, loop_nest[0]); 4395 } 4396 4397 return true; 4398} 4399 4400/* Describes a location of a memory reference. */ 4401 4402typedef struct data_ref_loc_d 4403{ 4404 /* The memory reference. */ 4405 tree ref; 4406 4407 /* True if the memory reference is read. */ 4408 bool is_read; 4409} data_ref_loc; 4410 4411 4412/* Stores the locations of memory references in STMT to REFERENCES. Returns 4413 true if STMT clobbers memory, false otherwise. */ 4414 4415static bool 4416get_references_in_stmt (gimple stmt, vec<data_ref_loc, va_heap> *references) 4417{ 4418 bool clobbers_memory = false; 4419 data_ref_loc ref; 4420 tree op0, op1; 4421 enum gimple_code stmt_code = gimple_code (stmt); 4422 4423 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects. 4424 As we cannot model data-references to not spelled out 4425 accesses give up if they may occur. */ 4426 if (stmt_code == GIMPLE_CALL 4427 && !(gimple_call_flags (stmt) & ECF_CONST)) 4428 { 4429 /* Allow IFN_GOMP_SIMD_LANE in their own loops. */ 4430 if (gimple_call_internal_p (stmt)) 4431 switch (gimple_call_internal_fn (stmt)) 4432 { 4433 case IFN_GOMP_SIMD_LANE: 4434 { 4435 struct loop *loop = gimple_bb (stmt)->loop_father; 4436 tree uid = gimple_call_arg (stmt, 0); 4437 gcc_assert (TREE_CODE (uid) == SSA_NAME); 4438 if (loop == NULL 4439 || loop->simduid != SSA_NAME_VAR (uid)) 4440 clobbers_memory = true; 4441 break; 4442 } 4443 case IFN_MASK_LOAD: 4444 case IFN_MASK_STORE: 4445 break; 4446 default: 4447 clobbers_memory = true; 4448 break; 4449 } 4450 else 4451 clobbers_memory = true; 4452 } 4453 else if (stmt_code == GIMPLE_ASM 4454 && (gimple_asm_volatile_p (as_a <gasm *> (stmt)) 4455 || gimple_vuse (stmt))) 4456 clobbers_memory = true; 4457 4458 if (!gimple_vuse (stmt)) 4459 return clobbers_memory; 4460 4461 if (stmt_code == GIMPLE_ASSIGN) 4462 { 4463 tree base; 4464 op0 = gimple_assign_lhs (stmt); 4465 op1 = gimple_assign_rhs1 (stmt); 4466 4467 if (DECL_P (op1) 4468 || (REFERENCE_CLASS_P (op1) 4469 && (base = get_base_address (op1)) 4470 && TREE_CODE (base) != SSA_NAME)) 4471 { 4472 ref.ref = op1; 4473 ref.is_read = true; 4474 references->safe_push (ref); 4475 } 4476 } 4477 else if (stmt_code == GIMPLE_CALL) 4478 { 4479 unsigned i, n; 4480 4481 ref.is_read = false; 4482 if (gimple_call_internal_p (stmt)) 4483 switch (gimple_call_internal_fn (stmt)) 4484 { 4485 case IFN_MASK_LOAD: 4486 if (gimple_call_lhs (stmt) == NULL_TREE) 4487 break; 4488 ref.is_read = true; 4489 case IFN_MASK_STORE: 4490 ref.ref = fold_build2 (MEM_REF, 4491 ref.is_read 4492 ? TREE_TYPE (gimple_call_lhs (stmt)) 4493 : TREE_TYPE (gimple_call_arg (stmt, 3)), 4494 gimple_call_arg (stmt, 0), 4495 gimple_call_arg (stmt, 1)); 4496 references->safe_push (ref); 4497 return false; 4498 default: 4499 break; 4500 } 4501 4502 op0 = gimple_call_lhs (stmt); 4503 n = gimple_call_num_args (stmt); 4504 for (i = 0; i < n; i++) 4505 { 4506 op1 = gimple_call_arg (stmt, i); 4507 4508 if (DECL_P (op1) 4509 || (REFERENCE_CLASS_P (op1) && get_base_address (op1))) 4510 { 4511 ref.ref = op1; 4512 ref.is_read = true; 4513 references->safe_push (ref); 4514 } 4515 } 4516 } 4517 else 4518 return clobbers_memory; 4519 4520 if (op0 4521 && (DECL_P (op0) 4522 || (REFERENCE_CLASS_P (op0) && get_base_address (op0)))) 4523 { 4524 ref.ref = op0; 4525 ref.is_read = false; 4526 references->safe_push (ref); 4527 } 4528 return clobbers_memory; 4529} 4530 4531/* Stores the data references in STMT to DATAREFS. If there is an unanalyzable 4532 reference, returns false, otherwise returns true. NEST is the outermost 4533 loop of the loop nest in which the references should be analyzed. */ 4534 4535bool 4536find_data_references_in_stmt (struct loop *nest, gimple stmt, 4537 vec<data_reference_p> *datarefs) 4538{ 4539 unsigned i; 4540 auto_vec<data_ref_loc, 2> references; 4541 data_ref_loc *ref; 4542 bool ret = true; 4543 data_reference_p dr; 4544 4545 if (get_references_in_stmt (stmt, &references)) 4546 return false; 4547 4548 FOR_EACH_VEC_ELT (references, i, ref) 4549 { 4550 dr = create_data_ref (nest, loop_containing_stmt (stmt), 4551 ref->ref, stmt, ref->is_read); 4552 gcc_assert (dr != NULL); 4553 datarefs->safe_push (dr); 4554 } 4555 references.release (); 4556 return ret; 4557} 4558 4559/* Stores the data references in STMT to DATAREFS. If there is an 4560 unanalyzable reference, returns false, otherwise returns true. 4561 NEST is the outermost loop of the loop nest in which the references 4562 should be instantiated, LOOP is the loop in which the references 4563 should be analyzed. */ 4564 4565bool 4566graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple stmt, 4567 vec<data_reference_p> *datarefs) 4568{ 4569 unsigned i; 4570 auto_vec<data_ref_loc, 2> references; 4571 data_ref_loc *ref; 4572 bool ret = true; 4573 data_reference_p dr; 4574 4575 if (get_references_in_stmt (stmt, &references)) 4576 return false; 4577 4578 FOR_EACH_VEC_ELT (references, i, ref) 4579 { 4580 dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read); 4581 gcc_assert (dr != NULL); 4582 datarefs->safe_push (dr); 4583 } 4584 4585 references.release (); 4586 return ret; 4587} 4588 4589/* Search the data references in LOOP, and record the information into 4590 DATAREFS. Returns chrec_dont_know when failing to analyze a 4591 difficult case, returns NULL_TREE otherwise. */ 4592 4593tree 4594find_data_references_in_bb (struct loop *loop, basic_block bb, 4595 vec<data_reference_p> *datarefs) 4596{ 4597 gimple_stmt_iterator bsi; 4598 4599 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) 4600 { 4601 gimple stmt = gsi_stmt (bsi); 4602 4603 if (!find_data_references_in_stmt (loop, stmt, datarefs)) 4604 { 4605 struct data_reference *res; 4606 res = XCNEW (struct data_reference); 4607 datarefs->safe_push (res); 4608 4609 return chrec_dont_know; 4610 } 4611 } 4612 4613 return NULL_TREE; 4614} 4615 4616/* Search the data references in LOOP, and record the information into 4617 DATAREFS. Returns chrec_dont_know when failing to analyze a 4618 difficult case, returns NULL_TREE otherwise. 4619 4620 TODO: This function should be made smarter so that it can handle address 4621 arithmetic as if they were array accesses, etc. */ 4622 4623tree 4624find_data_references_in_loop (struct loop *loop, 4625 vec<data_reference_p> *datarefs) 4626{ 4627 basic_block bb, *bbs; 4628 unsigned int i; 4629 4630 bbs = get_loop_body_in_dom_order (loop); 4631 4632 for (i = 0; i < loop->num_nodes; i++) 4633 { 4634 bb = bbs[i]; 4635 4636 if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know) 4637 { 4638 free (bbs); 4639 return chrec_dont_know; 4640 } 4641 } 4642 free (bbs); 4643 4644 return NULL_TREE; 4645} 4646 4647/* Recursive helper function. */ 4648 4649static bool 4650find_loop_nest_1 (struct loop *loop, vec<loop_p> *loop_nest) 4651{ 4652 /* Inner loops of the nest should not contain siblings. Example: 4653 when there are two consecutive loops, 4654 4655 | loop_0 4656 | loop_1 4657 | A[{0, +, 1}_1] 4658 | endloop_1 4659 | loop_2 4660 | A[{0, +, 1}_2] 4661 | endloop_2 4662 | endloop_0 4663 4664 the dependence relation cannot be captured by the distance 4665 abstraction. */ 4666 if (loop->next) 4667 return false; 4668 4669 loop_nest->safe_push (loop); 4670 if (loop->inner) 4671 return find_loop_nest_1 (loop->inner, loop_nest); 4672 return true; 4673} 4674 4675/* Return false when the LOOP is not well nested. Otherwise return 4676 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will 4677 contain the loops from the outermost to the innermost, as they will 4678 appear in the classic distance vector. */ 4679 4680bool 4681find_loop_nest (struct loop *loop, vec<loop_p> *loop_nest) 4682{ 4683 loop_nest->safe_push (loop); 4684 if (loop->inner) 4685 return find_loop_nest_1 (loop->inner, loop_nest); 4686 return true; 4687} 4688 4689/* Returns true when the data dependences have been computed, false otherwise. 4690 Given a loop nest LOOP, the following vectors are returned: 4691 DATAREFS is initialized to all the array elements contained in this loop, 4692 DEPENDENCE_RELATIONS contains the relations between the data references. 4693 Compute read-read and self relations if 4694 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 4695 4696bool 4697compute_data_dependences_for_loop (struct loop *loop, 4698 bool compute_self_and_read_read_dependences, 4699 vec<loop_p> *loop_nest, 4700 vec<data_reference_p> *datarefs, 4701 vec<ddr_p> *dependence_relations) 4702{ 4703 bool res = true; 4704 4705 memset (&dependence_stats, 0, sizeof (dependence_stats)); 4706 4707 /* If the loop nest is not well formed, or one of the data references 4708 is not computable, give up without spending time to compute other 4709 dependences. */ 4710 if (!loop 4711 || !find_loop_nest (loop, loop_nest) 4712 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know 4713 || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest, 4714 compute_self_and_read_read_dependences)) 4715 res = false; 4716 4717 if (dump_file && (dump_flags & TDF_STATS)) 4718 { 4719 fprintf (dump_file, "Dependence tester statistics:\n"); 4720 4721 fprintf (dump_file, "Number of dependence tests: %d\n", 4722 dependence_stats.num_dependence_tests); 4723 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n", 4724 dependence_stats.num_dependence_dependent); 4725 fprintf (dump_file, "Number of dependence tests classified independent: %d\n", 4726 dependence_stats.num_dependence_independent); 4727 fprintf (dump_file, "Number of undetermined dependence tests: %d\n", 4728 dependence_stats.num_dependence_undetermined); 4729 4730 fprintf (dump_file, "Number of subscript tests: %d\n", 4731 dependence_stats.num_subscript_tests); 4732 fprintf (dump_file, "Number of undetermined subscript tests: %d\n", 4733 dependence_stats.num_subscript_undetermined); 4734 fprintf (dump_file, "Number of same subscript function: %d\n", 4735 dependence_stats.num_same_subscript_function); 4736 4737 fprintf (dump_file, "Number of ziv tests: %d\n", 4738 dependence_stats.num_ziv); 4739 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n", 4740 dependence_stats.num_ziv_dependent); 4741 fprintf (dump_file, "Number of ziv tests returning independent: %d\n", 4742 dependence_stats.num_ziv_independent); 4743 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n", 4744 dependence_stats.num_ziv_unimplemented); 4745 4746 fprintf (dump_file, "Number of siv tests: %d\n", 4747 dependence_stats.num_siv); 4748 fprintf (dump_file, "Number of siv tests returning dependent: %d\n", 4749 dependence_stats.num_siv_dependent); 4750 fprintf (dump_file, "Number of siv tests returning independent: %d\n", 4751 dependence_stats.num_siv_independent); 4752 fprintf (dump_file, "Number of siv tests unimplemented: %d\n", 4753 dependence_stats.num_siv_unimplemented); 4754 4755 fprintf (dump_file, "Number of miv tests: %d\n", 4756 dependence_stats.num_miv); 4757 fprintf (dump_file, "Number of miv tests returning dependent: %d\n", 4758 dependence_stats.num_miv_dependent); 4759 fprintf (dump_file, "Number of miv tests returning independent: %d\n", 4760 dependence_stats.num_miv_independent); 4761 fprintf (dump_file, "Number of miv tests unimplemented: %d\n", 4762 dependence_stats.num_miv_unimplemented); 4763 } 4764 4765 return res; 4766} 4767 4768/* Returns true when the data dependences for the basic block BB have been 4769 computed, false otherwise. 4770 DATAREFS is initialized to all the array elements contained in this basic 4771 block, DEPENDENCE_RELATIONS contains the relations between the data 4772 references. Compute read-read and self relations if 4773 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */ 4774bool 4775compute_data_dependences_for_bb (basic_block bb, 4776 bool compute_self_and_read_read_dependences, 4777 vec<data_reference_p> *datarefs, 4778 vec<ddr_p> *dependence_relations) 4779{ 4780 if (find_data_references_in_bb (NULL, bb, datarefs) == chrec_dont_know) 4781 return false; 4782 4783 return compute_all_dependences (*datarefs, dependence_relations, vNULL, 4784 compute_self_and_read_read_dependences); 4785} 4786 4787/* Entry point (for testing only). Analyze all the data references 4788 and the dependence relations in LOOP. 4789 4790 The data references are computed first. 4791 4792 A relation on these nodes is represented by a complete graph. Some 4793 of the relations could be of no interest, thus the relations can be 4794 computed on demand. 4795 4796 In the following function we compute all the relations. This is 4797 just a first implementation that is here for: 4798 - for showing how to ask for the dependence relations, 4799 - for the debugging the whole dependence graph, 4800 - for the dejagnu testcases and maintenance. 4801 4802 It is possible to ask only for a part of the graph, avoiding to 4803 compute the whole dependence graph. The computed dependences are 4804 stored in a knowledge base (KB) such that later queries don't 4805 recompute the same information. The implementation of this KB is 4806 transparent to the optimizer, and thus the KB can be changed with a 4807 more efficient implementation, or the KB could be disabled. */ 4808static void 4809analyze_all_data_dependences (struct loop *loop) 4810{ 4811 unsigned int i; 4812 int nb_data_refs = 10; 4813 vec<data_reference_p> datarefs; 4814 datarefs.create (nb_data_refs); 4815 vec<ddr_p> dependence_relations; 4816 dependence_relations.create (nb_data_refs * nb_data_refs); 4817 vec<loop_p> loop_nest; 4818 loop_nest.create (3); 4819 4820 /* Compute DDs on the whole function. */ 4821 compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs, 4822 &dependence_relations); 4823 4824 if (dump_file) 4825 { 4826 dump_data_dependence_relations (dump_file, dependence_relations); 4827 fprintf (dump_file, "\n\n"); 4828 4829 if (dump_flags & TDF_DETAILS) 4830 dump_dist_dir_vectors (dump_file, dependence_relations); 4831 4832 if (dump_flags & TDF_STATS) 4833 { 4834 unsigned nb_top_relations = 0; 4835 unsigned nb_bot_relations = 0; 4836 unsigned nb_chrec_relations = 0; 4837 struct data_dependence_relation *ddr; 4838 4839 FOR_EACH_VEC_ELT (dependence_relations, i, ddr) 4840 { 4841 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr))) 4842 nb_top_relations++; 4843 4844 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 4845 nb_bot_relations++; 4846 4847 else 4848 nb_chrec_relations++; 4849 } 4850 4851 gather_stats_on_scev_database (); 4852 } 4853 } 4854 4855 loop_nest.release (); 4856 free_dependence_relations (dependence_relations); 4857 free_data_refs (datarefs); 4858} 4859 4860/* Computes all the data dependences and check that the results of 4861 several analyzers are the same. */ 4862 4863void 4864tree_check_data_deps (void) 4865{ 4866 struct loop *loop_nest; 4867 4868 FOR_EACH_LOOP (loop_nest, 0) 4869 analyze_all_data_dependences (loop_nest); 4870} 4871 4872/* Free the memory used by a data dependence relation DDR. */ 4873 4874void 4875free_dependence_relation (struct data_dependence_relation *ddr) 4876{ 4877 if (ddr == NULL) 4878 return; 4879 4880 if (DDR_SUBSCRIPTS (ddr).exists ()) 4881 free_subscripts (DDR_SUBSCRIPTS (ddr)); 4882 DDR_DIST_VECTS (ddr).release (); 4883 DDR_DIR_VECTS (ddr).release (); 4884 4885 free (ddr); 4886} 4887 4888/* Free the memory used by the data dependence relations from 4889 DEPENDENCE_RELATIONS. */ 4890 4891void 4892free_dependence_relations (vec<ddr_p> dependence_relations) 4893{ 4894 unsigned int i; 4895 struct data_dependence_relation *ddr; 4896 4897 FOR_EACH_VEC_ELT (dependence_relations, i, ddr) 4898 if (ddr) 4899 free_dependence_relation (ddr); 4900 4901 dependence_relations.release (); 4902} 4903 4904/* Free the memory used by the data references from DATAREFS. */ 4905 4906void 4907free_data_refs (vec<data_reference_p> datarefs) 4908{ 4909 unsigned int i; 4910 struct data_reference *dr; 4911 4912 FOR_EACH_VEC_ELT (datarefs, i, dr) 4913 free_data_ref (dr); 4914 datarefs.release (); 4915} 4916