1/* Induction variable optimizations. 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc. 3 4This file is part of GCC. 5 6GCC is free software; you can redistribute it and/or modify it 7under the terms of the GNU General Public License as published by the 8Free Software Foundation; either version 2, or (at your option) any 9later version. 10 11GCC is distributed in the hope that it will be useful, but WITHOUT 12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14for more details. 15 16You should have received a copy of the GNU General Public License 17along with GCC; see the file COPYING. If not, write to the Free 18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 1902110-1301, USA. */ 20 21/* This pass tries to find the optimal set of induction variables for the loop. 22 It optimizes just the basic linear induction variables (although adding 23 support for other types should not be too hard). It includes the 24 optimizations commonly known as strength reduction, induction variable 25 coalescing and induction variable elimination. It does it in the 26 following steps: 27 28 1) The interesting uses of induction variables are found. This includes 29 30 -- uses of induction variables in non-linear expressions 31 -- addresses of arrays 32 -- comparisons of induction variables 33 34 2) Candidates for the induction variables are found. This includes 35 36 -- old induction variables 37 -- the variables defined by expressions derived from the "interesting 38 uses" above 39 40 3) The optimal (w.r. to a cost function) set of variables is chosen. The 41 cost function assigns a cost to sets of induction variables and consists 42 of three parts: 43 44 -- The use costs. Each of the interesting uses chooses the best induction 45 variable in the set and adds its cost to the sum. 
The cost reflects
      the time spent on modifying the induction variables value to be usable
      for the given purpose (adding base and offset for arrays, etc.).
   -- The variable costs.  Each of the variables has a cost assigned that
      reflects the costs associated with incrementing the value of the
      variable.  The original variables are somewhat preferred.
   -- The set cost.  Depending on the size of the set, extra cost may be
      added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"  /* NOTE(review): duplicate include -- "cfgloop.h" is
			 already included above; harmless (guarded) but could
			 be dropped.  */
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Information attached to loop.  */
struct loop_data
{
  unsigned regs_used;	/* Number of registers used.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_OUTER,		/* The induction variable is used outside the loop.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The data for LOOP.  */

static inline struct loop_data *
loop_data (struct loop *loop)
{
  return loop->aux;
}

/* The single loop exit if it dominates the latch, NULL otherwise.
 */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  /* The exit only dominates the latch if its source block is executed in
     every iteration.  */
  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, " type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, " base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, " step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      /* A NULL step means the value is loop invariant.  */
      fprintf (file, " invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, " base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, " is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, " generic\n");
      break;

    case USE_OUTER:
      fprintf (file, " outside\n");
      break;

    case USE_ADDRESS:
      fprintf (file, " address\n");
      break;

    case USE_COMPARE:
      fprintf (file, " compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, " in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, " at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, " related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, " depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  /* A candidate without an iv is a "pseudocandidate" standing for final
     value replacement.  */
  if (!iv)
    {
      fprintf (file, " final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, " incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  /* Mask of the low BITS bits; the double shift avoids undefined behavior
     when BITS equals the width of the type.  */
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).
     This works because the multiplicative group of odd residues modulo
     2^bits has order 2^(bits - 1), so b^(2^(bits - 1)) = 1 (mod 2^bits);
     repeated squaring accumulates the needed product.  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  /* Sign-extend the result from BITS bits.  */
  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  /* Within the block itself, only the exit test (the last statement) comes
     after the increment position.  */
  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.
*/ 642 643static bool 644stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt) 645{ 646 switch (cand->pos) 647 { 648 case IP_END: 649 return false; 650 651 case IP_NORMAL: 652 return stmt_after_ip_normal_pos (loop, stmt); 653 654 case IP_ORIGINAL: 655 return stmt_after_ip_original_pos (cand, stmt); 656 657 default: 658 gcc_unreachable (); 659 } 660} 661 662/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */ 663 664static bool 665abnormal_ssa_name_p (tree exp) 666{ 667 if (!exp) 668 return false; 669 670 if (TREE_CODE (exp) != SSA_NAME) 671 return false; 672 673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0; 674} 675 676/* Returns false if BASE or INDEX contains a ssa name that occurs in an 677 abnormal phi node. Callback for for_each_index. */ 678 679static bool 680idx_contains_abnormal_ssa_name_p (tree base, tree *index, 681 void *data ATTRIBUTE_UNUSED) 682{ 683 if (TREE_CODE (base) == ARRAY_REF) 684 { 685 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2))) 686 return false; 687 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3))) 688 return false; 689 } 690 691 return !abnormal_ssa_name_p (*index); 692} 693 694/* Returns true if EXPR contains a ssa name that occurs in an 695 abnormal phi node. */ 696 697bool 698contains_abnormal_ssa_name_p (tree expr) 699{ 700 enum tree_code code; 701 enum tree_code_class class; 702 703 if (!expr) 704 return false; 705 706 code = TREE_CODE (expr); 707 class = TREE_CODE_CLASS (code); 708 709 if (code == SSA_NAME) 710 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0; 711 712 if (code == INTEGER_CST 713 || is_gimple_min_invariant (expr)) 714 return false; 715 716 if (code == ADDR_EXPR) 717 return !for_each_index (&TREE_OPERAND (expr, 0), 718 idx_contains_abnormal_ssa_name_p, 719 NULL); 720 721 switch (class) 722 { 723 case tcc_binary: 724 case tcc_comparison: 725 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))) 726 return true; 727 728 /* Fallthru. 
*/ 729 case tcc_unary: 730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))) 731 return true; 732 733 break; 734 735 default: 736 gcc_unreachable (); 737 } 738 739 return false; 740} 741 742/* Element of the table in that we cache the numbers of iterations obtained 743 from exits of the loop. */ 744 745struct nfe_cache_elt 746{ 747 /* The edge for that the number of iterations is cached. */ 748 edge exit; 749 750 /* Number of iterations corresponding to this exit, or NULL if it cannot be 751 determined. */ 752 tree niter; 753}; 754 755/* Hash function for nfe_cache_elt E. */ 756 757static hashval_t 758nfe_hash (const void *e) 759{ 760 const struct nfe_cache_elt *elt = e; 761 762 return htab_hash_pointer (elt->exit); 763} 764 765/* Equality function for nfe_cache_elt E1 and edge E2. */ 766 767static int 768nfe_eq (const void *e1, const void *e2) 769{ 770 const struct nfe_cache_elt *elt1 = e1; 771 772 return elt1->exit == e2; 773} 774 775/* Returns tree describing number of iterations determined from 776 EXIT of DATA->current_loop, or NULL if something goes wrong. */ 777 778static tree 779niter_for_exit (struct ivopts_data *data, edge exit) 780{ 781 struct nfe_cache_elt *nfe_desc; 782 struct tree_niter_desc desc; 783 PTR *slot; 784 785 slot = htab_find_slot_with_hash (data->niters, exit, 786 htab_hash_pointer (exit), 787 INSERT); 788 789 if (!*slot) 790 { 791 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt)); 792 nfe_desc->exit = exit; 793 794 /* Try to determine number of iterations. We must know it 795 unconditionally (i.e., without possibility of # of iterations 796 being zero). Also, we cannot safely work with ssa names that 797 appear in phi nodes on abnormal edges, so that we do not create 798 overlapping life ranges for them (PR 27283). 
*/ 799 if (number_of_iterations_exit (data->current_loop, 800 exit, &desc, true) 801 && zero_p (desc.may_be_zero) 802 && !contains_abnormal_ssa_name_p (desc.niter)) 803 nfe_desc->niter = desc.niter; 804 else 805 nfe_desc->niter = NULL_TREE; 806 } 807 else 808 nfe_desc = *slot; 809 810 return nfe_desc->niter; 811} 812 813/* Returns tree describing number of iterations determined from 814 single dominating exit of DATA->current_loop, or NULL if something 815 goes wrong. */ 816 817static tree 818niter_for_single_dom_exit (struct ivopts_data *data) 819{ 820 edge exit = single_dom_exit (data->current_loop); 821 822 if (!exit) 823 return NULL; 824 825 return niter_for_exit (data, exit); 826} 827 828/* Initializes data structures used by the iv optimization pass, stored 829 in DATA. LOOPS is the loop tree. */ 830 831static void 832tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data) 833{ 834 unsigned i; 835 836 data->version_info_size = 2 * num_ssa_names; 837 data->version_info = xcalloc (data->version_info_size, 838 sizeof (struct version_info)); 839 data->relevant = BITMAP_ALLOC (NULL); 840 data->important_candidates = BITMAP_ALLOC (NULL); 841 data->max_inv_id = 0; 842 data->niters = htab_create (10, nfe_hash, nfe_eq, free); 843 844 for (i = 1; i < loops->num; i++) 845 if (loops->parray[i]) 846 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data)); 847 848 data->iv_uses = VEC_alloc (iv_use_p, heap, 20); 849 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20); 850 decl_rtl_to_reset = VEC_alloc (tree, heap, 20); 851} 852 853/* Returns a memory object to that EXPR points. In case we are able to 854 determine that it does not point to any such object, NULL is returned. 
 */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* Only pointer-typed expressions can designate a base object.  */
  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      /* &(*p)->... is based on whatever p itself is based on.  */
      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      /* Conversions do not change the base object.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = xcalloc (1, sizeof (struct iv));

  /* Canonicalize a zero step to NULL (loop invariant).  */
  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      /* A name defined outside of the loop is an invariant from the loop's
	 point of view -- record it as an iv with a NULL step.  */
      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      /* The initial value flows in over the preheader edge.  */
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      /* The incremented value flows back over the latch edge.  */
      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.
*/ 1051 1052static bool 1053find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv) 1054{ 1055 tree lhs; 1056 struct loop *loop = data->current_loop; 1057 1058 iv->base = NULL_TREE; 1059 iv->step = NULL_TREE; 1060 1061 if (TREE_CODE (stmt) != MODIFY_EXPR) 1062 return false; 1063 1064 lhs = TREE_OPERAND (stmt, 0); 1065 if (TREE_CODE (lhs) != SSA_NAME) 1066 return false; 1067 1068 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true)) 1069 return false; 1070 iv->base = expand_simple_operations (iv->base); 1071 1072 if (contains_abnormal_ssa_name_p (iv->base) 1073 || contains_abnormal_ssa_name_p (iv->step)) 1074 return false; 1075 1076 return true; 1077} 1078 1079/* Finds general ivs in statement STMT. */ 1080 1081static void 1082find_givs_in_stmt (struct ivopts_data *data, tree stmt) 1083{ 1084 affine_iv iv; 1085 1086 if (!find_givs_in_stmt_scev (data, stmt, &iv)) 1087 return; 1088 1089 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step); 1090} 1091 1092/* Finds general ivs in basic block BB. */ 1093 1094static void 1095find_givs_in_bb (struct ivopts_data *data, basic_block bb) 1096{ 1097 block_stmt_iterator bsi; 1098 1099 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) 1100 find_givs_in_stmt (data, bsi_stmt (bsi)); 1101} 1102 1103/* Finds general ivs. */ 1104 1105static void 1106find_givs (struct ivopts_data *data) 1107{ 1108 struct loop *loop = data->current_loop; 1109 basic_block *body = get_loop_body_in_dom_order (loop); 1110 unsigned i; 1111 1112 for (i = 0; i < loop->num_nodes; i++) 1113 find_givs_in_bb (data, body[i]); 1114 free (body); 1115} 1116 1117/* For each ssa name defined in LOOP determines whether it is an induction 1118 variable and if so, its initial value and step. 
*/ 1119 1120static bool 1121find_induction_variables (struct ivopts_data *data) 1122{ 1123 unsigned i; 1124 bitmap_iterator bi; 1125 1126 if (!find_bivs (data)) 1127 return false; 1128 1129 find_givs (data); 1130 mark_bivs (data); 1131 1132 if (dump_file && (dump_flags & TDF_DETAILS)) 1133 { 1134 tree niter = niter_for_single_dom_exit (data); 1135 1136 if (niter) 1137 { 1138 fprintf (dump_file, " number of iterations "); 1139 print_generic_expr (dump_file, niter, TDF_SLIM); 1140 fprintf (dump_file, "\n\n"); 1141 }; 1142 1143 fprintf (dump_file, "Induction variables:\n\n"); 1144 1145 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1146 { 1147 if (ver_info (data, i)->iv) 1148 dump_iv (dump_file, ver_info (data, i)->iv); 1149 } 1150 } 1151 1152 return true; 1153} 1154 1155/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */ 1156 1157static struct iv_use * 1158record_use (struct ivopts_data *data, tree *use_p, struct iv *iv, 1159 tree stmt, enum use_type use_type) 1160{ 1161 struct iv_use *use = xcalloc (1, sizeof (struct iv_use)); 1162 1163 use->id = n_iv_uses (data); 1164 use->type = use_type; 1165 use->iv = iv; 1166 use->stmt = stmt; 1167 use->op_p = use_p; 1168 use->related_cands = BITMAP_ALLOC (NULL); 1169 1170 /* To avoid showing ssa name in the dumps, if it was not reset by the 1171 caller. */ 1172 iv->ssa_name = NULL_TREE; 1173 1174 if (dump_file && (dump_flags & TDF_DETAILS)) 1175 dump_use (dump_file, use); 1176 1177 VEC_safe_push (iv_use_p, heap, data->iv_uses, use); 1178 1179 return use; 1180} 1181 1182/* Checks whether OP is a loop-level invariant and if so, records it. 1183 NONLINEAR_USE is true if the invariant is used in a way we do not 1184 handle specially. 
*/ 1185 1186static void 1187record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) 1188{ 1189 basic_block bb; 1190 struct version_info *info; 1191 1192 if (TREE_CODE (op) != SSA_NAME 1193 || !is_gimple_reg (op)) 1194 return; 1195 1196 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op)); 1197 if (bb 1198 && flow_bb_inside_loop_p (data->current_loop, bb)) 1199 return; 1200 1201 info = name_info (data, op); 1202 info->name = op; 1203 info->has_nonlin_use |= nonlinear_use; 1204 if (!info->inv_id) 1205 info->inv_id = ++data->max_inv_id; 1206 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); 1207} 1208 1209/* Checks whether the use OP is interesting and if so, records it 1210 as TYPE. */ 1211 1212static struct iv_use * 1213find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op, 1214 enum use_type type) 1215{ 1216 struct iv *iv; 1217 struct iv *civ; 1218 tree stmt; 1219 struct iv_use *use; 1220 1221 if (TREE_CODE (op) != SSA_NAME) 1222 return NULL; 1223 1224 iv = get_iv (data, op); 1225 if (!iv) 1226 return NULL; 1227 1228 if (iv->have_use_for) 1229 { 1230 use = iv_use (data, iv->use_id); 1231 1232 gcc_assert (use->type == USE_NONLINEAR_EXPR 1233 || use->type == USE_OUTER); 1234 1235 if (type == USE_NONLINEAR_EXPR) 1236 use->type = USE_NONLINEAR_EXPR; 1237 return use; 1238 } 1239 1240 if (zero_p (iv->step)) 1241 { 1242 record_invariant (data, op, true); 1243 return NULL; 1244 } 1245 iv->have_use_for = true; 1246 1247 civ = xmalloc (sizeof (struct iv)); 1248 *civ = *iv; 1249 1250 stmt = SSA_NAME_DEF_STMT (op); 1251 gcc_assert (TREE_CODE (stmt) == PHI_NODE 1252 || TREE_CODE (stmt) == MODIFY_EXPR); 1253 1254 use = record_use (data, NULL, civ, stmt, type); 1255 iv->use_id = use->id; 1256 1257 return use; 1258} 1259 1260/* Checks whether the use OP is interesting and if so, records it. 
 */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
}

/* Records a definition of induction variable OP that is used outside of the
   loop.  */

static struct iv_use *
find_interesting_uses_outer (struct ivopts_data *data, tree op)
{
  return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      /* A bare SSA name condition is treated as NAME != 0.  Note that
	 op1_p then points to the local ZERO, not into the IL.  */
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  /* Non-SSA operands get a dummy iv with a NULL step (i.e. a constant).  */
  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with non-invariant value, we may not do any senseful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      /* Fall back to recording the operands as plain nonlinear uses.  */
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  /* Record a compare use based on whichever operand is the moving iv.  */
  civ = xmalloc (sizeof (struct iv));
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      /* Invariant iff not defined inside LOOP (a NULL def_bb means a
	 default definition, which is invariant).  */
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  /* Recurse into all operands of a compound expression.  */
  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* Pass-wide state.  */
  tree stmt;				/* Statement the reference occurs in.  */
  tree *step_p;				/* Accumulated step (in sizetype).  */
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* Replace the index with its initial value; the step is accumulated
     into *DTA->STEP_P below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.
 */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  /* For array references, the element size and lower bound may also
     contain interesting uses.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  /* NOTE(review): TOFFSET (the variable part of the offset) is not
     inspected here; presumably a variable offset could also yield a
     misaligned access -- verify whether that case needs handling.  */
  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF
      || (TREE_CODE (base) == COMPONENT_REF
	  && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace the base and index parts by their iv initial values,
	 accumulating the combined step into STEP.  */
      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Not usable as an address iv; at least record the indices as
     ordinary uses.  */
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.
 */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  /* Every SSA use operand of STMT is a candidate invariant;
     record_invariant itself rejects names defined inside the loop.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  /* A load; the store side is handled too if LHS is a reference.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  /* A store of a simple value.  */
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      /* A loop-header phi that is itself a nonzero-step iv; its
	 arguments are not interesting uses.  */
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      /* Names without an associated iv were already handled by
	 find_invariants_stmt above.  */
      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  /* Phi nodes in the exit destination merge values live across the
     exit; record each incoming value as an outer use.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_outer (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.
*/ 1727 1728static void 1729find_interesting_uses (struct ivopts_data *data) 1730{ 1731 basic_block bb; 1732 block_stmt_iterator bsi; 1733 tree phi; 1734 basic_block *body = get_loop_body (data->current_loop); 1735 unsigned i; 1736 struct version_info *info; 1737 edge e; 1738 1739 if (dump_file && (dump_flags & TDF_DETAILS)) 1740 fprintf (dump_file, "Uses:\n\n"); 1741 1742 for (i = 0; i < data->current_loop->num_nodes; i++) 1743 { 1744 edge_iterator ei; 1745 bb = body[i]; 1746 1747 FOR_EACH_EDGE (e, ei, bb->succs) 1748 if (e->dest != EXIT_BLOCK_PTR 1749 && !flow_bb_inside_loop_p (data->current_loop, e->dest)) 1750 find_interesting_uses_outside (data, e); 1751 1752 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi)) 1753 find_interesting_uses_stmt (data, phi); 1754 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi)) 1755 find_interesting_uses_stmt (data, bsi_stmt (bsi)); 1756 } 1757 1758 if (dump_file && (dump_flags & TDF_DETAILS)) 1759 { 1760 bitmap_iterator bi; 1761 1762 fprintf (dump_file, "\n"); 1763 1764 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 1765 { 1766 info = ver_info (data, i); 1767 if (info->inv_id) 1768 { 1769 fprintf (dump_file, " "); 1770 print_generic_expr (dump_file, info->name, TDF_SLIM); 1771 fprintf (dump_file, " is invariant (%d)%s\n", 1772 info->inv_id, info->has_nonlin_use ? "" : ", eliminable"); 1773 } 1774 } 1775 1776 fprintf (dump_file, "\n"); 1777 } 1778 1779 free (body); 1780} 1781 1782/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR 1783 is true, assume we are inside an address. If TOP_COMPREF is true, assume 1784 we are at the top-level of the processed address. 
 */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      /* The whole constant moves into *OFFSET; return zero in its place.  */
      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      /* Offsets combine additively; MINUS negates the second one.
	 (Unsigned HOST_WIDE_INT arithmetic wraps, matching modulo
	 semantics of the eventual address computation.)  */
      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* Constant part of the index, scaled by the element size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Entering an address: recurse with inside_addr/top_compref set.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      /* Below a dereference we are no longer inside an address.  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

/* Returns variant of TYPE that can be used as base for different uses.
1930 For integer types, we return unsigned variant of the type, which 1931 avoids problems with overflows. For pointer types, we return void *. */ 1932 1933static tree 1934generic_type_for (tree type) 1935{ 1936 if (POINTER_TYPE_P (type)) 1937 return ptr_type_node; 1938 1939 if (TYPE_UNSIGNED (type)) 1940 return type; 1941 1942 return unsigned_type_for (type); 1943} 1944 1945/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains 1946 the bitmap to that we should store it. */ 1947 1948static struct ivopts_data *fd_ivopts_data; 1949static tree 1950find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data) 1951{ 1952 bitmap *depends_on = data; 1953 struct version_info *info; 1954 1955 if (TREE_CODE (*expr_p) != SSA_NAME) 1956 return NULL_TREE; 1957 info = name_info (fd_ivopts_data, *expr_p); 1958 1959 if (!info->inv_id || info->has_nonlin_use) 1960 return NULL_TREE; 1961 1962 if (!*depends_on) 1963 *depends_on = BITMAP_ALLOC (NULL); 1964 bitmap_set_bit (*depends_on, info->inv_id); 1965 1966 return NULL_TREE; 1967} 1968 1969/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and 1970 position to POS. If USE is not NULL, the candidate is set as related to 1971 it. If both BASE and STEP are NULL, we add a pseudocandidate for the 1972 replacement of the final value of the iv by a direct computation. 
 */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      /* Normalize the candidate to a generic (unsigned or void *) type.  */
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  /* Look for an existing equivalent candidate to avoid duplicates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  /* Pseudocandidate (no iv) matches only another pseudocandidate.  */
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  /* No match -- create a new candidate with the next free id.  */
  if (i == n_iv_cands (data))
    {
      cand = xcalloc (1, sizeof (struct iv_cand));
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may depend on loop invariants; record them.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* Importance is sticky: once important, always important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.
*/ 2118 2119static void 2120add_standard_iv_candidates_for_size (struct ivopts_data *data, 2121 unsigned int size) 2122{ 2123 tree type = lang_hooks.types.type_for_size (size, true); 2124 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1), 2125 true, NULL); 2126} 2127 2128/* Adds standard iv candidates. */ 2129 2130static void 2131add_standard_iv_candidates (struct ivopts_data *data) 2132{ 2133 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE); 2134 2135 /* The same for a double-integer type if it is still fast enough. */ 2136 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2) 2137 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2); 2138} 2139 2140 2141/* Adds candidates bases on the old induction variable IV. */ 2142 2143static void 2144add_old_iv_candidates (struct ivopts_data *data, struct iv *iv) 2145{ 2146 tree phi, def; 2147 struct iv_cand *cand; 2148 2149 add_candidate (data, iv->base, iv->step, true, NULL); 2150 2151 /* The same, but with initial value zero. */ 2152 add_candidate (data, 2153 build_int_cst (TREE_TYPE (iv->base), 0), 2154 iv->step, true, NULL); 2155 2156 phi = SSA_NAME_DEF_STMT (iv->ssa_name); 2157 if (TREE_CODE (phi) == PHI_NODE) 2158 { 2159 /* Additionally record the possibility of leaving the original iv 2160 untouched. */ 2161 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop)); 2162 cand = add_candidate_1 (data, 2163 iv->base, iv->step, true, IP_ORIGINAL, NULL, 2164 SSA_NAME_DEF_STMT (def)); 2165 cand->var_before = iv->ssa_name; 2166 cand->var_after = def; 2167 } 2168} 2169 2170/* Adds candidates based on the old induction variables. 
*/ 2171 2172static void 2173add_old_ivs_candidates (struct ivopts_data *data) 2174{ 2175 unsigned i; 2176 struct iv *iv; 2177 bitmap_iterator bi; 2178 2179 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi) 2180 { 2181 iv = ver_info (data, i)->iv; 2182 if (iv && iv->biv_p && !zero_p (iv->step)) 2183 add_old_iv_candidates (data, iv); 2184 } 2185} 2186 2187/* Adds candidates based on the value of the induction variable IV and USE. */ 2188 2189static void 2190add_iv_value_candidates (struct ivopts_data *data, 2191 struct iv *iv, struct iv_use *use) 2192{ 2193 unsigned HOST_WIDE_INT offset; 2194 tree base; 2195 2196 add_candidate (data, iv->base, iv->step, false, use); 2197 2198 /* The same, but with initial value zero. Make such variable important, 2199 since it is generic enough so that possibly many uses may be based 2200 on it. */ 2201 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0), 2202 iv->step, true, use); 2203 2204 /* Third, try removing the constant offset. */ 2205 base = strip_offset (iv->base, &offset); 2206 if (offset) 2207 add_candidate (data, base, iv->step, false, use); 2208} 2209 2210/* Possibly adds pseudocandidate for replacing the final value of USE by 2211 a direct computation. */ 2212 2213static void 2214add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use) 2215{ 2216 /* We must know where we exit the loop and how many times does it roll. */ 2217 if (! niter_for_single_dom_exit (data)) 2218 return; 2219 2220 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE); 2221} 2222 2223/* Adds candidates based on the uses. 
*/ 2224 2225static void 2226add_derived_ivs_candidates (struct ivopts_data *data) 2227{ 2228 unsigned i; 2229 2230 for (i = 0; i < n_iv_uses (data); i++) 2231 { 2232 struct iv_use *use = iv_use (data, i); 2233 2234 if (!use) 2235 continue; 2236 2237 switch (use->type) 2238 { 2239 case USE_NONLINEAR_EXPR: 2240 case USE_COMPARE: 2241 case USE_ADDRESS: 2242 /* Just add the ivs based on the value of the iv used here. */ 2243 add_iv_value_candidates (data, use->iv, use); 2244 break; 2245 2246 case USE_OUTER: 2247 add_iv_value_candidates (data, use->iv, use); 2248 2249 /* Additionally, add the pseudocandidate for the possibility to 2250 replace the final value by a direct computation. */ 2251 add_iv_outer_candidates (data, use); 2252 break; 2253 2254 default: 2255 gcc_unreachable (); 2256 } 2257 } 2258} 2259 2260/* Record important candidates and add them to related_cands bitmaps 2261 if needed. */ 2262 2263static void 2264record_important_candidates (struct ivopts_data *data) 2265{ 2266 unsigned i; 2267 struct iv_use *use; 2268 2269 for (i = 0; i < n_iv_cands (data); i++) 2270 { 2271 struct iv_cand *cand = iv_cand (data, i); 2272 2273 if (cand->important) 2274 bitmap_set_bit (data->important_candidates, i); 2275 } 2276 2277 data->consider_all_candidates = (n_iv_cands (data) 2278 <= CONSIDER_ALL_CANDIDATES_BOUND); 2279 2280 if (data->consider_all_candidates) 2281 { 2282 /* We will not need "related_cands" bitmaps in this case, 2283 so release them to decrease peak memory consumption. */ 2284 for (i = 0; i < n_iv_uses (data); i++) 2285 { 2286 use = iv_use (data, i); 2287 BITMAP_FREE (use->related_cands); 2288 } 2289 } 2290 else 2291 { 2292 /* Add important candidates to the related_cands bitmaps. */ 2293 for (i = 0; i < n_iv_uses (data); i++) 2294 bitmap_ior_into (iv_use (data, i)->related_cands, 2295 data->important_candidates); 2296 } 2297} 2298 2299/* Finds the candidates for the induction variables. 
 */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates -- the cost map acts as a small
	     open-addressed hash table and is sized accordingly.  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = xcalloc (size, sizeof (struct cost_pair));
    }
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.*/

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand, unsigned cost,
		 bitmap depends_on, tree value)
{
  unsigned i, s;

  /* An infinite cost means the pair is unusable -- do not record it.  */
  if (cost == INFTY)
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* Direct indexing by candidate id in the full two-dimensional map.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  /* Open-addressing insertion: probe from the hash slot, wrapping once.  */
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  /* Probe sequence mirrors set_use_iv_cost; a miss returns NULL.  */
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET);
      else
	/* Insns that are not single sets get a nominal cost of 1.  */
	cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
    }
  else
    /* Use a fresh fake pseudo register as the address.  */
    x = gen_raw_REG (Pmode, (*regno)++);

  return gen_rtx_MEM (DECL_MODE (obj), x);
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.
*/

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip the component references to reach the base object whose
	 address is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* *ws = 0 tells walk_tree not to descend into this subtree.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      /* Already has DECL_RTL -- nothing to fake up.  */
      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR, by expanding it to RTL in
   a throwaway sequence (using fake registers for the referenced decls)
   and summing the insn costs.  */

static unsigned
computation_cost (tree expr)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;

  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* If the result ended up in memory, account for the address cost too.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  /* Pick the HOST_WIDE_INT half of the constant that holds the sign bit.  */
  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}

/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
   return cst.  Otherwise return NULL_TREE.  Recurses through MULT_EXPR,
   PLUS_EXPR and MINUS_EXPR structure of TOP.  */

static tree
constant_multiple_of (tree type, tree top, tree bot)
{
  tree res, mby, p0, p1;
  enum tree_code code;
  bool negate;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    return build_int_cst (type, 1);

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* TOP = (X * C) -- the multiple of BOT in X, scaled by C.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return NULL_TREE;

      res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!res)
	return NULL_TREE;

      return fold_binary_to_constant (MULT_EXPR, type, res,
				      fold_convert (type, mby));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both operands must individually be multiples of BOT.  */
      p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
      if (!p0)
	return NULL_TREE;
      p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
      if (!p1)
	return NULL_TREE;

      return fold_binary_to_constant (code, type, p0, p1);

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return NULL_TREE;

      bot = fold_convert (type, bot);
      top = fold_convert (type, top);

      /* If BOT seems to be negative, try dividing by -BOT instead, and negate
	 the result afterwards.  */
      if (tree_int_cst_sign_bit (bot))
	{
	  negate = true;
	  bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
	}
      else
	negate = false;

      /* Ditto for TOP.  */
      if (tree_int_cst_sign_bit (top))
	{
	  negate = !negate;
	  top = fold_unary_to_constant (NEGATE_EXPR, type, top);
	}

      /* TOP must be exactly divisible by BOT.  */
      if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
	return NULL_TREE;

      res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
      if (negate)
	res = fold_unary_to_constant (NEGATE_EXPR, type, res);
      return res;

    default:
      return NULL_TREE;
    }
}

/* Sets COMB to CST.  The mask covers exactly TYPE_PRECISION bits, so all
   offset/coefficient arithmetic on COMB wraps modulo 2^precision.  */

static void
aff_combination_const (struct affine_tree_combination *comb, tree type,
		       unsigned HOST_WIDE_INT cst)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 0;
  comb->rest = NULL_TREE;
  comb->offset = cst & comb->mask;
}

/* Sets COMB to single element ELT with coefficient 1.  */

static void
aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
{
  unsigned prec = TYPE_PRECISION (type);

  comb->type = type;
  comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);

  comb->n = 1;
  comb->elts[0] = elt;
  comb->coefs[0] = 1;
  comb->rest = NULL_TREE;
  comb->offset = 0;
}

/* Scales COMB by SCALE.
*/

static void
aff_combination_scale (struct affine_tree_combination *comb,
		       unsigned HOST_WIDE_INT scale)
{
  unsigned i, j;

  if (scale == 1)
    return;

  /* Scaling by zero yields the zero combination.  */
  if (scale == 0)
    {
      aff_combination_const (comb, comb->type, 0);
      return;
    }

  comb->offset = (scale * comb->offset) & comb->mask;
  /* Scale each coefficient, compacting out elements whose coefficient
     becomes zero (mod 2^precision).  */
  for (i = 0, j = 0; i < comb->n; i++)
    {
      comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
      comb->elts[j] = comb->elts[i];
      if (comb->coefs[j] != 0)
	j++;
    }
  comb->n = j;

  if (comb->rest)
    {
      /* If compaction freed a slot, move REST into the element array with
	 coefficient SCALE; otherwise scale REST as a tree.  */
      if (comb->n < MAX_AFF_ELTS)
	{
	  comb->coefs[comb->n] = scale;
	  comb->elts[comb->n] = comb->rest;
	  comb->rest = NULL_TREE;
	  comb->n++;
	}
      else
	comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
				  build_int_cst_type (comb->type, scale));
    }
}

/* Adds ELT * SCALE to COMB.  */

static void
aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
			 unsigned HOST_WIDE_INT scale)
{
  unsigned i;

  if (scale == 0)
    return;

  /* If ELT is already present, just merge the coefficients.  */
  for (i = 0; i < comb->n; i++)
    if (operand_equal_p (comb->elts[i], elt, 0))
      {
	comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
	if (comb->coefs[i])
	  return;

	/* The coefficient cancelled to zero: remove the element by moving
	   the last one into its place.  */
	comb->n--;
	comb->coefs[i] = comb->coefs[comb->n];
	comb->elts[i] = comb->elts[comb->n];

	/* A slot opened up, so REST can be promoted back into the array.  */
	if (comb->rest)
	  {
	    gcc_assert (comb->n == MAX_AFF_ELTS - 1);
	    comb->coefs[comb->n] = 1;
	    comb->elts[comb->n] = comb->rest;
	    comb->rest = NULL_TREE;
	    comb->n++;
	  }
	return;
      }
  /* New element -- append if there is room.  */
  if (comb->n < MAX_AFF_ELTS)
    {
      comb->coefs[comb->n] = scale;
      comb->elts[comb->n] = elt;
      comb->n++;
      return;
    }

  /* No room: fold ELT * SCALE into the REST tree instead.  */
  if (scale == 1)
    elt = fold_convert (comb->type, elt);
  else
    elt = fold_build2 (MULT_EXPR, comb->type,
		       fold_convert (comb->type, elt),
		       build_int_cst_type (comb->type, scale));

  if (comb->rest)
    comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
  else
    comb->rest = elt;
}

/* Adds COMB2 to COMB1.  */

static void
aff_combination_add (struct affine_tree_combination *comb1,
		     struct affine_tree_combination *comb2)
{
  unsigned i;

  comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
  for (i = 0; i < comb2->n; i++)
    aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
  if (comb2->rest)
    aff_combination_add_elt (comb1, comb2->rest, 1);
}

/* Splits EXPR into an affine combination of parts, recursing through
   additive, multiplicative and address expressions.  Anything it cannot
   decompose becomes a single element with coefficient 1.  */

static void
tree_to_aff_combination (tree expr, tree type,
			 struct affine_tree_combination *comb)
{
  struct affine_tree_combination tmp;
  enum tree_code code;
  tree cst, core, toffset;
  HOST_WIDE_INT bitpos, bitsize;
  enum machine_mode mode;
  int unsignedp, volatilep;

  STRIP_NOPS (expr);

  code = TREE_CODE (expr);
  switch (code)
    {
    case INTEGER_CST:
      aff_combination_const (comb, type, int_cst_value (expr));
      return;

    case PLUS_EXPR:
    case MINUS_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
      /* A - B is handled as A + (-1) * B.  */
      if (code == MINUS_EXPR)
	aff_combination_scale (&tmp, -1);
      aff_combination_add (comb, &tmp);
      return;

    case MULT_EXPR:
      cst = TREE_OPERAND (expr, 1);
      if (TREE_CODE (cst) != INTEGER_CST)
	break;
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, int_cst_value (cst));
      return;

    case NEGATE_EXPR:
      tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
      aff_combination_scale (comb, -1);
      return;

    case ADDR_EXPR:
      /* Decompose &x.f[i] into the address of the base object plus the
	 constant byte offset plus the variable offset parts.  */
      core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
				  &toffset, &mode, &unsignedp, &volatilep,
				  false);
      /* Give up on bit-field-like offsets that are not byte-aligned.  */
      if (bitpos % BITS_PER_UNIT != 0)
	break;
      aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
      core = build_fold_addr_expr (core);
      if (TREE_CODE (core) == ADDR_EXPR)
	aff_combination_add_elt (comb, core, 1);
      else
	{
	  tree_to_aff_combination (core, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      if (toffset)
	{
	  tree_to_aff_combination (toffset, type, &tmp);
	  aff_combination_add (comb, &tmp);
	}
      return;

    default:
      break;
    }

  /* Fallback: EXPR is opaque, keep it as a single element.  */
  aff_combination_elt (comb, type, expr);
}

/* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.
   SCALE is interpreted as a signed value in two's complement under MASK,
   so "negative" scales are emitted as subtraction.  */

static tree
add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
		 unsigned HOST_WIDE_INT mask)
{
  enum tree_code code;

  scale &= mask;
  elt = fold_convert (type, elt);

  if (scale == 1)
    {
      if (!expr)
	return elt;

      return fold_build2 (PLUS_EXPR, type, expr, elt);
    }

  /* SCALE == MASK means -1 modulo 2^precision.  */
  if (scale == mask)
    {
      if (!expr)
	return fold_build1 (NEGATE_EXPR, type, elt);

      return fold_build2 (MINUS_EXPR, type, expr, elt);
    }

  if (!expr)
    return fold_build2 (MULT_EXPR, type, elt,
			build_int_cst_type (type, scale));

  if ((scale | (mask >> 1)) == mask)
    {
      /* Scale is negative.  */
      code = MINUS_EXPR;
      scale = (-scale) & mask;
    }
  else
    code = PLUS_EXPR;

  elt = fold_build2 (MULT_EXPR, type, elt,
		     build_int_cst_type (type, scale));
  return fold_build2 (code, type, expr, elt);
}

/* Copies the tree elements of COMB to ensure that they are not shared.  */

static void
unshare_aff_combination (struct affine_tree_combination *comb)
{
  unsigned i;

  for (i = 0; i < comb->n; i++)
    comb->elts[i] = unshare_expr (comb->elts[i]);
  if (comb->rest)
    comb->rest = unshare_expr (comb->rest);
}

/* Makes tree from the affine combination COMB.  */

static tree
aff_combination_to_tree (struct affine_tree_combination *comb)
{
  tree type = comb->type;
  tree expr = comb->rest;
  unsigned i;
  unsigned HOST_WIDE_INT off, sgn;

  /* Handle the special case produced by get_computation_aff when
     the type does not fit in HOST_WIDE_INT.  */
  if (comb->n == 0 && comb->offset == 0)
    return fold_convert (type, expr);

  gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);

  for (i = 0; i < comb->n; i++)
    expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
			    comb->mask);

  if ((comb->offset | (comb->mask >> 1)) == comb->mask)
    {
      /* Offset is negative.  */
      off = (-comb->offset) & comb->mask;
      sgn = comb->mask;
    }
  else
    {
      off = comb->offset;
      sgn = 1;
    }
  return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
			  comb->mask);
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.
*/

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, tree at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree uutype;
  tree expr, delta;
  tree ratio;
  unsigned HOST_WIDE_INT ustepi, cstepi;
  HOST_WIDE_INT ratioi;
  struct affine_tree_combination cbase_aff, expr_aff;
  tree cstep_orig = cstep, ustep_orig = ustep;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  expr = var_at_stmt (loop, cand, at);

  if (TREE_TYPE (expr) != ctype)
    {
      /* This may happen with the original ivs.  */
      expr = fold_convert (ctype, expr);
    }

  /* Compute in the unsigned variant of the use type so that the
     arithmetic wraps modulo 2^precision.  */
  if (TYPE_UNSIGNED (utype))
    uutype = utype;
  else
    {
      uutype = unsigned_type_for (utype);
      ubase = fold_convert (uutype, ubase);
      ustep = fold_convert (uutype, ustep);
    }

  if (uutype != ctype)
    {
      expr = fold_convert (uutype, expr);
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);

      /* If the conversion is not noop, we must take it into account when
	 considering the value of the step.  */
      if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
	cstep_orig = cstep;
    }

  /* Determine RATIO = ustep / cstep -- the factor the candidate must be
     multiplied by to step like the use.  */
  if (cst_and_fits_in_hwi (cstep_orig)
      && cst_and_fits_in_hwi (ustep_orig))
    {
      ustepi = int_cst_value (ustep_orig);
      cstepi = int_cst_value (cstep_orig);

      if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
	{
	  /* TODO maybe consider case when ustep divides cstep and the ratio is
	     a power of 2 (so that the division is fast to execute)?  We would
	     need to be much more careful with overflows etc. then.  */
	  return false;
	}

      ratio = build_int_cst_type (uutype, ratioi);
    }
  else
    {
      ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
      if (!ratio)
	return false;

      /* Ratioi is only used to detect special cases when the multiplicative
	 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
	 we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
	 to integer_onep/integer_all_onesp, since the former ignores
	 TREE_OVERFLOW.  */
      if (cst_and_fits_in_hwi (ratio))
	ratioi = int_cst_value (ratio);
      else if (integer_onep (ratio))
	ratioi = 1;
      else if (integer_all_onesp (ratio))
	ratioi = -1;
      else
	ratioi = 0;
    }

  /* We may need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);

  /* use = ubase - ratio * cbase + ratio * var.

     In general case ubase + ratio * (var - cbase) could be better (one less
     multiplication), but often it is possible to eliminate redundant parts
     of computations from (ubase - ratio * cbase) term, and if it does not
     happen, fold is able to apply the distributive law to obtain this form
     anyway.  */

  if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
    {
      /* Let's compute in trees and just return the result in AFF.  This case
	 should not be very common, and fold itself is not that bad either,
	 so making the aff. functions more complicated to handle this case
	 is not that urgent.  */
      if (ratioi == 1)
	{
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
	}
      else if (ratioi == -1)
	{
	  delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
	  expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
	}
      else
	{
	  delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
	  delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
	  expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
	  expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
	}

      /* n == 0 && offset == 0 with everything in REST is the special
	 shape aff_combination_to_tree recognizes.  */
      aff->type = uutype;
      aff->n = 0;
      aff->offset = 0;
      aff->mask = 0;
      aff->rest = expr;
      return true;
    }

  /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
     possible to compute ratioi.  */
  gcc_assert (ratioi);

  tree_to_aff_combination (ubase, uutype, aff);
  tree_to_aff_combination (cbase, uutype, &cbase_aff);
  tree_to_aff_combination (expr, uutype, &expr_aff);
  aff_combination_scale (&cbase_aff, -ratioi);
  aff_combination_scale (&expr_aff, ratioi);
  aff_combination_add (aff, &cbase_aff);
  aff_combination_add (aff, &expr_aff);

  return true;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  Returns
   NULL_TREE when the use cannot be expressed using the candidate.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, tree at)
{
  struct affine_tree_combination aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.
*/

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  /* Same as get_computation_at, evaluated at the use's own statement.  */
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Returns cost of addition in MODE.  The result is computed once per mode
   by expanding a register-register add and is cached in a static table.  */

static unsigned
add_cost (enum machine_mode mode)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);
  /* Zero also means "not cached yet", so force a minimum cost of 1.  */
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = entry1;
  const struct mbc_entry *e2 = entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a lazily-created hash table keyed on (CST, MODE).  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Cache miss -- insert a new entry and compute its cost by expanding
     the multiplication into a throwaway insn sequence.  */
  *cached = xmalloc (sizeof (struct mbc_entry));
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in address.  The valid
   multipliers in [-MAX_RATIO, MAX_RATIO] are probed once via
   memory_address_p and cached in a static bitmap.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
{
#define MAX_RATIO 128
  static sbitmap valid_mult;

  if (!valid_mult)
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (Pmode, addr))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  The created memory accesses MODE.

   TODO -- there must be some better way.  This all is quite crude.
*/

static unsigned
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
{
  /* Lazily-computed, target-dependent data shared by all calls: the
     largest valid positive/negative offsets and a representative valid
     multiplier, plus a cache of address costs per feature combination.  */
  static bool initialized = false;
  static HOST_WIDE_INT rat, off;
  static HOST_WIDE_INT min_offset, max_offset;
  static unsigned costs[2][2][2][2];
  unsigned cost, acost;
  rtx seq, addr, base;
  bool offset_p, ratio_p;
  rtx reg1;
  HOST_WIDE_INT s_offset;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized)
    {
      HOST_WIDE_INT i;
      initialized = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      /* Probe powers of two up to 2^20 to find the largest offset the
	 target accepts in an address.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      max_offset = i >> 1;
      off = max_offset;

      /* Likewise for negative offsets.  */
      for (i = 1; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (Pmode, addr))
	    break;
	}
      min_offset = -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %d\n", (int) min_offset);
	  fprintf (dump_file, "  max offset %d\n", (int) max_offset);
	}

      /* Pick the smallest multiplier > 1 the target allows, as a
	 representative for costing scaled addresses.  */
      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i))
	  {
	    rat = i;
	    break;
	  }
    }

  /* Sign-extend OFFSET from the precision of Pmode into s_offset.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset <= s_offset && s_offset <= max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio));

  /* A disallowed multiplier has to be applied by an explicit multiply.  */
  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode);

  /* An out-of-range offset has to be added into a register first.  */
  if (s_offset && !offset_p && !symbol_present)
    {
      cost += add_cost (Pmode);
      var_present = true;
    }

  acost = costs[symbol_present][var_present][offset_p][ratio_p];
  if (!acost)
    {
      int old_cse_not_expected;
      acost = 0;

      /* Build a representative address with the requested parts and cost
	 the sequence needed to legitimize it.  */
      addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
      if (ratio_p)
	addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));

      if (var_present)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

      if (symbol_present)
	{
	  base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	  if (offset_p)
	    base = gen_rtx_fmt_e (CONST, Pmode,
				  gen_rtx_fmt_ee (PLUS, Pmode,
						  base,
						  gen_int_mode (off, Pmode)));
	}
      else if (offset_p)
	base = gen_int_mode (off, Pmode);
      else
	base = NULL_RTX;

      if (base)
	addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

      start_sequence ();
      /* To avoid splitting addressing modes, pretend that no cse will
	 follow.  */
      old_cse_not_expected = cse_not_expected;
      cse_not_expected = true;
      addr = memory_address (Pmode, addr);
      cse_not_expected = old_cse_not_expected;
      seq = get_insns ();
      end_sequence ();

      acost = seq_cost (seq);
      acost += address_cost (addr, Pmode);

      /* Zero doubles as "not cached"; force a minimum of 1.  */
      if (!acost)
	acost = 1;
      costs[symbol_present][var_present][offset_p][ratio_p] = acost;
    }

  return cost + acost;
}

/* Estimates cost of forcing expression EXPR into a variable.
*/

unsigned
force_expr_to_var_cost (tree expr)
{
  /* Baseline costs of materializing an integer constant, a symbol
     address and a symbol+offset address, measured once by expanding
     representative expressions.  */
  static bool costs_initialized = false;
  static unsigned integer_cost;
  static unsigned symbol_cost;
  static unsigned address_cost;
  tree op0, op1;
  unsigned cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree var = create_tmp_var_raw (integer_type_node, "test_var");
      rtx x = gen_rtx_MEM (DECL_MODE (var),
			   gen_rtx_SYMBOL_REF (Pmode, "test_var"));
      tree addr;
      tree type = build_pointer_type (integer_type_node);

      integer_cost = computation_cost (build_int_cst_type (integer_type_node,
							   2000));

      SET_DECL_RTL (var, x);
      TREE_STATIC (var) = 1;
      addr = build1 (ADDR_EXPR, type, var);
      /* The +1 slightly penalizes symbols/addresses relative to plain
	 integer constants with otherwise equal expansion cost.  */
      symbol_cost = computation_cost (addr) + 1;

      address_cost
	= computation_cost (build2 (PLUS_EXPR, type,
				    addr,
				    build_int_cst_type (type, 2000))) + 1;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "force_expr_to_var_cost:\n");
	  fprintf (dump_file, "  integer %d\n", (int) integer_cost);
	  fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
	  fprintf (dump_file, "  address %d\n", (int) address_cost);
	  fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
	  fprintf (dump_file, "\n");
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* A bare variable is already in a variable.  */
  if (SSA_VAR_P (expr))
    return 0;

  if (TREE_INVARIANT (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return integer_cost;

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return symbol_cost;
	}

      return address_cost;
    }

  /* Recursively cost the operands of the supported binary operations.  */
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = 0;
      else
	cost0 = force_expr_to_var_cost (op0);

      if (is_gimple_val (op1))
	cost1 = 0;
      else
	cost1 = force_expr_to_var_cost (op1);

      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return target_spill_cost;
    }

  /* Add the cost of the operation itself.  */
  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
      cost = add_cost (mode);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = multiply_by_cost (int_cst_value (op0), mode);
      else if (cst_and_fits_in_hwi (op1))
	cost = multiply_by_cost (int_cst_value (op1), mode);
      else
	/* General (non-constant) multiplication is not costed here.  */
	return target_spill_cost;
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  return cost < target_spill_cost ? cost : target_spill_cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on; it is filled in (via find_depends)
   only when non-NULL.  */

static unsigned
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.
*/ 3574 3575static unsigned 3576split_address_cost (struct ivopts_data *data, 3577 tree addr, bool *symbol_present, bool *var_present, 3578 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3579{ 3580 tree core; 3581 HOST_WIDE_INT bitsize; 3582 HOST_WIDE_INT bitpos; 3583 tree toffset; 3584 enum machine_mode mode; 3585 int unsignedp, volatilep; 3586 3587 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode, 3588 &unsignedp, &volatilep, false); 3589 3590 if (toffset != 0 3591 || bitpos % BITS_PER_UNIT != 0 3592 || TREE_CODE (core) != VAR_DECL) 3593 { 3594 *symbol_present = false; 3595 *var_present = true; 3596 fd_ivopts_data = data; 3597 walk_tree (&addr, find_depends, depends_on, NULL); 3598 return target_spill_cost; 3599 } 3600 3601 *offset += bitpos / BITS_PER_UNIT; 3602 if (TREE_STATIC (core) 3603 || DECL_EXTERNAL (core)) 3604 { 3605 *symbol_present = true; 3606 *var_present = false; 3607 return 0; 3608 } 3609 3610 *symbol_present = false; 3611 *var_present = true; 3612 return 0; 3613} 3614 3615/* Estimates cost of expressing difference of addresses E1 - E2 as 3616 var + symbol + offset. The value of offset is added to OFFSET, 3617 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3618 part is missing. DEPENDS_ON is a set of the invariants the computation 3619 depends on. 
*/ 3620 3621static unsigned 3622ptr_difference_cost (struct ivopts_data *data, 3623 tree e1, tree e2, bool *symbol_present, bool *var_present, 3624 unsigned HOST_WIDE_INT *offset, bitmap *depends_on) 3625{ 3626 HOST_WIDE_INT diff = 0; 3627 unsigned cost; 3628 3629 gcc_assert (TREE_CODE (e1) == ADDR_EXPR); 3630 3631 if (ptr_difference_const (e1, e2, &diff)) 3632 { 3633 *offset += diff; 3634 *symbol_present = false; 3635 *var_present = false; 3636 return 0; 3637 } 3638 3639 if (e2 == integer_zero_node) 3640 return split_address_cost (data, TREE_OPERAND (e1, 0), 3641 symbol_present, var_present, offset, depends_on); 3642 3643 *symbol_present = false; 3644 *var_present = true; 3645 3646 cost = force_var_cost (data, e1, depends_on); 3647 cost += force_var_cost (data, e2, depends_on); 3648 cost += add_cost (Pmode); 3649 3650 return cost; 3651} 3652 3653/* Estimates cost of expressing difference E1 - E2 as 3654 var + symbol + offset. The value of offset is added to OFFSET, 3655 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding 3656 part is missing. DEPENDS_ON is a set of the invariants the computation 3657 depends on. 
*/

static unsigned
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  unsigned cost;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;

  /* Peel constant offsets off both operands; they fold into *OFFSET
     for free.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Differences of addresses are handled by the specialized routine.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
				depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      /* The whole difference folded into the constant offset.  */
      *var_present = false;
      return 0;
    }
  *var_present = true;
  /* E1 - 0 is just the cost of computing E1.  */
  if (zero_p (e2))
    return force_var_cost (data, e1, depends_on);

  if (zero_p (e1))
    {
      /* 0 - E2 needs a negation (costed as multiplication by -1) on top
	 of computing E2.  */
      cost = force_var_cost (data, e2, depends_on);
      cost += multiply_by_cost (-1, mode);

      return cost;
    }

  /* General case -- compute both operands and subtract.  */
  cost = force_var_cost (data, e1, depends_on);
  cost += force_var_cost (data, e2, depends_on);
  cost += add_cost (mode);

  return cost;
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
*/

static unsigned
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, tree at)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present;
  unsigned cost = 0, n_sums;

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return INFTY;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return INFTY;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return INFTY;
    }

  if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* Determine RATIO, the constant factor by that the use's step is a
     multiple of the candidate's step; give up if there is none.  */
  if (cst_and_fits_in_hwi (ustep)
      && cst_and_fits_in_hwi (cstep))
    {
      ustepi = int_cst_value (ustep);

      if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
	return INFTY;
    }
  else
    {
      tree rat;

      rat = constant_multiple_of (utype, ustep, cstep);

      if (!rat)
	return INFTY;

      /* Only ratios representable in a HOST_WIDE_INT (or +-1) are
	 handled.  */
      if (cst_and_fits_in_hwi (rat))
	ratio = int_cst_value (rat);
      else if (integer_onep (rat))
	ratio = 1;
      else if (integer_all_onesp (rat))
	ratio = -1;
      else
	return INFTY;
    }

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      /* ratio * cbase folds into the constant offset.  */
      offset = - ratio * int_cst_value (cbase);
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else if (ratio == 1)
    {
      /* ubase - cbase can be computed as a single difference.  */
      cost += difference_cost (data,
			       ubase, cbase,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }
  else
    {
      /* General case -- CBASE must be computed separately and added.  */
      cost += force_var_cost (data, cbase, depends_on);
      cost += add_cost (TYPE_MODE (ctype));
      cost += difference_cost (data,
			       ubase, integer_zero_node,
			       &symbol_present, &var_present, &offset,
			       depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_after_increment (data->current_loop, cand, at))
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return cost + get_address_cost (symbol_present, var_present, offset, ratio);

  /* Otherwise estimate the costs for computing the expression.  */
  aratio = ratio > 0 ? ratio : -ratio;
  if (!symbol_present && !var_present && !offset)
    {
      /* The use is just ratio * var; only a multiplication may be
	 needed.  */
      if (ratio != 1)
	cost += multiply_by_cost (ratio, TYPE_MODE (ctype));

      return cost;
    }

  if (aratio != 1)
    cost += multiply_by_cost (aratio, TYPE_MODE (ctype));

  /* Count the additions needed to combine the remaining parts.  */
  n_sums = 1;
  if (var_present
      /* Symbol + offset should be compile-time computable.  */
      && (symbol_present || offset))
    n_sums++;

  return cost + n_sums * add_cost (TYPE_MODE (ctype));

fallback:
  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return INFTY;

    if (address_p)
      comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);

    return computation_cost (comp);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  */

static unsigned
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on)
{
  /* Thin wrapper -- evaluate the cost at the use's own statement.  */
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.
*/ 3892 3893static bool 3894determine_use_iv_cost_generic (struct ivopts_data *data, 3895 struct iv_use *use, struct iv_cand *cand) 3896{ 3897 bitmap depends_on; 3898 unsigned cost; 3899 3900 /* The simple case first -- if we need to express value of the preserved 3901 original biv, the cost is 0. This also prevents us from counting the 3902 cost of increment twice -- once at this use and once in the cost of 3903 the candidate. */ 3904 if (cand->pos == IP_ORIGINAL 3905 && cand->incremented_at == use->stmt) 3906 { 3907 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE); 3908 return true; 3909 } 3910 3911 cost = get_computation_cost (data, use, cand, false, &depends_on); 3912 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); 3913 3914 return cost != INFTY; 3915} 3916 3917/* Determines cost of basing replacement of USE on CAND in an address. */ 3918 3919static bool 3920determine_use_iv_cost_address (struct ivopts_data *data, 3921 struct iv_use *use, struct iv_cand *cand) 3922{ 3923 bitmap depends_on; 3924 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on); 3925 3926 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE); 3927 3928 return cost != INFTY; 3929} 3930 3931/* Computes value of induction variable IV in iteration NITER. */ 3932 3933static tree 3934iv_value (struct iv *iv, tree niter) 3935{ 3936 tree val; 3937 tree type = TREE_TYPE (iv->base); 3938 3939 niter = fold_convert (type, niter); 3940 val = fold_build2 (MULT_EXPR, type, iv->step, niter); 3941 3942 return fold_build2 (PLUS_EXPR, type, iv->base, val); 3943} 3944 3945/* Computes value of candidate CAND at position AT in iteration NITER. 
*/

static tree
cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
{
  tree val = iv_value (cand->iv, niter);
  tree type = TREE_TYPE (cand->iv->base);

  /* If AT is located after the candidate's increment, one more step has
     already been taken in iteration NITER.  */
  if (stmt_after_increment (loop, cand, at))
    val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);

  return val;
}

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  /* The period is returned as a mask of the low
     (precision - pow2div) bits of TYPE.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  /* Find the successor edge that leaves the loop; USE's block is assumed
     to end in an exit test with exactly one edge out of the loop.  */
  ex_bb = bb_for_stmt (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  /* If the loop exits when the condition is true, the replacement test
     compares for equality with the bound; otherwise for inequality.  */
  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.
*/ 4001 4002static bool 4003may_eliminate_iv (struct ivopts_data *data, 4004 struct iv_use *use, struct iv_cand *cand, tree *bound) 4005{ 4006 basic_block ex_bb; 4007 edge exit; 4008 tree nit, nit_type; 4009 tree wider_type, period, per_type; 4010 struct loop *loop = data->current_loop; 4011 4012 if (TREE_CODE (cand->iv->step) != INTEGER_CST) 4013 return false; 4014 4015 /* For now works only for exits that dominate the loop latch. TODO -- extend 4016 for other conditions inside loop body. */ 4017 ex_bb = bb_for_stmt (use->stmt); 4018 if (use->stmt != last_stmt (ex_bb) 4019 || TREE_CODE (use->stmt) != COND_EXPR) 4020 return false; 4021 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)) 4022 return false; 4023 4024 exit = EDGE_SUCC (ex_bb, 0); 4025 if (flow_bb_inside_loop_p (loop, exit->dest)) 4026 exit = EDGE_SUCC (ex_bb, 1); 4027 if (flow_bb_inside_loop_p (loop, exit->dest)) 4028 return false; 4029 4030 nit = niter_for_exit (data, exit); 4031 if (!nit) 4032 return false; 4033 4034 nit_type = TREE_TYPE (nit); 4035 4036 /* Determine whether we may use the variable to test whether niter iterations 4037 elapsed. This is the case iff the period of the induction variable is 4038 greater than the number of iterations. */ 4039 period = iv_period (cand->iv); 4040 if (!period) 4041 return false; 4042 per_type = TREE_TYPE (period); 4043 4044 wider_type = TREE_TYPE (period); 4045 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type)) 4046 wider_type = per_type; 4047 else 4048 wider_type = nit_type; 4049 4050 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node, 4051 fold_convert (wider_type, period), 4052 fold_convert (wider_type, nit)))) 4053 return false; 4054 4055 *bound = cand_value_at (loop, cand, use->stmt, nit); 4056 return true; 4057} 4058 4059/* Determines cost of basing replacement of USE on CAND in a condition. 
*/

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE, op, cond;
  bitmap depends_on = NULL;
  unsigned cost;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
      return false;
    }

  /* If the condition can be expressed as a comparison of the candidate
     against a bound, the cost is just that of computing the bound.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      cost = force_var_cost (data, bound, &depends_on);

      set_use_iv_cost (data, use, cand, cost, depends_on, bound);
      return cost != INFTY;
    }

  /* The induction variable elimination failed; just express the original
     giv.  If it is compared with an invariant, note that we cannot get
     rid of it.  */
  cost = get_computation_cost (data, use, cand, false, &depends_on);

  cond = *use->op_p;
  if (TREE_CODE (cond) != SSA_NAME)
    {
      /* Pick the operand that is not the induction variable, i.e. the
	 invariant side of the comparison, and record what it depends on.
	 NOTE(review): this dereferences get_iv () without a NULL check --
	 presumably the recording of compare uses guarantees both SSA
	 operands have an iv entry; verify against find_interesting_uses.  */
      op = TREE_OPERAND (cond, 0);
      if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
	op = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op) == SSA_NAME)
	{
	  op = get_iv (data, op)->base;
	  fd_ivopts_data = data;
	  walk_tree (&op, find_depends, &depends_on, NULL);
	}
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
  return cost != INFTY;
}

/* Checks whether it is possible to replace the final value of USE by
   a direct computation.  If so, the formula is stored to *VALUE.
*/

static bool
may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
			 tree *value)
{
  struct loop *loop = data->current_loop;
  edge exit;
  tree nit;

  /* We need a single exit that dominates the latch, so that the final
     value is well defined.  */
  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
			      bb_for_stmt (use->stmt)));

  /* The number of iterations must be computable.  */
  nit = niter_for_single_dom_exit (data);
  if (!nit)
    return false;

  /* The final value is the value of the iv after NIT iterations.  */
  *value = iv_value (use->iv, nit);

  return true;
}

/* Determines cost of replacing final value of USE using CAND.  */

static bool
determine_use_iv_cost_outer (struct ivopts_data *data,
			     struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  unsigned cost;
  edge exit;
  tree value = NULL_TREE;
  struct loop *loop = data->current_loop;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
      return true;
    }

  if (!cand->iv)
    {
      /* The pseudocandidate -- try replacing the use by a direct
	 computation of the final value.  */
      if (!may_replace_final_value (data, use, &value))
	{
	  set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
	  return false;
	}

      depends_on = NULL;
      cost = force_var_cost (data, value, &depends_on);

      /* The computation is executed only once, after the loop; scale its
	 cost down by the expected iteration count.  */
      cost /= AVG_LOOP_NITER (loop);

      set_use_iv_cost (data, use, cand, cost, depends_on, value);
      return cost != INFTY;
    }

  exit = single_dom_exit (loop);
  if (exit)
    {
      /* If there is just a single exit, we may use value of the candidate
	 after we take it to determine the value of use.  */
      cost = get_computation_cost_at (data, use, cand, false, &depends_on,
				      last_stmt (exit->src));
      if (cost != INFTY)
	cost /= AVG_LOOP_NITER (loop);
    }
  else
    {
      /* Otherwise we just need to compute the iv.  */
      cost = get_computation_cost (data, use, cand, false, &depends_on);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);

  return cost != INFTY;
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  /* Dispatch to the routine handling this kind of use.  */
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_OUTER:
      return determine_use_iv_cost_outer (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Determines costs of basing the use of the iv on an iv candidate.
*/

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  /* Fill the cost map for every (use, candidate) pair considered.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      /* Dump the finite entries of the use-candidate cost map.  */
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || use->cost_map[j].cost == INFTY)
		continue;

	      fprintf (dump_file, "  %d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.
*/

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  unsigned cost_base, cost_step;
  tree base;

  /* Pseudocandidates are free.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));

  /* The initialization cost is amortized over the expected number of
     iterations.  */
  cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);

  /* Prefer the original iv unless we may gain something by replacing it;
     this is not really relevant for artificial ivs created by other
     passes.  */
  if (cand->pos == IP_ORIGINAL
      && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cand->cost--;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cand->cost++;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.
*/

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* Delegate to the register-pressure model, with the number of registers
     the loop already uses and the number of uses in the loop.  */
  return global_cost_for_size (size,
			       loop_data (data->current_loop)->regs_used,
			       n_iv_uses (data));
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  tree phi, op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
      fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
    }

  /* Count the non-iv registers live through the loop header: phi results
     that are not bivs...  */
  n = 0;
  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* ... plus the invariants from outside the loop that have nonlinear
     uses inside it.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  loop_data (loop)->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  /* A missing pair is never cheaper; anything beats a missing pair.  */
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (a->cost > b->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Computes the cost field of IVS structure.
*/

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned cost = 0;

  /* Total cost = use costs + candidate costs + register-pressure cost.  */
  cost += ivs->cand_use_cost;
  cost += ivs->cand_cost;
  cost += ivopts_global_cost_for_size (data, ivs->n_regs);

  ivs->cost = cost;
}

/* Remove invariants in set INVS to set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  /* Decrement the reference count of each invariant; when it drops to
     zero, the invariant no longer occupies a register.  */
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The use becomes unexpressed.  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* This was the last use of the candidate; remove it from the set
	 and subtract its cost and dependencies.  */
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.
*/

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  /* Increment the reference count of each invariant; a first reference
     means the invariant now occupies a register.  */
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  /* First detach the old cost pair, if any (this is the inverse of the
     bookkeeping below).  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate; add it to the set along with its
	     cost and dependencies.  */
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.
*/

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  /* A use seen for the first time starts out unexpressed.  */
  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  /* Pick the cheapest candidate already in the set that can express USE.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  /* An assignment with unexpressed uses is invalid -- infinite cost.  */
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  /* Every invariant CP depends on must already be referenced in IVS.  */
  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  /* Caller owns the returned node; see iv_ca_delta_free.  */
  struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.
*/ 4675 4676static struct iv_ca_delta * 4677iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2) 4678{ 4679 struct iv_ca_delta *last; 4680 4681 if (!l2) 4682 return l1; 4683 4684 if (!l1) 4685 return l2; 4686 4687 for (last = l1; last->next_change; last = last->next_change) 4688 continue; 4689 last->next_change = l2; 4690 4691 return l1; 4692} 4693 4694/* Returns candidate by that USE is expressed in IVS. */ 4695 4696static struct cost_pair * 4697iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use) 4698{ 4699 return ivs->cand_for_use[use->id]; 4700} 4701 4702/* Reverse the list of changes DELTA, forming the inverse to it. */ 4703 4704static struct iv_ca_delta * 4705iv_ca_delta_reverse (struct iv_ca_delta *delta) 4706{ 4707 struct iv_ca_delta *act, *next, *prev = NULL; 4708 struct cost_pair *tmp; 4709 4710 for (act = delta; act; act = next) 4711 { 4712 next = act->next_change; 4713 act->next_change = prev; 4714 prev = act; 4715 4716 tmp = act->old_cp; 4717 act->old_cp = act->new_cp; 4718 act->new_cp = tmp; 4719 } 4720 4721 return prev; 4722} 4723 4724/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are 4725 reverted instead. */ 4726 4727static void 4728iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs, 4729 struct iv_ca_delta *delta, bool forward) 4730{ 4731 struct cost_pair *from, *to; 4732 struct iv_ca_delta *act; 4733 4734 if (!forward) 4735 delta = iv_ca_delta_reverse (delta); 4736 4737 for (act = delta; act; act = act->next_change) 4738 { 4739 from = act->old_cp; 4740 to = act->new_cp; 4741 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from); 4742 iv_ca_set_cp (data, ivs, act->use, to); 4743 } 4744 4745 if (!forward) 4746 iv_ca_delta_reverse (delta); 4747} 4748 4749/* Returns true if CAND is used in IVS. 
*/ 4750 4751static bool 4752iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand) 4753{ 4754 return ivs->n_cand_uses[cand->id] > 0; 4755} 4756 4757/* Returns number of induction variable candidates in the set IVS. */ 4758 4759static unsigned 4760iv_ca_n_cands (struct iv_ca *ivs) 4761{ 4762 return ivs->n_cands; 4763} 4764 4765/* Free the list of changes DELTA. */ 4766 4767static void 4768iv_ca_delta_free (struct iv_ca_delta **delta) 4769{ 4770 struct iv_ca_delta *act, *next; 4771 4772 for (act = *delta; act; act = next) 4773 { 4774 next = act->next_change; 4775 free (act); 4776 } 4777 4778 *delta = NULL; 4779} 4780 4781/* Allocates new iv candidates assignment. */ 4782 4783static struct iv_ca * 4784iv_ca_new (struct ivopts_data *data) 4785{ 4786 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca)); 4787 4788 nw->upto = 0; 4789 nw->bad_uses = 0; 4790 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *)); 4791 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned)); 4792 nw->cands = BITMAP_ALLOC (NULL); 4793 nw->n_cands = 0; 4794 nw->n_regs = 0; 4795 nw->cand_use_cost = 0; 4796 nw->cand_cost = 0; 4797 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned)); 4798 nw->cost = 0; 4799 4800 return nw; 4801} 4802 4803/* Free memory occupied by the set IVS. */ 4804 4805static void 4806iv_ca_free (struct iv_ca **ivs) 4807{ 4808 free ((*ivs)->cand_for_use); 4809 free ((*ivs)->n_cand_uses); 4810 BITMAP_FREE ((*ivs)->cands); 4811 free ((*ivs)->n_invariant_uses); 4812 free (*ivs); 4813 *ivs = NULL; 4814} 4815 4816/* Dumps IVS to FILE. 
*/ 4817 4818static void 4819iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs) 4820{ 4821 const char *pref = " invariants "; 4822 unsigned i; 4823 4824 fprintf (file, " cost %d\n", iv_ca_cost (ivs)); 4825 bitmap_print (file, ivs->cands, " candidates ","\n"); 4826 4827 for (i = 1; i <= data->max_inv_id; i++) 4828 if (ivs->n_invariant_uses[i]) 4829 { 4830 fprintf (file, "%s%d", pref, i); 4831 pref = ", "; 4832 } 4833 fprintf (file, "\n"); 4834} 4835 4836/* Try changing candidate in IVS to CAND for each use. Return cost of the 4837 new set, and store differences in DELTA. Number of induction variables 4838 in the new set is stored to N_IVS. */ 4839 4840static unsigned 4841iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs, 4842 struct iv_cand *cand, struct iv_ca_delta **delta, 4843 unsigned *n_ivs) 4844{ 4845 unsigned i, cost; 4846 struct iv_use *use; 4847 struct cost_pair *old_cp, *new_cp; 4848 4849 *delta = NULL; 4850 for (i = 0; i < ivs->upto; i++) 4851 { 4852 use = iv_use (data, i); 4853 old_cp = iv_ca_cand_for_use (ivs, use); 4854 4855 if (old_cp 4856 && old_cp->cand == cand) 4857 continue; 4858 4859 new_cp = get_use_iv_cost (data, use, cand); 4860 if (!new_cp) 4861 continue; 4862 4863 if (!iv_ca_has_deps (ivs, new_cp)) 4864 continue; 4865 4866 if (!cheaper_cost_pair (new_cp, old_cp)) 4867 continue; 4868 4869 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta); 4870 } 4871 4872 iv_ca_delta_commit (data, ivs, *delta, true); 4873 cost = iv_ca_cost (ivs); 4874 if (n_ivs) 4875 *n_ivs = iv_ca_n_cands (ivs); 4876 iv_ca_delta_commit (data, ivs, *delta, false); 4877 4878 return cost; 4879} 4880 4881/* Try narrowing set IVS by removing CAND. Return the cost of 4882 the new set and store the differences in DELTA. 
*/

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only the uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  /* Search for the cheapest replacement among all the other
	     candidates already present in the set.  */
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  /* Only the candidates recorded as related to USE may express it;
	     restrict the search to those that are also in the set.  */
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* If some use cannot be expressed without CAND, removing CAND is
	 impossible; give up with infinite cost.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Apply the changes temporarily just to compute the cost, then revert;
     the caller decides whether to commit DELTA.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.
*/

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate in the set and keep the cheapest result.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal was an improvement.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if USE cannot be expressed by
   any candidate.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  /* If iv_ca_add_use managed to express USE by a candidate already in the
     set, remember that assignment as the baseline and retract it so that
     the alternatives below start from a clean state.  */
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates.  Only if it fails, try the specific ones.
     Rationale -- in loops with many variables the best choice often is to use
     just one generic biv.  If we added here many ivs specific to the uses,
     the optimization algorithm later would be likely to get stuck in a local
     minimum, thus causing us to create too many ivs.  The approach from
     few ivs to more seems more likely to be successful -- starting from few
     ivs, replacing an expensive use by a specific iv should always be a
     win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Temporarily assign CAND to USE, measure the cost of extending the
	 whole set with CAND, then retract the assignment.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* The important candidates could not express USE at all; fall back to
     the candidates recorded in its cost map.  */
  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses.  Returns NULL if some
   use cannot be expressed by any candidate.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  Returns true if the
   cost was improved (and IVS updated), false if no improvement exists.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Commit the winning delta; the committed cost must match the cost we
     measured when the delta was constructed.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate the local-search improvement step to a fixed point.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  /* Record the chosen candidate on each use for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      if (!including_defined_name)
	{
	  /* Prevent the ssa name defined by the statement from being removed.  */
	  SET_PHI_RESULT (stmt, NULL);
	}
      remove_phi_node (stmt, NULL_TREE);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi);
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi, pbsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
      gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step is expressed as a subtraction instead.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      op = TREE_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    comp = get_computation (data->current_loop, use, cand);

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* Find the insertion point just after the labels at the start of
	 the block containing the phi node.  */
      pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
      while (!bsi_end_p (pbsi)
	     && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
	{
	  bsi = pbsi;
	  bsi_next (&pbsi);
	}
      break;

    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      /* Replace the phi node by an explicit assignment of the computed
	 value.  */
      if (stmts)
	bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
      ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
      bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    {
      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
      TREE_OPERAND (use->stmt, 1) = op;
    }
}

/* Replaces ssa name in index IDX by its basic variable.  Callback for
   for_each_index.  */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  /* For ARRAY_REFs, also strip ssa names from the two optional
     operands (element size and lower bound).  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables.  */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extract the alias analysis info for the memory reference REF.
There are
   several ways how this information may be stored and what precisely is
   its semantics depending on the type of the reference, but there always is
   somewhere hidden one _DECL node that is used to determine the set of
   virtual operands for the reference.  The code below deciphers this jungle
   and extracts this single useful piece of information.  */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree tag, sv;
  unsigned HOST_WIDE_INT offset, size;

  /* If some component of ORIG has sub-variables, return (a copy of) that
     component -- it then determines the virtual operands.  */
  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    if (okay_component_ref_for_subvars (sv, &offset, &size))
      break;

  if (handled_component_p (sv))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* In case the base is a dereference of a pointer, first check its name
	 mem tag, and if it does not have one, use type mem tag.  */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = var_ann (var)->type_mem_tag;
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      /* Otherwise the base should be a declaration; use its type mem tag
	 if it has one, and the declaration itself otherwise.  */
      if (!DECL_P (var))
	return NULL_TREE;

      tag = var_ann (var)->type_mem_tag;
      if (tag)
	return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.
*/

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      /* NEW_REF is a freshly built TARGET_MEM_REF; record the original
	 reference (with ssa names stripped) and derive its alias tag.  */
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  struct affine_tree_combination aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;

  /* Express the address as an affine combination of CAND and build a
     TARGET_MEM_REF for it, replacing the original memory reference.  */
  get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree *op_p, cond, op, stmts, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);

  /* A non-NULL bound means the comparison may be replaced by a direct
     comparison of CAND against the bound (iv elimination).  */
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = fold_convert (var_type, bound);
      op = force_gimple_operand (unshare_expr (bound), &stmts,
				 true, NULL_TREE);

      if (stmts)
	bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      update_stmt (use->stmt);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);

  /* Pick the operand of the condition that is the iv (nonzero step).  */
  cond = *use->op_p;
  op_p = &TREE_OPERAND (cond, 0);
  if (TREE_CODE (*op_p) != SSA_NAME
      || zero_p (get_iv (data, *op_p)->step))
    op_p = &TREE_OPERAND (cond, 1);

  op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  *op_p = op;
}

/* Ensure that operand *OP_P may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
{
  basic_block def_bb;
  struct loop *def_loop;
  tree phi, use;

  use = USE_FROM_PTR (op_p);
  if (TREE_CODE (use) != SSA_NAME)
    return;

  def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
  if (!def_bb)
    return;

  /* Nothing to do if the definition is not inside a loop left by EXIT.  */
  def_loop = def_bb->loop_father;
  if (flow_bb_inside_loop_p (def_loop, exit->dest))
    return;

  /* Try finding a phi node that copies the value out of the loop.  */
  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
      break;

  if (!phi)
    {
      /* Create such a phi node.  */
      tree new_name = duplicate_ssa_name (use, NULL);

      phi = create_phi_node (new_name, exit->dest);
      SSA_NAME_DEF_STMT (new_name) = phi;
      add_phi_arg (phi, use, exit);
    }

  /* Rewrite the operand to use the loop-closed copy.  */
  SET_USE (op_p, PHI_RESULT (phi));
}

/* Ensure that operands of STMT may be used at the end of EXIT without
   violating loop closed ssa form.  */

static void
protect_loop_closed_ssa_form (edge exit, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;

  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
    protect_loop_closed_ssa_form_use (exit, use_p);
}

/* STMTS compute a value of a phi argument OP on EXIT of a loop.
Arrange things 5664 so that they are emitted on the correct place, and so that the loop closed 5665 ssa form is preserved. */ 5666 5667void 5668compute_phi_arg_on_exit (edge exit, tree stmts, tree op) 5669{ 5670 tree_stmt_iterator tsi; 5671 block_stmt_iterator bsi; 5672 tree phi, stmt, def, next; 5673 5674 if (!single_pred_p (exit->dest)) 5675 split_loop_exit_edge (exit); 5676 5677 /* Ensure there is label in exit->dest, so that we can 5678 insert after it. */ 5679 tree_block_label (exit->dest); 5680 bsi = bsi_after_labels (exit->dest); 5681 5682 if (TREE_CODE (stmts) == STATEMENT_LIST) 5683 { 5684 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi)) 5685 { 5686 tree stmt = tsi_stmt (tsi); 5687 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT); 5688 protect_loop_closed_ssa_form (exit, stmt); 5689 } 5690 } 5691 else 5692 { 5693 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT); 5694 protect_loop_closed_ssa_form (exit, stmts); 5695 } 5696 5697 if (!op) 5698 return; 5699 5700 for (phi = phi_nodes (exit->dest); phi; phi = next) 5701 { 5702 next = PHI_CHAIN (phi); 5703 5704 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op) 5705 { 5706 def = PHI_RESULT (phi); 5707 remove_statement (phi, false); 5708 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op), 5709 def, op); 5710 SSA_NAME_DEF_STMT (def) = stmt; 5711 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT); 5712 } 5713 } 5714} 5715 5716/* Rewrites the final value of USE (that is only needed outside of the loop) 5717 using candidate CAND. 
*/

static void
rewrite_use_outer (struct ivopts_data *data,
		   struct iv_use *use, struct iv_cand *cand)
{
  edge exit;
  tree value, op, stmts, tgt;
  tree phi;

  /* TGT is the value of the original iv that is live after the loop.  */
  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);
      break;
    case MODIFY_EXPR:
      tgt = TREE_OPERAND (use->stmt, 0);
      break;
    default:
      gcc_unreachable ();
    }

  exit = single_dom_exit (data->current_loop);

  if (exit && !(exit->flags & EDGE_COMPLEX))
    {
      /* Either the replacement value was recorded when the candidate was
	 costed (iv elimination), or it must be computed from CAND at the
	 exit test.  */
      if (!cand->iv)
	{
	  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
	  value = unshare_expr (cp->value);
	}
      else
	value = get_computation_at (data->current_loop,
				    use, cand, last_stmt (exit->src));

      op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));

      /* If we will preserve the iv anyway and we would need to perform
	 some computation to replace the final value, do nothing.  */
      if (stmts && name_info (data, tgt)->preserve_biv)
	return;

      /* Redirect the loop-closed phi arguments from TGT to the new value.  */
      for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
	{
	  use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);

	  if (USE_FROM_PTR (use_p) == tgt)
	    SET_USE (use_p, op);
	}

      if (stmts)
	compute_phi_arg_on_exit (exit, stmts, op);

      /* Enable removal of the statement.  We cannot remove it directly,
	 since we may still need the aliasing information attached to the
	 ssa name defined by it.  */
      name_info (data, tgt)->iv->have_use_for = false;
      return;
    }

  /* If the variable is going to be preserved anyway, there is nothing to
     do.  */
  if (name_info (data, tgt)->preserve_biv)
    return;

  /* Otherwise we just need to compute the iv.  */
  rewrite_use_nonlinear_expr (data, use, cand);
}

/* Rewrites USE using candidate CAND.
*/ 5787 5788static void 5789rewrite_use (struct ivopts_data *data, 5790 struct iv_use *use, struct iv_cand *cand) 5791{ 5792 switch (use->type) 5793 { 5794 case USE_NONLINEAR_EXPR: 5795 rewrite_use_nonlinear_expr (data, use, cand); 5796 break; 5797 5798 case USE_OUTER: 5799 rewrite_use_outer (data, use, cand); 5800 break; 5801 5802 case USE_ADDRESS: 5803 rewrite_use_address (data, use, cand); 5804 break; 5805 5806 case USE_COMPARE: 5807 rewrite_use_compare (data, use, cand); 5808 break; 5809 5810 default: 5811 gcc_unreachable (); 5812 } 5813 update_stmt (use->stmt); 5814} 5815 5816/* Rewrite the uses using the selected induction variables. */ 5817 5818static void 5819rewrite_uses (struct ivopts_data *data) 5820{ 5821 unsigned i; 5822 struct iv_cand *cand; 5823 struct iv_use *use; 5824 5825 for (i = 0; i < n_iv_uses (data); i++) 5826 { 5827 use = iv_use (data, i); 5828 cand = use->selected; 5829 gcc_assert (cand); 5830 5831 rewrite_use (data, use, cand); 5832 } 5833} 5834 5835/* Removes the ivs that are not used after rewriting. */ 5836 5837static void 5838remove_unused_ivs (struct ivopts_data *data) 5839{ 5840 unsigned j; 5841 bitmap_iterator bi; 5842 5843 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi) 5844 { 5845 struct version_info *info; 5846 5847 info = ver_info (data, j); 5848 if (info->iv 5849 && !zero_p (info->iv->step) 5850 && !info->inv_id 5851 && !info->iv->have_use_for 5852 && !info->preserve_biv) 5853 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true); 5854 } 5855} 5856 5857/* Frees data allocated by the optimization of a single loop. 
*/

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  htab_empty (data->niters);

  /* Reset the per-ssa-name version information.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Release the uses together with their cost maps.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Release the candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version info array if new ssa names were created while
     processing this loop.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = xcalloc (data->version_info_size,
				    sizeof (struct version_info));
    }

  data->max_inv_id = 0;

  /* Drop the DECL_RTLs that were set while estimating address costs.  */
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.
*/

static void
tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
{
  unsigned i;

  /* Release the per-loop aux data allocated at initialization.  */
  for (i = 1; i < loops->num; i++)
    if (loops->parray[i])
      {
	free (loops->parray[i]->aux);
	loops->parray[i]->aux = NULL;
      }

  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);
  htab_delete (data->niters);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  /* Per-loop data is always released, whether or not anything changed.  */
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in LOOPS.  */

void
tree_ssa_iv_optimize (struct loops *loops)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (loops, &data);

  /* Optimize the loops starting with the innermost ones.  */
  loop = loops->tree_root;
  while (loop->inner)
    loop = loop->inner;

  /* Scan the loops, inner ones first.  */
  while (loop != loops->tree_root)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);

      /* Advance to the next loop in a depth-first walk of the loop tree:
	 descend to the innermost loop of the next sibling, or go up to
	 the parent when there is no sibling.  */
      if (loop->next)
	{
	  loop = loop->next;
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;
    }

  tree_ssa_iv_optimize_finalize (loops, &data);
}