1/* Global common subexpression elimination/Partial redundancy elimination 2 and global constant/copy propagation for GNU compiler. 3 Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc. 4 5This file is part of GNU CC. 6 7GNU CC is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 2, or (at your option) 10any later version. 11 12GNU CC is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with GNU CC; see the file COPYING. If not, write to 19the Free Software Foundation, 59 Temple Place - Suite 330, 20Boston, MA 02111-1307, USA. */ 21 22/* TODO 23 - reordering of memory allocation and freeing to be more space efficient 24 - do rough calc of how many regs are needed in each block, and a rough 25 calc of how many regs are available in each class and use that to 26 throttle back the code in cases where RTX_COST is minimal. 27 - dead store elimination 28 - a store to the same address as a load does not kill the load if the 29 source of the store is also the destination of the load. Handling this 30 allows more load motion, particularly out of loops. 31 - ability to realloc sbitmap vectors would allow one initial computation 32 of reg_set_in_block with only subsequent additions, rather than 33 recomputing it for each pass 34 35*/ 36 37/* References searched while implementing this. 38 39 Compilers Principles, Techniques and Tools 40 Aho, Sethi, Ullman 41 Addison-Wesley, 1988 42 43 Global Optimization by Suppression of Partial Redundancies 44 E. Morel, C. Renvoise 45 communications of the acm, Vol. 22, Num. 2, Feb. 1979 46 47 A Portable Machine-Independent Global Optimizer - Design and Measurements 48 Frederick Chow 49 Stanford Ph.D. thesis, Dec. 1983 50 51 A Fast Algorithm for Code Movement Optimization 52 D.M. Dhamdhere 53 SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988 54 55 A Solution to a Problem with Morel and Renvoise's 56 Global Optimization by Suppression of Partial Redundancies 57 K-H Drechsler, M.P. Stadel 58 ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988 59 60 Practical Adaptation of the Global Optimization 61 Algorithm of Morel and Renvoise 62 D.M. Dhamdhere 63 ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991 64 65 Efficiently Computing Static Single Assignment Form and the Control 66 Dependence Graph 67 R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck 68 ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991 69 70 Lazy Code Motion 71 J. Knoop, O. Ruthing, B. Steffen 72 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 73 74 What's In a Region? Or Computing Control Dependence Regions in Near-Linear 75 Time for Reducible Flow Control 76 Thomas Ball 77 ACM Letters on Programming Languages and Systems, 78 Vol. 2, Num. 1-4, Mar-Dec 1993 79 80 An Efficient Representation for Sparse Sets 81 Preston Briggs, Linda Torczon 82 ACM Letters on Programming Languages and Systems, 83 Vol. 2, Num. 1-4, Mar-Dec 1993 84 85 A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion 86 K-H Drechsler, M.P. Stadel 87 ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993 88 89 Partial Dead Code Elimination 90 J. Knoop, O. Ruthing, B. Steffen 91 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 92 93 Effective Partial Redundancy Elimination 94 P. Briggs, K.D. Cooper 95 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 96 97 The Program Structure Tree: Computing Control Regions in Linear Time 98 R. Johnson, D. Pearson, K. Pingali 99 ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 100 101 Optimal Code Motion: Theory and Practice 102 J. Knoop, O. Ruthing, B. Steffen 103 ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994 104 105 The power of assignment motion 106 J. Knoop, O. Ruthing, B. Steffen 107 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 108 109 Global code motion / global value numbering 110 C. Click 111 ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 112 113 Value Driven Redundancy Elimination 114 L.T. Simpson 115 Rice University Ph.D. thesis, Apr. 1996 116 117 Value Numbering 118 L.T. Simpson 119 Massively Scalar Compiler Project, Rice University, Sep. 1996 120 121 High Performance Compilers for Parallel Computing 122 Michael Wolfe 123 Addison-Wesley, 1996 124 125 Advanced Compiler Design and Implementation 126 Steven Muchnick 127 Morgan Kaufmann, 1997 128 129 People wishing to speed up the code here should read: 130 Elimination Algorithms for Data Flow Analysis 131 B.G. Ryder, M.C. Paull 132 ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986 133 134 How to Analyze Large Programs Efficiently and Informatively 135 D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck 136 ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 137 138 People wishing to do something different can find various possibilities 139 in the above papers and elsewhere. 140*/ 141 142#include "config.h" 143#include "system.h" 144#include "toplev.h" 145 146#include "rtl.h" 147#include "regs.h" 148#include "hard-reg-set.h" 149#include "flags.h" 150#include "real.h" 151#include "insn-config.h" 152#include "recog.h" 153#include "basic-block.h" 154#include "output.h" 155#include "expr.h" 156 157#include "obstack.h" 158#define obstack_chunk_alloc gmalloc 159#define obstack_chunk_free free 160 161/* Maximum number of passes to perform. */ 162#define MAX_PASSES 1 163 164/* Propagate flow information through back edges and thus enable PRE's 165 moving loop invariant calculations out of loops. 166 167 Originally this tended to create worse overall code, but several 168 improvements during the development of PRE seem to have made following 169 back edges generally a win. 170 171 Note much of the loop invariant code motion done here would normally 172 be done by loop.c, which has more heuristics for when to move invariants 173 out of loops. At some point we might need to move some of those 174 heuristics into gcse.c. */ 175#define FOLLOW_BACK_EDGES 1 176 177/* We support GCSE via Partial Redundancy Elimination. PRE optimizations 178 are a superset of those done by GCSE. 179 180 We perform the following steps: 181 182 1) Compute basic block information. 183 184 2) Compute table of places where registers are set. 185 186 3) Perform copy/constant propagation. 187 188 4) Perform global cse. 189 190 5) Perform another pass of copy/constant propagation. 191 192 Two passes of copy/constant propagation are done because the first one 193 enables more GCSE and the second one helps to clean up the copies that 194 GCSE creates. This is needed more for PRE than for Classic because Classic 195 GCSE will try to use an existing register containing the common 196 subexpression rather than create a new one. This is harder to do for PRE 197 because of the code motion (which Classic GCSE doesn't do). 198 199 Expressions we are interested in GCSE-ing are of the form 200 (set (pseudo-reg) (expression)). 201 Function want_to_gcse_p says what these are. 202 203 PRE handles moving invariant expressions out of loops (by treating them as 204 partially redundant). 205 206 Eventually it would be nice to replace cse.c/gcse.c with SSA (static single 207 assignment) based GVN (global value numbering). L. T. Simpson's paper 208 (Rice University) on value numbering is a useful reference for this. 209 210 ********************** 211 212 We used to support multiple passes but there are diminishing returns in 213 doing so. The first pass usually makes 90% of the changes that are doable. 214 A second pass can make a few more changes made possible by the first pass. 215 Experiments show any further passes don't make enough changes to justify 216 the expense. 217 218 A study of spec92 using an unlimited number of passes: 219 [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83, 220 [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2, 221 [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1 222 223 It was found doing copy propagation between each pass enables further 224 substitutions. 225 226 PRE is quite expensive in complicated functions because the DFA can take 227 awhile to converge. Hence we only perform one pass. Macro MAX_PASSES can 228 be modified if one wants to experiment. 229 230 ********************** 231 232 The steps for PRE are: 233 234 1) Build the hash table of expressions we wish to GCSE (expr_hash_table). 235 236 2) Perform the data flow analysis for PRE. 237 238 3) Delete the redundant instructions 239 240 4) Insert the required copies [if any] that make the partially 241 redundant instructions fully redundant. 242 243 5) For other reaching expressions, insert an instruction to copy the value 244 to a newly created pseudo that will reach the redundant instruction. 245 246 The deletion is done first so that when we do insertions we 247 know which pseudo reg to use. 248 249 Various papers have argued that PRE DFA is expensive (O(n^2)) and others 250 argue it is not. The number of iterations for the algorithm to converge 251 is typically 2-4 so I don't view it as that expensive (relatively speaking). 252 253 PRE GCSE depends heavily on the second CSE pass to clean up the copies 254 we create. To make an expression reach the place where it's redundant, 255 the result of the expression is copied to a new register, and the redundant 256 expression is deleted by replacing it with this new register. Classic GCSE 257 doesn't have this problem as much as it computes the reaching defs of 258 each register in each block and thus can try to use an existing register. 259 260 ********************** 261 262 A fair bit of simplicity is created by creating small functions for simple 263 tasks, even when the function is only called in one place. This may 264 measurably slow things down [or may not] by creating more function call 265 overhead than is necessary. The source is laid out so that it's trivial 266 to make the affected functions inline so that one can measure what speed 267 up, if any, can be achieved, and maybe later when things settle things can 268 be rearranged. 269 270 Help stamp out big monolithic functions! */ 271 272/* GCSE global vars. */ 273 274/* -dG dump file. */ 275static FILE *gcse_file; 276 277/* Note whether or not we should run jump optimization after gcse. We 278 want to do this for two cases. 279 280 * If we changed any jumps via cprop. 281 282 * If we added any labels via edge splitting. */ 283 284static int run_jump_opt_after_gcse; 285 286/* Element I is a list of I's predecessors/successors. */ 287static int_list_ptr *s_preds; 288static int_list_ptr *s_succs; 289 290/* Element I is the number of predecessors/successors of basic block I. */ 291static int *num_preds; 292static int *num_succs; 293 294/* Bitmaps are normally not included in debugging dumps. 295 However it's useful to be able to print them from GDB. 296 We could create special functions for this, but it's simpler to 297 just allow passing stderr to the dump_foo fns. Since stderr can 298 be a macro, we store a copy here. */ 299static FILE *debug_stderr; 300 301/* An obstack for our working variables. */ 302static struct obstack gcse_obstack; 303 304/* Non-zero for each mode that supports (set (reg) (reg)). 305 This is trivially true for integer and floating point values. 306 It may or may not be true for condition codes. */ 307static char can_copy_p[(int) NUM_MACHINE_MODES]; 308 309/* Non-zero if can_copy_p has been initialized. */ 310static int can_copy_init_p; 311 312/* Hash table of expressions. */ 313 314struct expr 315{ 316 /* The expression (SET_SRC for expressions, PATTERN for assignments). */ 317 rtx expr; 318 /* Index in the available expression bitmaps. */ 319 int bitmap_index; 320 /* Next entry with the same hash. */ 321 struct expr *next_same_hash; 322 /* List of anticipatable occurrences in basic blocks in the function. 323 An "anticipatable occurrence" is one that is the first occurrence in the 324 basic block, the operands are not modified in the basic block prior 325 to the occurrence and the output is not used between the start of 326 the block and the occurrence. */ 327 struct occr *antic_occr; 328 /* List of available occurrence in basic blocks in the function. 329 An "available occurrence" is one that is the last occurrence in the 330 basic block and the operands are not modified by following statements in 331 the basic block [including this insn]. */ 332 struct occr *avail_occr; 333 /* Non-null if the computation is PRE redundant. 334 The value is the newly created pseudo-reg to record a copy of the 335 expression in all the places that reach the redundant copy. */ 336 rtx reaching_reg; 337}; 338 339/* Occurrence of an expression. 340 There is one per basic block. If a pattern appears more than once the 341 last appearance is used [or first for anticipatable expressions]. */ 342 343struct occr 344{ 345 /* Next occurrence of this expression. */ 346 struct occr *next; 347 /* The insn that computes the expression. */ 348 rtx insn; 349 /* Non-zero if this [anticipatable] occurrence has been deleted. */ 350 char deleted_p; 351 /* Non-zero if this [available] occurrence has been copied to 352 reaching_reg. */ 353 /* ??? This is mutually exclusive with deleted_p, so they could share 354 the same byte. */ 355 char copied_p; 356}; 357 358/* Expression and copy propagation hash tables. 359 Each hash table is an array of buckets. 360 ??? It is known that if it were an array of entries, structure elements 361 `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is 362 not clear whether in the final analysis a sufficient amount of memory would 363 be saved as the size of the available expression bitmaps would be larger 364 [one could build a mapping table without holes afterwards though]. 365 Someday I'll perform the computation and figure it out. 366*/ 367 368/* Total size of the expression hash table, in elements. */ 369static int expr_hash_table_size; 370/* The table itself. 371 This is an array of `expr_hash_table_size' elements. */ 372static struct expr **expr_hash_table; 373 374/* Total size of the copy propagation hash table, in elements. */ 375static int set_hash_table_size; 376/* The table itself. 377 This is an array of `set_hash_table_size' elements. */ 378static struct expr **set_hash_table; 379 380/* Mapping of uids to cuids. 381 Only real insns get cuids. */ 382static int *uid_cuid; 383 384/* Highest UID in UID_CUID. */ 385static int max_uid; 386 387/* Get the cuid of an insn. */ 388#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)]) 389 390/* Number of cuids. */ 391static int max_cuid; 392 393/* Mapping of cuids to insns. */ 394static rtx *cuid_insn; 395 396/* Get insn from cuid. */ 397#define CUID_INSN(CUID) (cuid_insn[CUID]) 398 399/* Maximum register number in function prior to doing gcse + 1. 400 Registers created during this pass have regno >= max_gcse_regno. 401 This is named with "gcse" to not collide with global of same name. */ 402static int max_gcse_regno; 403 404/* Maximum number of cse-able expressions found. */ 405static int n_exprs; 406/* Maximum number of assignments for copy propagation found. */ 407static int n_sets; 408 409/* Table of registers that are modified. 410 For each register, each element is a list of places where the pseudo-reg 411 is set. 412 413 For simplicity, GCSE is done on sets of pseudo-regs only. PRE GCSE only 414 requires knowledge of which blocks kill which regs [and thus could use 415 a bitmap instead of the lists `reg_set_table' uses]. 416 417 `reg_set_table' and could be turned into an array of bitmaps 418 (num-bbs x num-regs) 419 [however perhaps it may be useful to keep the data as is]. 420 One advantage of recording things this way is that `reg_set_table' is 421 fairly sparse with respect to pseudo regs but for hard regs could be 422 fairly dense [relatively speaking]. 423 And recording sets of pseudo-regs in lists speeds 424 up functions like compute_transp since in the case of pseudo-regs we only 425 need to iterate over the number of times a pseudo-reg is set, not over the 426 number of basic blocks [clearly there is a bit of a slow down in the cases 427 where a pseudo is set more than once in a block, however it is believed 428 that the net effect is to speed things up]. This isn't done for hard-regs 429 because recording call-clobbered hard-regs in `reg_set_table' at each 430 function call can consume a fair bit of memory, and iterating over hard-regs 431 stored this way in compute_transp will be more expensive. */ 432 433typedef struct reg_set { 434 /* The next setting of this register. */ 435 struct reg_set *next; 436 /* The insn where it was set. */ 437 rtx insn; 438} reg_set; 439static reg_set **reg_set_table; 440/* Size of `reg_set_table'. 441 The table starts out at max_gcse_regno + slop, and is enlarged as 442 necessary. */ 443static int reg_set_table_size; 444/* Amount to grow `reg_set_table' by when it's full. */ 445#define REG_SET_TABLE_SLOP 100 446 447/* Bitmap containing one bit for each register in the program. 448 Used when performing GCSE to track which registers have been set since 449 the start of the basic block. */ 450static sbitmap reg_set_bitmap; 451 452/* For each block, a bitmap of registers set in the block. 453 This is used by expr_killed_p and compute_transp. 454 It is computed during hash table computation and not by compute_sets 455 as it includes registers added since the last pass (or between cprop and 456 gcse) and it's currently not easy to realloc sbitmap vectors. */ 457static sbitmap *reg_set_in_block; 458 459/* For each block, non-zero if memory is set in that block. 460 This is computed during hash table computation and is used by 461 expr_killed_p and compute_transp. 462 ??? Handling of memory is very simple, we don't make any attempt 463 to optimize things (later). 464 ??? This can be computed by compute_sets since the information 465 doesn't change. */ 466static char *mem_set_in_block; 467 468/* Various variables for statistics gathering. */ 469 470/* Memory used in a pass. 471 This isn't intended to be absolutely precise. Its intent is only 472 to keep an eye on memory usage. */ 473static int bytes_used; 474/* GCSE substitutions made. */ 475static int gcse_subst_count; 476/* Number of copy instructions created. */ 477static int gcse_create_count; 478/* Number of constants propagated. */ 479static int const_prop_count; 480/* Number of copys propagated. */ 481static int copy_prop_count; 482 483extern char *current_function_name; 484extern int current_function_calls_setjmp; 485 486/* These variables are used by classic GCSE. 487 Normally they'd be defined a bit later, but `rd_gen' needs to 488 be declared sooner. */ 489 490/* A bitmap of all ones for implementing the algorithm for available 491 expressions and reaching definitions. */ 492/* ??? Available expression bitmaps have a different size than reaching 493 definition bitmaps. This should be the larger of the two, however, it 494 is not currently used for reaching definitions. */ 495static sbitmap u_bitmap; 496 497/* Each block has a bitmap of each type. 498 The length of each blocks bitmap is: 499 500 max_cuid - for reaching definitions 501 n_exprs - for available expressions 502 503 Thus we view the bitmaps as 2 dimensional arrays. i.e. 504 rd_kill[block_num][cuid_num] 505 ae_kill[block_num][expr_num] 506*/ 507 508/* For reaching defs */ 509static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out; 510 511/* for available exprs */ 512static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out; 513 514 515static void compute_can_copy PROTO ((void)); 516 517static char *gmalloc PROTO ((unsigned int)); 518static char *grealloc PROTO ((char *, unsigned int)); 519static char *gcse_alloc PROTO ((unsigned long)); 520static void alloc_gcse_mem PROTO ((rtx)); 521static void free_gcse_mem PROTO ((void)); 522static void alloc_reg_set_mem PROTO ((int)); 523static void free_reg_set_mem PROTO ((void)); 524static void record_one_set PROTO ((int, rtx)); 525static void record_set_info PROTO ((rtx, rtx)); 526static void compute_sets PROTO ((rtx)); 527 528static void hash_scan_insn PROTO ((rtx, int, int)); 529static void hash_scan_set PROTO ((rtx, rtx, int)); 530static void hash_scan_clobber PROTO ((rtx, rtx)); 531static void hash_scan_call PROTO ((rtx, rtx)); 532static int want_to_gcse_p PROTO ((rtx)); 533static int oprs_unchanged_p PROTO ((rtx, rtx, int)); 534static int oprs_anticipatable_p PROTO ((rtx, rtx)); 535static int oprs_available_p PROTO ((rtx, rtx)); 536static void insert_expr_in_table PROTO ((rtx, enum machine_mode, 537 rtx, int, int)); 538static void insert_set_in_table PROTO ((rtx, rtx)); 539static unsigned int hash_expr PROTO ((rtx, enum machine_mode, 540 int *, int)); 541static unsigned int hash_expr_1 PROTO ((rtx, enum machine_mode, int *)); 542static unsigned int hash_set PROTO ((int, int)); 543static int expr_equiv_p PROTO ((rtx, rtx)); 544static void record_last_reg_set_info PROTO ((rtx, int)); 545static void record_last_mem_set_info PROTO ((rtx)); 546static void record_last_set_info PROTO ((rtx, rtx)); 547static void compute_hash_table PROTO ((int)); 548static void alloc_set_hash_table PROTO ((int)); 549static void free_set_hash_table PROTO ((void)); 550static void compute_set_hash_table PROTO ((void)); 551static void alloc_expr_hash_table PROTO ((int)); 552static void free_expr_hash_table PROTO ((void)); 553static void compute_expr_hash_table PROTO ((void)); 554static void dump_hash_table PROTO ((FILE *, const char *, struct expr **, 555 int, int)); 556static struct expr *lookup_expr PROTO ((rtx)); 557static struct expr *lookup_set PROTO ((int, rtx)); 558static struct expr *next_set PROTO ((int, struct expr *)); 559static void reset_opr_set_tables PROTO ((void)); 560static int oprs_not_set_p PROTO ((rtx, rtx)); 561static void mark_call PROTO ((rtx)); 562static void mark_set PROTO ((rtx, rtx)); 563static void mark_clobber PROTO ((rtx, rtx)); 564static void mark_oprs_set PROTO ((rtx)); 565 566static void alloc_cprop_mem PROTO ((int, int)); 567static void free_cprop_mem PROTO ((void)); 568static void compute_transp PROTO ((rtx, int, sbitmap *, int)); 569static void compute_transpout PROTO ((void)); 570static void compute_local_properties PROTO ((sbitmap *, sbitmap *, 571 sbitmap *, int)); 572static void compute_cprop_avinout PROTO ((void)); 573static void compute_cprop_data PROTO ((void)); 574static void find_used_regs PROTO ((rtx)); 575static int try_replace_reg PROTO ((rtx, rtx, rtx)); 576static struct expr *find_avail_set PROTO ((int, rtx)); 577static int cprop_insn PROTO ((rtx, int)); 578static int cprop PROTO ((int)); 579static int one_cprop_pass PROTO ((int, int)); 580 581static void alloc_pre_mem PROTO ((int, int)); 582static void free_pre_mem PROTO ((void)); 583static void compute_pre_data PROTO ((void)); 584static int pre_expr_reaches_here_p PROTO ((int, struct expr *, 585 int, int, char *)); 586static void insert_insn_end_bb PROTO ((struct expr *, int, int)); 587static void pre_insert PROTO ((struct expr **)); 588static void pre_insert_copy_insn PROTO ((struct expr *, rtx)); 589static void pre_insert_copies PROTO ((void)); 590static int pre_delete PROTO ((void)); 591static int pre_gcse PROTO ((void)); 592static int one_pre_gcse_pass PROTO ((int)); 593 594static void add_label_notes PROTO ((rtx, rtx)); 595 596static void alloc_rd_mem PROTO ((int, int)); 597static void free_rd_mem PROTO ((void)); 598static void handle_rd_kill_set PROTO ((rtx, int, int)); 599static void compute_kill_rd PROTO ((void)); 600static void compute_rd PROTO ((void)); 601static void alloc_avail_expr_mem PROTO ((int, int)); 602static void free_avail_expr_mem PROTO ((void)); 603static void compute_ae_gen PROTO ((void)); 604static int expr_killed_p PROTO ((rtx, int)); 605static void compute_ae_kill PROTO ((void)); 606static void compute_available PROTO ((void)); 607static int expr_reaches_here_p PROTO ((struct occr *, struct expr *, 608 int, int, char *)); 609static rtx computing_insn PROTO ((struct expr *, rtx)); 610static int def_reaches_here_p PROTO ((rtx, rtx)); 611static int can_disregard_other_sets PROTO ((struct reg_set **, rtx, int)); 612static int handle_avail_expr PROTO ((rtx, struct expr *)); 613static int classic_gcse PROTO ((void)); 614static int one_classic_gcse_pass PROTO ((int)); 615 616 617/* Entry point for global common subexpression elimination. 618 F is the first instruction in the function. */ 619 620int 621gcse_main (f, file) 622 rtx f; 623 FILE *file; 624{ 625 int changed, pass; 626 /* Bytes used at start of pass. */ 627 int initial_bytes_used; 628 /* Maximum number of bytes used by a pass. */ 629 int max_pass_bytes; 630 /* Point to release obstack data from for each pass. */ 631 char *gcse_obstack_bottom; 632 633 /* We do not construct an accurate cfg in functions which call 634 setjmp, so just punt to be safe. */ 635 if (current_function_calls_setjmp) 636 return 0; 637 638 /* Assume that we do not need to run jump optimizations after gcse. */ 639 run_jump_opt_after_gcse = 0; 640 641 /* For calling dump_foo fns from gdb. */ 642 debug_stderr = stderr; 643 gcse_file = file; 644 645 /* Identify the basic block information for this function, including 646 successors and predecessors. */ 647 max_gcse_regno = max_reg_num (); 648 find_basic_blocks (f, max_gcse_regno, file, 1); 649 650 /* Return if there's nothing to do. */ 651 if (n_basic_blocks <= 1) 652 { 653 /* Free storage allocated by find_basic_blocks. */ 654 free_basic_block_vars (0); 655 return 0; 656 } 657 658 /* See what modes support reg/reg copy operations. */ 659 if (! can_copy_init_p) 660 { 661 compute_can_copy (); 662 can_copy_init_p = 1; 663 } 664 665 gcc_obstack_init (&gcse_obstack); 666 667 /* Allocate and compute predecessors/successors. */ 668 669 s_preds = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr)); 670 s_succs = (int_list_ptr *) alloca (n_basic_blocks * sizeof (int_list_ptr)); 671 num_preds = (int *) alloca (n_basic_blocks * sizeof (int)); 672 num_succs = (int *) alloca (n_basic_blocks * sizeof (int)); 673 bytes_used = 4 * n_basic_blocks * sizeof (int_list_ptr); 674 compute_preds_succs (s_preds, s_succs, num_preds, num_succs); 675 676 if (file) 677 dump_bb_data (file, s_preds, s_succs, 0); 678 679 /* Record where pseudo-registers are set. 680 This data is kept accurate during each pass. 681 ??? We could also record hard-reg information here 682 [since it's unchanging], however it is currently done during 683 hash table computation. 684 685 It may be tempting to compute MEM set information here too, but MEM 686 sets will be subject to code motion one day and thus we need to compute 687 information about memory sets when we build the hash tables. */ 688 689 alloc_reg_set_mem (max_gcse_regno); 690 compute_sets (f); 691 692 pass = 0; 693 initial_bytes_used = bytes_used; 694 max_pass_bytes = 0; 695 gcse_obstack_bottom = gcse_alloc (1); 696 changed = 1; 697 while (changed && pass < MAX_PASSES) 698 { 699 changed = 0; 700 if (file) 701 fprintf (file, "GCSE pass %d\n\n", pass + 1); 702 703 /* Initialize bytes_used to the space for the pred/succ lists, 704 and the reg_set_table data. */ 705 bytes_used = initial_bytes_used; 706 707 /* Each pass may create new registers, so recalculate each time. */ 708 max_gcse_regno = max_reg_num (); 709 710 alloc_gcse_mem (f); 711 712 /* Don't allow constant propagation to modify jumps 713 during this pass. */ 714 changed = one_cprop_pass (pass + 1, 0); 715 716 if (optimize_size) 717 changed |= one_classic_gcse_pass (pass + 1); 718 else 719 changed |= one_pre_gcse_pass (pass + 1); 720 721 if (max_pass_bytes < bytes_used) 722 max_pass_bytes = bytes_used; 723 724 free_gcse_mem (); 725 726 if (file) 727 { 728 fprintf (file, "\n"); 729 fflush (file); 730 } 731 obstack_free (&gcse_obstack, gcse_obstack_bottom); 732 pass++; 733 } 734 735 /* Do one last pass of copy propagation, including cprop into 736 conditional jumps. */ 737 738 max_gcse_regno = max_reg_num (); 739 alloc_gcse_mem (f); 740 /* This time, go ahead and allow cprop to alter jumps. */ 741 one_cprop_pass (pass + 1, 1); 742 free_gcse_mem (); 743 744 if (file) 745 { 746 fprintf (file, "GCSE of %s: %d basic blocks, ", 747 current_function_name, n_basic_blocks); 748 fprintf (file, "%d pass%s, %d bytes\n\n", 749 pass, pass > 1 ? "es" : "", max_pass_bytes); 750 } 751 752 /* Free our obstack. */ 753 obstack_free (&gcse_obstack, NULL_PTR); 754 /* Free reg_set_table. */ 755 free_reg_set_mem (); 756 /* Free storage used to record predecessor/successor data. */ 757 free_bb_mem (); 758 /* Free storage allocated by find_basic_blocks. */ 759 free_basic_block_vars (0); 760 return run_jump_opt_after_gcse; 761} 762 763/* Misc. utilities. */ 764 765/* Compute which modes support reg/reg copy operations. */ 766 767static void 768compute_can_copy () 769{ 770 int i; 771#ifndef AVOID_CCMODE_COPIES 772 rtx reg,insn; 773#endif 774 char *free_point = (char *) oballoc (1); 775 776 bzero (can_copy_p, NUM_MACHINE_MODES); 777 778 start_sequence (); 779 for (i = 0; i < NUM_MACHINE_MODES; i++) 780 { 781 switch (GET_MODE_CLASS (i)) 782 { 783 case MODE_CC : 784#ifdef AVOID_CCMODE_COPIES 785 can_copy_p[i] = 0; 786#else 787 reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1); 788 insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg)); 789 if (recog (PATTERN (insn), insn, NULL_PTR) >= 0) 790 can_copy_p[i] = 1; 791#endif 792 break; 793 default : 794 can_copy_p[i] = 1; 795 break; 796 } 797 } 798 end_sequence (); 799 800 /* Free the objects we just allocated. */ 801 obfree (free_point); 802} 803 804/* Cover function to xmalloc to record bytes allocated. */ 805 806static char * 807gmalloc (size) 808 unsigned int size; 809{ 810 bytes_used += size; 811 return xmalloc (size); 812} 813 814/* Cover function to xrealloc. 815 We don't record the additional size since we don't know it. 816 It won't affect memory usage stats much anyway. */ 817 818static char * 819grealloc (ptr, size) 820 char *ptr; 821 unsigned int size; 822{ 823 return xrealloc (ptr, size); 824} 825 826/* Cover function to obstack_alloc. 827 We don't need to record the bytes allocated here since 828 obstack_chunk_alloc is set to gmalloc. */ 829 830static char * 831gcse_alloc (size) 832 unsigned long size; 833{ 834 return (char *) obstack_alloc (&gcse_obstack, size); 835} 836 837/* Allocate memory for the cuid mapping array, 838 and reg/memory set tracking tables. 839 840 This is called at the start of each pass. */ 841 842static void 843alloc_gcse_mem (f) 844 rtx f; 845{ 846 int i,n; 847 rtx insn; 848 849 /* Find the largest UID and create a mapping from UIDs to CUIDs. 850 CUIDs are like UIDs except they increase monotonically, have no gaps, 851 and only apply to real insns. */ 852 853 max_uid = get_max_uid (); 854 n = (max_uid + 1) * sizeof (int); 855 uid_cuid = (int *) gmalloc (n); 856 bzero ((char *) uid_cuid, n); 857 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn)) 858 { 859 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 860 INSN_CUID (insn) = i++; 861 else 862 INSN_CUID (insn) = i; 863 } 864 865 /* Create a table mapping cuids to insns. */ 866 867 max_cuid = i; 868 n = (max_cuid + 1) * sizeof (rtx); 869 cuid_insn = (rtx *) gmalloc (n); 870 bzero ((char *) cuid_insn, n); 871 for (insn = f, i = 0; insn; insn = NEXT_INSN (insn)) 872 { 873 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 874 { 875 CUID_INSN (i) = insn; 876 i++; 877 } 878 } 879 880 /* Allocate vars to track sets of regs. */ 881 882 reg_set_bitmap = (sbitmap) sbitmap_alloc (max_gcse_regno); 883 884 /* Allocate vars to track sets of regs, memory per block. */ 885 886 reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, 887 max_gcse_regno); 888 mem_set_in_block = (char *) gmalloc (n_basic_blocks); 889} 890 891/* Free memory allocated by alloc_gcse_mem. */ 892 893static void 894free_gcse_mem () 895{ 896 free (uid_cuid); 897 free (cuid_insn); 898 899 free (reg_set_bitmap); 900 901 free (reg_set_in_block); 902 free (mem_set_in_block); 903} 904 905 906/* Compute the local properties of each recorded expression. 907 Local properties are those that are defined by the block, irrespective 908 of other blocks. 909 910 An expression is transparent in a block if its operands are not modified 911 in the block. 912 913 An expression is computed (locally available) in a block if it is computed 914 at least once and expression would contain the same value if the 915 computation was moved to the end of the block. 916 917 An expression is locally anticipatable in a block if it is computed at 918 least once and expression would contain the same value if the computation 919 was moved to the beginning of the block. 920 921 We call this routine for cprop, pre and code hoisting. They all 922 compute basically the same information and thus can easily share 923 this code. 924 925 TRANSP, COMP, and ANTLOC are destination sbitmaps for recording 926 local properties. If NULL, then it is not necessary to compute 927 or record that particular property. 928 929 SETP controls which hash table to look at. If zero, this routine 930 looks at the expr hash table; if nonzero this routine looks at 931 the set hash table. Additionally, TRANSP is computed as ~TRANSP, 932 since this is really cprop's ABSALTERED. */ 933 934static void 935compute_local_properties (transp, comp, antloc, setp) 936 sbitmap *transp; 937 sbitmap *comp; 938 sbitmap *antloc; 939 int setp; 940{ 941 int i, hash_table_size; 942 struct expr **hash_table; 943 944 /* Initialize any bitmaps that were passed in. */ 945 if (transp) 946 { 947 if (setp) 948 sbitmap_vector_zero (transp, n_basic_blocks); 949 else 950 sbitmap_vector_ones (transp, n_basic_blocks); 951 } 952 if (comp) 953 sbitmap_vector_zero (comp, n_basic_blocks); 954 if (antloc) 955 sbitmap_vector_zero (antloc, n_basic_blocks); 956 957 /* We use the same code for cprop, pre and hoisting. For cprop 958 we care about the set hash table, for pre and hoisting we 959 care about the expr hash table. */ 960 hash_table_size = setp ? set_hash_table_size : expr_hash_table_size; 961 hash_table = setp ? set_hash_table : expr_hash_table; 962 963 for (i = 0; i < hash_table_size; i++) 964 { 965 struct expr *expr; 966 967 for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash) 968 { 969 struct occr *occr; 970 int indx = expr->bitmap_index; 971 972 /* The expression is transparent in this block if it is not killed. 973 We start by assuming all are transparent [none are killed], and 974 then reset the bits for those that are. */ 975 976 if (transp) 977 compute_transp (expr->expr, indx, transp, setp); 978 979 /* The occurrences recorded in antic_occr are exactly those that 980 we want to set to non-zero in ANTLOC. */ 981 982 if (antloc) 983 { 984 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 985 { 986 int bb = BLOCK_NUM (occr->insn); 987 SET_BIT (antloc[bb], indx); 988 989 /* While we're scanning the table, this is a good place to 990 initialize this. */ 991 occr->deleted_p = 0; 992 } 993 } 994 995 /* The occurrences recorded in avail_occr are exactly those that 996 we want to set to non-zero in COMP. */ 997 if (comp) 998 { 999 1000 for (occr = expr->avail_occr; occr != NULL; occr = occr->next) 1001 { 1002 int bb = BLOCK_NUM (occr->insn); 1003 SET_BIT (comp[bb], indx); 1004 1005 /* While we're scanning the table, this is a good place to 1006 initialize this. */ 1007 occr->copied_p = 0; 1008 } 1009 } 1010 1011 /* While we're scanning the table, this is a good place to 1012 initialize this. */ 1013 expr->reaching_reg = 0; 1014 } 1015 } 1016} 1017 1018 1019/* Register set information. 1020 1021 `reg_set_table' records where each register is set or otherwise 1022 modified. */ 1023 1024static struct obstack reg_set_obstack; 1025 1026static void 1027alloc_reg_set_mem (n_regs) 1028 int n_regs; 1029{ 1030 int n; 1031 1032 reg_set_table_size = n_regs + REG_SET_TABLE_SLOP; 1033 n = reg_set_table_size * sizeof (struct reg_set *); 1034 reg_set_table = (struct reg_set **) gmalloc (n); 1035 bzero ((char *) reg_set_table, n); 1036 1037 gcc_obstack_init (®_set_obstack); 1038} 1039 1040static void 1041free_reg_set_mem () 1042{ 1043 free (reg_set_table); 1044 obstack_free (®_set_obstack, NULL_PTR); 1045} 1046 1047/* Record REGNO in the reg_set table. */ 1048 1049static void 1050record_one_set (regno, insn) 1051 int regno; 1052 rtx insn; 1053{ 1054 /* allocate a new reg_set element and link it onto the list */ 1055 struct reg_set *new_reg_info, *reg_info_ptr1, *reg_info_ptr2; 1056 1057 /* If the table isn't big enough, enlarge it. */ 1058 if (regno >= reg_set_table_size) 1059 { 1060 int new_size = regno + REG_SET_TABLE_SLOP; 1061 reg_set_table = (struct reg_set **) 1062 grealloc ((char *) reg_set_table, 1063 new_size * sizeof (struct reg_set *)); 1064 bzero ((char *) (reg_set_table + reg_set_table_size), 1065 (new_size - reg_set_table_size) * sizeof (struct reg_set *)); 1066 reg_set_table_size = new_size; 1067 } 1068 1069 new_reg_info = (struct reg_set *) obstack_alloc (®_set_obstack, 1070 sizeof (struct reg_set)); 1071 bytes_used += sizeof (struct reg_set); 1072 new_reg_info->insn = insn; 1073 new_reg_info->next = NULL; 1074 if (reg_set_table[regno] == NULL) 1075 reg_set_table[regno] = new_reg_info; 1076 else 1077 { 1078 reg_info_ptr1 = reg_info_ptr2 = reg_set_table[regno]; 1079 /* ??? One could keep a "last" pointer to speed this up. */ 1080 while (reg_info_ptr1 != NULL) 1081 { 1082 reg_info_ptr2 = reg_info_ptr1; 1083 reg_info_ptr1 = reg_info_ptr1->next; 1084 } 1085 reg_info_ptr2->next = new_reg_info; 1086 } 1087} 1088 1089/* For communication between next two functions (via note_stores). */ 1090static rtx record_set_insn; 1091 1092/* Called from compute_sets via note_stores to handle one 1093 SET or CLOBBER in an insn. */ 1094 1095static void 1096record_set_info (dest, setter) 1097 rtx dest, setter ATTRIBUTE_UNUSED; 1098{ 1099 if (GET_CODE (dest) == SUBREG) 1100 dest = SUBREG_REG (dest); 1101 1102 if (GET_CODE (dest) == REG) 1103 { 1104 if (REGNO (dest) >= FIRST_PSEUDO_REGISTER) 1105 record_one_set (REGNO (dest), record_set_insn); 1106 } 1107} 1108 1109/* Scan the function and record each set of each pseudo-register. 1110 1111 This is called once, at the start of the gcse pass. 1112 See the comments for `reg_set_table' for further docs. */ 1113 1114static void 1115compute_sets (f) 1116 rtx f; 1117{ 1118 rtx insn = f; 1119 1120 while (insn) 1121 { 1122 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 1123 { 1124 record_set_insn = insn; 1125 note_stores (PATTERN (insn), record_set_info); 1126 } 1127 insn = NEXT_INSN (insn); 1128 } 1129} 1130 1131/* Hash table support. */ 1132 1133#define NEVER_SET -1 1134 1135/* For each register, the cuid of the first/last insn in the block to set it, 1136 or -1 if not set. */ 1137static int *reg_first_set; 1138static int *reg_last_set; 1139 1140/* While computing "first/last set" info, this is the CUID of first/last insn 1141 to set memory or -1 if not set. `mem_last_set' is also used when 1142 performing GCSE to record whether memory has been set since the beginning 1143 of the block. 1144 Note that handling of memory is very simple, we don't make any attempt 1145 to optimize things (later). */ 1146static int mem_first_set; 1147static int mem_last_set; 1148 1149/* Perform a quick check whether X, the source of a set, is something 1150 we want to consider for GCSE. */ 1151 1152static int 1153want_to_gcse_p (x) 1154 rtx x; 1155{ 1156 enum rtx_code code = GET_CODE (x); 1157 1158 switch (code) 1159 { 1160 case REG: 1161 case SUBREG: 1162 case CONST_INT: 1163 case CONST_DOUBLE: 1164 case CALL: 1165 return 0; 1166 1167 default: 1168 break; 1169 } 1170 1171 return 1; 1172} 1173 1174/* Return non-zero if the operands of expression X are unchanged from the 1175 start of INSN's basic block up to but not including INSN (if AVAIL_P == 0), 1176 or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */ 1177 1178static int 1179oprs_unchanged_p (x, insn, avail_p) 1180 rtx x, insn; 1181 int avail_p; 1182{ 1183 int i; 1184 enum rtx_code code; 1185 char *fmt; 1186 1187 /* repeat is used to turn tail-recursion into iteration. */ 1188 repeat: 1189 1190 if (x == 0) 1191 return 1; 1192 1193 code = GET_CODE (x); 1194 switch (code) 1195 { 1196 case REG: 1197 if (avail_p) 1198 return (reg_last_set[REGNO (x)] == NEVER_SET 1199 || reg_last_set[REGNO (x)] < INSN_CUID (insn)); 1200 else 1201 return (reg_first_set[REGNO (x)] == NEVER_SET 1202 || reg_first_set[REGNO (x)] >= INSN_CUID (insn)); 1203 1204 case MEM: 1205 if (avail_p) 1206 { 1207 if (mem_last_set != NEVER_SET 1208 && mem_last_set >= INSN_CUID (insn)) 1209 return 0; 1210 } 1211 else 1212 { 1213 if (mem_first_set != NEVER_SET 1214 && mem_first_set < INSN_CUID (insn)) 1215 return 0; 1216 } 1217 x = XEXP (x, 0); 1218 goto repeat; 1219 1220 case PRE_DEC: 1221 case PRE_INC: 1222 case POST_DEC: 1223 case POST_INC: 1224 return 0; 1225 1226 case PC: 1227 case CC0: /*FIXME*/ 1228 case CONST: 1229 case CONST_INT: 1230 case CONST_DOUBLE: 1231 case SYMBOL_REF: 1232 case LABEL_REF: 1233 case ADDR_VEC: 1234 case ADDR_DIFF_VEC: 1235 return 1; 1236 1237 default: 1238 break; 1239 } 1240 1241 i = GET_RTX_LENGTH (code) - 1; 1242 fmt = GET_RTX_FORMAT (code); 1243 for (; i >= 0; i--) 1244 { 1245 if (fmt[i] == 'e') 1246 { 1247 rtx tem = XEXP (x, i); 1248 1249 /* If we are about to do the last recursive call 1250 needed at this level, change it into iteration. 1251 This function is called enough to be worth it. */ 1252 if (i == 0) 1253 { 1254 x = tem; 1255 goto repeat; 1256 } 1257 if (! oprs_unchanged_p (tem, insn, avail_p)) 1258 return 0; 1259 } 1260 else if (fmt[i] == 'E') 1261 { 1262 int j; 1263 for (j = 0; j < XVECLEN (x, i); j++) 1264 { 1265 if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p)) 1266 return 0; 1267 } 1268 } 1269 } 1270 1271 return 1; 1272} 1273 1274/* Return non-zero if the operands of expression X are unchanged from 1275 the start of INSN's basic block up to but not including INSN. */ 1276 1277static int 1278oprs_anticipatable_p (x, insn) 1279 rtx x, insn; 1280{ 1281 return oprs_unchanged_p (x, insn, 0); 1282} 1283 1284/* Return non-zero if the operands of expression X are unchanged from 1285 INSN to the end of INSN's basic block. */ 1286 1287static int 1288oprs_available_p (x, insn) 1289 rtx x, insn; 1290{ 1291 return oprs_unchanged_p (x, insn, 1); 1292} 1293 1294/* Hash expression X. 1295 MODE is only used if X is a CONST_INT. 1296 A boolean indicating if a volatile operand is found or if the expression 1297 contains something we don't want to insert in the table is stored in 1298 DO_NOT_RECORD_P. 1299 1300 ??? One might want to merge this with canon_hash. Later. */ 1301 1302static unsigned int 1303hash_expr (x, mode, do_not_record_p, hash_table_size) 1304 rtx x; 1305 enum machine_mode mode; 1306 int *do_not_record_p; 1307 int hash_table_size; 1308{ 1309 unsigned int hash; 1310 1311 *do_not_record_p = 0; 1312 1313 hash = hash_expr_1 (x, mode, do_not_record_p); 1314 return hash % hash_table_size; 1315} 1316 1317/* Subroutine of hash_expr to do the actual work. */ 1318 1319static unsigned int 1320hash_expr_1 (x, mode, do_not_record_p) 1321 rtx x; 1322 enum machine_mode mode; 1323 int *do_not_record_p; 1324{ 1325 int i, j; 1326 unsigned hash = 0; 1327 enum rtx_code code; 1328 char *fmt; 1329 1330 /* repeat is used to turn tail-recursion into iteration. */ 1331 repeat: 1332 1333 if (x == 0) 1334 return hash; 1335 1336 code = GET_CODE (x); 1337 switch (code) 1338 { 1339 case REG: 1340 { 1341 register int regno = REGNO (x); 1342 hash += ((unsigned) REG << 7) + regno; 1343 return hash; 1344 } 1345 1346 case CONST_INT: 1347 { 1348 unsigned HOST_WIDE_INT tem = INTVAL (x); 1349 hash += ((unsigned) CONST_INT << 7) + (unsigned) mode + tem; 1350 return hash; 1351 } 1352 1353 case CONST_DOUBLE: 1354 /* This is like the general case, except that it only counts 1355 the integers representing the constant. */ 1356 hash += (unsigned) code + (unsigned) GET_MODE (x); 1357 if (GET_MODE (x) != VOIDmode) 1358 for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++) 1359 { 1360 unsigned tem = XINT (x, i); 1361 hash += tem; 1362 } 1363 else 1364 hash += ((unsigned) CONST_DOUBLE_LOW (x) 1365 + (unsigned) CONST_DOUBLE_HIGH (x)); 1366 return hash; 1367 1368 /* Assume there is only one rtx object for any given label. */ 1369 case LABEL_REF: 1370 /* We don't hash on the address of the CODE_LABEL to avoid bootstrap 1371 differences and differences between each stage's debugging dumps. */ 1372 hash += ((unsigned) LABEL_REF << 7) + CODE_LABEL_NUMBER (XEXP (x, 0)); 1373 return hash; 1374 1375 case SYMBOL_REF: 1376 { 1377 /* Don't hash on the symbol's address to avoid bootstrap differences. 1378 Different hash values may cause expressions to be recorded in 1379 different orders and thus different registers to be used in the 1380 final assembler. This also avoids differences in the dump files 1381 between various stages. */ 1382 unsigned int h = 0; 1383 unsigned char *p = (unsigned char *) XSTR (x, 0); 1384 while (*p) 1385 h += (h << 7) + *p++; /* ??? revisit */ 1386 hash += ((unsigned) SYMBOL_REF << 7) + h; 1387 return hash; 1388 } 1389 1390 case MEM: 1391 if (MEM_VOLATILE_P (x)) 1392 { 1393 *do_not_record_p = 1; 1394 return 0; 1395 } 1396 hash += (unsigned) MEM; 1397 hash += MEM_ALIAS_SET (x); 1398 x = XEXP (x, 0); 1399 goto repeat; 1400 1401 case PRE_DEC: 1402 case PRE_INC: 1403 case POST_DEC: 1404 case POST_INC: 1405 case PC: 1406 case CC0: 1407 case CALL: 1408 case UNSPEC_VOLATILE: 1409 *do_not_record_p = 1; 1410 return 0; 1411 1412 case ASM_OPERANDS: 1413 if (MEM_VOLATILE_P (x)) 1414 { 1415 *do_not_record_p = 1; 1416 return 0; 1417 } 1418 1419 default: 1420 break; 1421 } 1422 1423 i = GET_RTX_LENGTH (code) - 1; 1424 hash += (unsigned) code + (unsigned) GET_MODE (x); 1425 fmt = GET_RTX_FORMAT (code); 1426 for (; i >= 0; i--) 1427 { 1428 if (fmt[i] == 'e') 1429 { 1430 rtx tem = XEXP (x, i); 1431 1432 /* If we are about to do the last recursive call 1433 needed at this level, change it into iteration. 1434 This function is called enough to be worth it. */ 1435 if (i == 0) 1436 { 1437 x = tem; 1438 goto repeat; 1439 } 1440 hash += hash_expr_1 (tem, 0, do_not_record_p); 1441 if (*do_not_record_p) 1442 return 0; 1443 } 1444 else if (fmt[i] == 'E') 1445 for (j = 0; j < XVECLEN (x, i); j++) 1446 { 1447 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p); 1448 if (*do_not_record_p) 1449 return 0; 1450 } 1451 else if (fmt[i] == 's') 1452 { 1453 register unsigned char *p = (unsigned char *) XSTR (x, i); 1454 if (p) 1455 while (*p) 1456 hash += *p++; 1457 } 1458 else if (fmt[i] == 'i') 1459 { 1460 register unsigned tem = XINT (x, i); 1461 hash += tem; 1462 } 1463 else 1464 abort (); 1465 } 1466 1467 return hash; 1468} 1469 1470/* Hash a set of register REGNO. 1471 1472 Sets are hashed on the register that is set. 1473 This simplifies the PRE copy propagation code. 1474 1475 ??? May need to make things more elaborate. Later, as necessary. */ 1476 1477static unsigned int 1478hash_set (regno, hash_table_size) 1479 int regno; 1480 int hash_table_size; 1481{ 1482 unsigned int hash; 1483 1484 hash = regno; 1485 return hash % hash_table_size; 1486} 1487 1488/* Return non-zero if exp1 is equivalent to exp2. 1489 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */ 1490 1491static int 1492expr_equiv_p (x, y) 1493 rtx x, y; 1494{ 1495 register int i, j; 1496 register enum rtx_code code; 1497 register char *fmt; 1498 1499 if (x == y) 1500 return 1; 1501 if (x == 0 || y == 0) 1502 return x == y; 1503 1504 code = GET_CODE (x); 1505 if (code != GET_CODE (y)) 1506 return 0; 1507 1508 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */ 1509 if (GET_MODE (x) != GET_MODE (y)) 1510 return 0; 1511 1512 switch (code) 1513 { 1514 case PC: 1515 case CC0: 1516 return x == y; 1517 1518 case CONST_INT: 1519 return INTVAL (x) == INTVAL (y); 1520 1521 case LABEL_REF: 1522 return XEXP (x, 0) == XEXP (y, 0); 1523 1524 case SYMBOL_REF: 1525 return XSTR (x, 0) == XSTR (y, 0); 1526 1527 case REG: 1528 return REGNO (x) == REGNO (y); 1529 1530 case MEM: 1531 /* Can't merge two expressions in different alias sets, since we can 1532 decide that the expression is transparent in a block when it isn't, 1533 due to it being set with the different alias set. */ 1534 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y)) 1535 return 0; 1536 break; 1537 1538 /* For commutative operations, check both orders. */ 1539 case PLUS: 1540 case MULT: 1541 case AND: 1542 case IOR: 1543 case XOR: 1544 case NE: 1545 case EQ: 1546 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0)) 1547 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1))) 1548 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1)) 1549 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0)))); 1550 1551 default: 1552 break; 1553 } 1554 1555 /* Compare the elements. If any pair of corresponding elements 1556 fail to match, return 0 for the whole thing. */ 1557 1558 fmt = GET_RTX_FORMAT (code); 1559 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 1560 { 1561 switch (fmt[i]) 1562 { 1563 case 'e': 1564 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i))) 1565 return 0; 1566 break; 1567 1568 case 'E': 1569 if (XVECLEN (x, i) != XVECLEN (y, i)) 1570 return 0; 1571 for (j = 0; j < XVECLEN (x, i); j++) 1572 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j))) 1573 return 0; 1574 break; 1575 1576 case 's': 1577 if (strcmp (XSTR (x, i), XSTR (y, i))) 1578 return 0; 1579 break; 1580 1581 case 'i': 1582 if (XINT (x, i) != XINT (y, i)) 1583 return 0; 1584 break; 1585 1586 case 'w': 1587 if (XWINT (x, i) != XWINT (y, i)) 1588 return 0; 1589 break; 1590 1591 case '0': 1592 break; 1593 1594 default: 1595 abort (); 1596 } 1597 } 1598 1599 return 1; 1600} 1601 1602/* Insert expression X in INSN in the hash table. 1603 If it is already present, record it as the last occurrence in INSN's 1604 basic block. 1605 1606 MODE is the mode of the value X is being stored into. 1607 It is only used if X is a CONST_INT. 1608 1609 ANTIC_P is non-zero if X is an anticipatable expression. 1610 AVAIL_P is non-zero if X is an available expression. */ 1611 1612static void 1613insert_expr_in_table (x, mode, insn, antic_p, avail_p) 1614 rtx x; 1615 enum machine_mode mode; 1616 rtx insn; 1617 int antic_p, avail_p; 1618{ 1619 int found, do_not_record_p; 1620 unsigned int hash; 1621 struct expr *cur_expr, *last_expr = NULL; 1622 struct occr *antic_occr, *avail_occr; 1623 struct occr *last_occr = NULL; 1624 1625 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size); 1626 1627 /* Do not insert expression in table if it contains volatile operands, 1628 or if hash_expr determines the expression is something we don't want 1629 to or can't handle. */ 1630 if (do_not_record_p) 1631 return; 1632 1633 cur_expr = expr_hash_table[hash]; 1634 found = 0; 1635 1636 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x))) 1637 { 1638 /* If the expression isn't found, save a pointer to the end of 1639 the list. */ 1640 last_expr = cur_expr; 1641 cur_expr = cur_expr->next_same_hash; 1642 } 1643 1644 if (! found) 1645 { 1646 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr)); 1647 bytes_used += sizeof (struct expr); 1648 if (expr_hash_table[hash] == NULL) 1649 { 1650 /* This is the first pattern that hashed to this index. */ 1651 expr_hash_table[hash] = cur_expr; 1652 } 1653 else 1654 { 1655 /* Add EXPR to end of this hash chain. */ 1656 last_expr->next_same_hash = cur_expr; 1657 } 1658 /* Set the fields of the expr element. */ 1659 cur_expr->expr = x; 1660 cur_expr->bitmap_index = n_exprs++; 1661 cur_expr->next_same_hash = NULL; 1662 cur_expr->antic_occr = NULL; 1663 cur_expr->avail_occr = NULL; 1664 } 1665 1666 /* Now record the occurrence(s). */ 1667 1668 if (antic_p) 1669 { 1670 antic_occr = cur_expr->antic_occr; 1671 1672 /* Search for another occurrence in the same basic block. */ 1673 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn)) 1674 { 1675 /* If an occurrence isn't found, save a pointer to the end of 1676 the list. */ 1677 last_occr = antic_occr; 1678 antic_occr = antic_occr->next; 1679 } 1680 1681 if (antic_occr) 1682 { 1683 /* Found another instance of the expression in the same basic block. 1684 Prefer the currently recorded one. We want the first one in the 1685 block and the block is scanned from start to end. */ 1686 ; /* nothing to do */ 1687 } 1688 else 1689 { 1690 /* First occurrence of this expression in this basic block. */ 1691 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 1692 bytes_used += sizeof (struct occr); 1693 /* First occurrence of this expression in any block? */ 1694 if (cur_expr->antic_occr == NULL) 1695 cur_expr->antic_occr = antic_occr; 1696 else 1697 last_occr->next = antic_occr; 1698 antic_occr->insn = insn; 1699 antic_occr->next = NULL; 1700 } 1701 } 1702 1703 if (avail_p) 1704 { 1705 avail_occr = cur_expr->avail_occr; 1706 1707 /* Search for another occurrence in the same basic block. */ 1708 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn)) 1709 { 1710 /* If an occurrence isn't found, save a pointer to the end of 1711 the list. */ 1712 last_occr = avail_occr; 1713 avail_occr = avail_occr->next; 1714 } 1715 1716 if (avail_occr) 1717 { 1718 /* Found another instance of the expression in the same basic block. 1719 Prefer this occurrence to the currently recorded one. We want 1720 the last one in the block and the block is scanned from start 1721 to end. */ 1722 avail_occr->insn = insn; 1723 } 1724 else 1725 { 1726 /* First occurrence of this expression in this basic block. */ 1727 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 1728 bytes_used += sizeof (struct occr); 1729 /* First occurrence of this expression in any block? */ 1730 if (cur_expr->avail_occr == NULL) 1731 cur_expr->avail_occr = avail_occr; 1732 else 1733 last_occr->next = avail_occr; 1734 avail_occr->insn = insn; 1735 avail_occr->next = NULL; 1736 } 1737 } 1738} 1739 1740/* Insert pattern X in INSN in the hash table. 1741 X is a SET of a reg to either another reg or a constant. 1742 If it is already present, record it as the last occurrence in INSN's 1743 basic block. */ 1744 1745static void 1746insert_set_in_table (x, insn) 1747 rtx x; 1748 rtx insn; 1749{ 1750 int found; 1751 unsigned int hash; 1752 struct expr *cur_expr, *last_expr = NULL; 1753 struct occr *cur_occr, *last_occr = NULL; 1754 1755 if (GET_CODE (x) != SET 1756 || GET_CODE (SET_DEST (x)) != REG) 1757 abort (); 1758 1759 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size); 1760 1761 cur_expr = set_hash_table[hash]; 1762 found = 0; 1763 1764 while (cur_expr && ! (found = expr_equiv_p (cur_expr->expr, x))) 1765 { 1766 /* If the expression isn't found, save a pointer to the end of 1767 the list. */ 1768 last_expr = cur_expr; 1769 cur_expr = cur_expr->next_same_hash; 1770 } 1771 1772 if (! found) 1773 { 1774 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr)); 1775 bytes_used += sizeof (struct expr); 1776 if (set_hash_table[hash] == NULL) 1777 { 1778 /* This is the first pattern that hashed to this index. */ 1779 set_hash_table[hash] = cur_expr; 1780 } 1781 else 1782 { 1783 /* Add EXPR to end of this hash chain. */ 1784 last_expr->next_same_hash = cur_expr; 1785 } 1786 /* Set the fields of the expr element. 1787 We must copy X because it can be modified when copy propagation is 1788 performed on its operands. */ 1789 /* ??? Should this go in a different obstack? */ 1790 cur_expr->expr = copy_rtx (x); 1791 cur_expr->bitmap_index = n_sets++; 1792 cur_expr->next_same_hash = NULL; 1793 cur_expr->antic_occr = NULL; 1794 cur_expr->avail_occr = NULL; 1795 } 1796 1797 /* Now record the occurrence. */ 1798 1799 cur_occr = cur_expr->avail_occr; 1800 1801 /* Search for another occurrence in the same basic block. */ 1802 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn)) 1803 { 1804 /* If an occurrence isn't found, save a pointer to the end of 1805 the list. */ 1806 last_occr = cur_occr; 1807 cur_occr = cur_occr->next; 1808 } 1809 1810 if (cur_occr) 1811 { 1812 /* Found another instance of the expression in the same basic block. 1813 Prefer this occurrence to the currently recorded one. We want 1814 the last one in the block and the block is scanned from start 1815 to end. */ 1816 cur_occr->insn = insn; 1817 } 1818 else 1819 { 1820 /* First occurrence of this expression in this basic block. */ 1821 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 1822 bytes_used += sizeof (struct occr); 1823 /* First occurrence of this expression in any block? */ 1824 if (cur_expr->avail_occr == NULL) 1825 cur_expr->avail_occr = cur_occr; 1826 else 1827 last_occr->next = cur_occr; 1828 cur_occr->insn = insn; 1829 cur_occr->next = NULL; 1830 } 1831} 1832 1833/* Scan pattern PAT of INSN and add an entry to the hash table. 1834 If SET_P is non-zero, this is for the assignment hash table, 1835 otherwise it is for the expression hash table. */ 1836 1837static void 1838hash_scan_set (pat, insn, set_p) 1839 rtx pat, insn; 1840 int set_p; 1841{ 1842 rtx src = SET_SRC (pat); 1843 rtx dest = SET_DEST (pat); 1844 1845 if (GET_CODE (src) == CALL) 1846 hash_scan_call (src, insn); 1847 1848 if (GET_CODE (dest) == REG) 1849 { 1850 int regno = REGNO (dest); 1851 rtx tmp; 1852 1853 /* Only record sets of pseudo-regs in the hash table. */ 1854 if (! set_p 1855 && regno >= FIRST_PSEUDO_REGISTER 1856 /* Don't GCSE something if we can't do a reg/reg copy. */ 1857 && can_copy_p [GET_MODE (dest)] 1858 /* Is SET_SRC something we want to gcse? */ 1859 && want_to_gcse_p (src)) 1860 { 1861 /* An expression is not anticipatable if its operands are 1862 modified before this insn. */ 1863 int antic_p = ! optimize_size && oprs_anticipatable_p (src, insn); 1864 /* An expression is not available if its operands are 1865 subsequently modified, including this insn. */ 1866 int avail_p = oprs_available_p (src, insn); 1867 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p); 1868 } 1869 /* Record sets for constant/copy propagation. */ 1870 else if (set_p 1871 && regno >= FIRST_PSEUDO_REGISTER 1872 && ((GET_CODE (src) == REG 1873 && REGNO (src) >= FIRST_PSEUDO_REGISTER 1874 && can_copy_p [GET_MODE (dest)]) 1875 /* ??? CONST_INT:wip */ 1876 || GET_CODE (src) == CONST_INT 1877 || GET_CODE (src) == CONST_DOUBLE) 1878 /* A copy is not available if its src or dest is subsequently 1879 modified. Here we want to search from INSN+1 on, but 1880 oprs_available_p searches from INSN on. */ 1881 && (insn == BLOCK_END (BLOCK_NUM (insn)) 1882 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX 1883 && oprs_available_p (pat, tmp)))) 1884 insert_set_in_table (pat, insn); 1885 } 1886} 1887 1888static void 1889hash_scan_clobber (x, insn) 1890 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED; 1891{ 1892 /* Currently nothing to do. */ 1893} 1894 1895static void 1896hash_scan_call (x, insn) 1897 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED; 1898{ 1899 /* Currently nothing to do. */ 1900} 1901 1902/* Process INSN and add hash table entries as appropriate. 1903 1904 Only available expressions that set a single pseudo-reg are recorded. 1905 1906 Single sets in a PARALLEL could be handled, but it's an extra complication 1907 that isn't dealt with right now. The trick is handling the CLOBBERs that 1908 are also in the PARALLEL. Later. 1909 1910 If SET_P is non-zero, this is for the assignment hash table, 1911 otherwise it is for the expression hash table. 1912 If IN_LIBCALL_BLOCK nonzero, we are in a libcall block, and should 1913 not record any expressions. */ 1914 1915static void 1916hash_scan_insn (insn, set_p, in_libcall_block) 1917 rtx insn; 1918 int set_p; 1919 int in_libcall_block; 1920{ 1921 rtx pat = PATTERN (insn); 1922 1923 /* Pick out the sets of INSN and for other forms of instructions record 1924 what's been modified. */ 1925 1926 if (GET_CODE (pat) == SET && ! in_libcall_block) 1927 hash_scan_set (pat, insn, set_p); 1928 else if (GET_CODE (pat) == PARALLEL) 1929 { 1930 int i; 1931 1932 for (i = 0; i < XVECLEN (pat, 0); i++) 1933 { 1934 rtx x = XVECEXP (pat, 0, i); 1935 1936 if (GET_CODE (x) == SET) 1937 { 1938 if (GET_CODE (SET_SRC (x)) == CALL) 1939 hash_scan_call (SET_SRC (x), insn); 1940 } 1941 else if (GET_CODE (x) == CLOBBER) 1942 hash_scan_clobber (x, insn); 1943 else if (GET_CODE (x) == CALL) 1944 hash_scan_call (x, insn); 1945 } 1946 } 1947 else if (GET_CODE (pat) == CLOBBER) 1948 hash_scan_clobber (pat, insn); 1949 else if (GET_CODE (pat) == CALL) 1950 hash_scan_call (pat, insn); 1951} 1952 1953static void 1954dump_hash_table (file, name, table, table_size, total_size) 1955 FILE *file; 1956 const char *name; 1957 struct expr **table; 1958 int table_size, total_size; 1959{ 1960 int i; 1961 /* Flattened out table, so it's printed in proper order. */ 1962 struct expr **flat_table = (struct expr **) alloca (total_size * sizeof (struct expr *)); 1963 unsigned int *hash_val = (unsigned int *) alloca (total_size * sizeof (unsigned int)); 1964 1965 bzero ((char *) flat_table, total_size * sizeof (struct expr *)); 1966 for (i = 0; i < table_size; i++) 1967 { 1968 struct expr *expr; 1969 1970 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash) 1971 { 1972 flat_table[expr->bitmap_index] = expr; 1973 hash_val[expr->bitmap_index] = i; 1974 } 1975 } 1976 1977 fprintf (file, "%s hash table (%d buckets, %d entries)\n", 1978 name, table_size, total_size); 1979 1980 for (i = 0; i < total_size; i++) 1981 { 1982 struct expr *expr = flat_table[i]; 1983 1984 fprintf (file, "Index %d (hash value %d)\n ", 1985 expr->bitmap_index, hash_val[i]); 1986 print_rtl (file, expr->expr); 1987 fprintf (file, "\n"); 1988 } 1989 1990 fprintf (file, "\n"); 1991} 1992 1993/* Record register first/last/block set information for REGNO in INSN. 1994 reg_first_set records the first place in the block where the register 1995 is set and is used to compute "anticipatability". 1996 reg_last_set records the last place in the block where the register 1997 is set and is used to compute "availability". 1998 reg_set_in_block records whether the register is set in the block 1999 and is used to compute "transparency". */ 2000 2001static void 2002record_last_reg_set_info (insn, regno) 2003 rtx insn; 2004 int regno; 2005{ 2006 if (reg_first_set[regno] == NEVER_SET) 2007 reg_first_set[regno] = INSN_CUID (insn); 2008 reg_last_set[regno] = INSN_CUID (insn); 2009 SET_BIT (reg_set_in_block[BLOCK_NUM (insn)], regno); 2010} 2011 2012/* Record memory first/last/block set information for INSN. */ 2013 2014static void 2015record_last_mem_set_info (insn) 2016 rtx insn; 2017{ 2018 if (mem_first_set == NEVER_SET) 2019 mem_first_set = INSN_CUID (insn); 2020 mem_last_set = INSN_CUID (insn); 2021 mem_set_in_block[BLOCK_NUM (insn)] = 1; 2022} 2023 2024/* Used for communicating between next two routines. */ 2025static rtx last_set_insn; 2026 2027/* Called from compute_hash_table via note_stores to handle one 2028 SET or CLOBBER in an insn. */ 2029 2030static void 2031record_last_set_info (dest, setter) 2032 rtx dest, setter ATTRIBUTE_UNUSED; 2033{ 2034 if (GET_CODE (dest) == SUBREG) 2035 dest = SUBREG_REG (dest); 2036 2037 if (GET_CODE (dest) == REG) 2038 record_last_reg_set_info (last_set_insn, REGNO (dest)); 2039 else if (GET_CODE (dest) == MEM 2040 /* Ignore pushes, they clobber nothing. */ 2041 && ! push_operand (dest, GET_MODE (dest))) 2042 record_last_mem_set_info (last_set_insn); 2043} 2044 2045/* Top level function to create an expression or assignment hash table. 2046 2047 Expression entries are placed in the hash table if 2048 - they are of the form (set (pseudo-reg) src), 2049 - src is something we want to perform GCSE on, 2050 - none of the operands are subsequently modified in the block 2051 2052 Assignment entries are placed in the hash table if 2053 - they are of the form (set (pseudo-reg) src), 2054 - src is something we want to perform const/copy propagation on, 2055 - none of the operands or target are subsequently modified in the block 2056 Currently src must be a pseudo-reg or a const_int. 2057 2058 F is the first insn. 2059 SET_P is non-zero for computing the assignment hash table. */ 2060 2061static void 2062compute_hash_table (set_p) 2063 int set_p; 2064{ 2065 int bb; 2066 2067 /* While we compute the hash table we also compute a bit array of which 2068 registers are set in which blocks. 2069 We also compute which blocks set memory, in the absence of aliasing 2070 support [which is TODO]. 2071 ??? This isn't needed during const/copy propagation, but it's cheap to 2072 compute. Later. */ 2073 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks); 2074 bzero ((char *) mem_set_in_block, n_basic_blocks); 2075 2076 /* Some working arrays used to track first and last set in each block. */ 2077 /* ??? One could use alloca here, but at some size a threshold is crossed 2078 beyond which one should use malloc. Are we at that threshold here? */ 2079 reg_first_set = (int *) gmalloc (max_gcse_regno * sizeof (int)); 2080 reg_last_set = (int *) gmalloc (max_gcse_regno * sizeof (int)); 2081 2082 for (bb = 0; bb < n_basic_blocks; bb++) 2083 { 2084 rtx insn; 2085 int regno; 2086 int in_libcall_block; 2087 int i; 2088 2089 /* First pass over the instructions records information used to 2090 determine when registers and memory are first and last set. 2091 ??? The mem_set_in_block and hard-reg reg_set_in_block computation 2092 could be moved to compute_sets since they currently don't change. */ 2093 2094 for (i = 0; i < max_gcse_regno; i++) 2095 reg_first_set[i] = reg_last_set[i] = NEVER_SET; 2096 mem_first_set = NEVER_SET; 2097 mem_last_set = NEVER_SET; 2098 2099 for (insn = BLOCK_HEAD (bb); 2100 insn && insn != NEXT_INSN (BLOCK_END (bb)); 2101 insn = NEXT_INSN (insn)) 2102 { 2103#ifdef NON_SAVING_SETJMP 2104 if (NON_SAVING_SETJMP && GET_CODE (insn) == NOTE 2105 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP) 2106 { 2107 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2108 record_last_reg_set_info (insn, regno); 2109 continue; 2110 } 2111#endif 2112 2113 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') 2114 continue; 2115 2116 if (GET_CODE (insn) == CALL_INSN) 2117 { 2118 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2119 if ((call_used_regs[regno] 2120 && regno != STACK_POINTER_REGNUM 2121#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM 2122 && regno != HARD_FRAME_POINTER_REGNUM 2123#endif 2124#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM 2125 && ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno]) 2126#endif 2127#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED) 2128 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic) 2129#endif 2130 2131 && regno != FRAME_POINTER_REGNUM) 2132 || global_regs[regno]) 2133 record_last_reg_set_info (insn, regno); 2134 if (! CONST_CALL_P (insn)) 2135 record_last_mem_set_info (insn); 2136 } 2137 2138 last_set_insn = insn; 2139 note_stores (PATTERN (insn), record_last_set_info); 2140 } 2141 2142 /* The next pass builds the hash table. */ 2143 2144 for (insn = BLOCK_HEAD (bb), in_libcall_block = 0; 2145 insn && insn != NEXT_INSN (BLOCK_END (bb)); 2146 insn = NEXT_INSN (insn)) 2147 { 2148 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 2149 { 2150 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) 2151 in_libcall_block = 1; 2152 else if (find_reg_note (insn, REG_RETVAL, NULL_RTX)) 2153 in_libcall_block = 0; 2154 hash_scan_insn (insn, set_p, in_libcall_block); 2155 } 2156 } 2157 } 2158 2159 free (reg_first_set); 2160 free (reg_last_set); 2161 /* Catch bugs early. */ 2162 reg_first_set = reg_last_set = 0; 2163} 2164 2165/* Allocate space for the set hash table. 2166 N_INSNS is the number of instructions in the function. 2167 It is used to determine the number of buckets to use. */ 2168 2169static void 2170alloc_set_hash_table (n_insns) 2171 int n_insns; 2172{ 2173 int n; 2174 2175 set_hash_table_size = n_insns / 4; 2176 if (set_hash_table_size < 11) 2177 set_hash_table_size = 11; 2178 /* Attempt to maintain efficient use of hash table. 2179 Making it an odd number is simplest for now. 2180 ??? Later take some measurements. */ 2181 set_hash_table_size |= 1; 2182 n = set_hash_table_size * sizeof (struct expr *); 2183 set_hash_table = (struct expr **) gmalloc (n); 2184} 2185 2186/* Free things allocated by alloc_set_hash_table. */ 2187 2188static void 2189free_set_hash_table () 2190{ 2191 free (set_hash_table); 2192} 2193 2194/* Compute the hash table for doing copy/const propagation. */ 2195 2196static void 2197compute_set_hash_table () 2198{ 2199 /* Initialize count of number of entries in hash table. */ 2200 n_sets = 0; 2201 bzero ((char *) set_hash_table, set_hash_table_size * sizeof (struct expr *)); 2202 2203 compute_hash_table (1); 2204} 2205 2206/* Allocate space for the expression hash table. 2207 N_INSNS is the number of instructions in the function. 2208 It is used to determine the number of buckets to use. */ 2209 2210static void 2211alloc_expr_hash_table (n_insns) 2212 int n_insns; 2213{ 2214 int n; 2215 2216 expr_hash_table_size = n_insns / 2; 2217 /* Make sure the amount is usable. */ 2218 if (expr_hash_table_size < 11) 2219 expr_hash_table_size = 11; 2220 /* Attempt to maintain efficient use of hash table. 2221 Making it an odd number is simplest for now. 2222 ??? Later take some measurements. */ 2223 expr_hash_table_size |= 1; 2224 n = expr_hash_table_size * sizeof (struct expr *); 2225 expr_hash_table = (struct expr **) gmalloc (n); 2226} 2227 2228/* Free things allocated by alloc_expr_hash_table. */ 2229 2230static void 2231free_expr_hash_table () 2232{ 2233 free (expr_hash_table); 2234} 2235 2236/* Compute the hash table for doing GCSE. */ 2237 2238static void 2239compute_expr_hash_table () 2240{ 2241 /* Initialize count of number of entries in hash table. */ 2242 n_exprs = 0; 2243 bzero ((char *) expr_hash_table, expr_hash_table_size * sizeof (struct expr *)); 2244 2245 compute_hash_table (0); 2246} 2247 2248/* Expression tracking support. */ 2249 2250/* Lookup pattern PAT in the expression table. 2251 The result is a pointer to the table entry, or NULL if not found. */ 2252 2253static struct expr * 2254lookup_expr (pat) 2255 rtx pat; 2256{ 2257 int do_not_record_p; 2258 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p, 2259 expr_hash_table_size); 2260 struct expr *expr; 2261 2262 if (do_not_record_p) 2263 return NULL; 2264 2265 expr = expr_hash_table[hash]; 2266 2267 while (expr && ! expr_equiv_p (expr->expr, pat)) 2268 expr = expr->next_same_hash; 2269 2270 return expr; 2271} 2272 2273/* Lookup REGNO in the set table. 2274 If PAT is non-NULL look for the entry that matches it, otherwise return 2275 the first entry for REGNO. 2276 The result is a pointer to the table entry, or NULL if not found. */ 2277 2278static struct expr * 2279lookup_set (regno, pat) 2280 int regno; 2281 rtx pat; 2282{ 2283 unsigned int hash = hash_set (regno, set_hash_table_size); 2284 struct expr *expr; 2285 2286 expr = set_hash_table[hash]; 2287 2288 if (pat) 2289 { 2290 while (expr && ! expr_equiv_p (expr->expr, pat)) 2291 expr = expr->next_same_hash; 2292 } 2293 else 2294 { 2295 while (expr && REGNO (SET_DEST (expr->expr)) != regno) 2296 expr = expr->next_same_hash; 2297 } 2298 2299 return expr; 2300} 2301 2302/* Return the next entry for REGNO in list EXPR. */ 2303 2304static struct expr * 2305next_set (regno, expr) 2306 int regno; 2307 struct expr *expr; 2308{ 2309 do 2310 expr = expr->next_same_hash; 2311 while (expr && REGNO (SET_DEST (expr->expr)) != regno); 2312 return expr; 2313} 2314 2315/* Reset tables used to keep track of what's still available [since the 2316 start of the block]. */ 2317 2318static void 2319reset_opr_set_tables () 2320{ 2321 /* Maintain a bitmap of which regs have been set since beginning of 2322 the block. */ 2323 sbitmap_zero (reg_set_bitmap); 2324 /* Also keep a record of the last instruction to modify memory. 2325 For now this is very trivial, we only record whether any memory 2326 location has been modified. */ 2327 mem_last_set = 0; 2328} 2329 2330/* Return non-zero if the operands of X are not set before INSN in 2331 INSN's basic block. */ 2332 2333static int 2334oprs_not_set_p (x, insn) 2335 rtx x, insn; 2336{ 2337 int i; 2338 enum rtx_code code; 2339 char *fmt; 2340 2341 /* repeat is used to turn tail-recursion into iteration. */ 2342repeat: 2343 2344 if (x == 0) 2345 return 1; 2346 2347 code = GET_CODE (x); 2348 switch (code) 2349 { 2350 case PC: 2351 case CC0: 2352 case CONST: 2353 case CONST_INT: 2354 case CONST_DOUBLE: 2355 case SYMBOL_REF: 2356 case LABEL_REF: 2357 case ADDR_VEC: 2358 case ADDR_DIFF_VEC: 2359 return 1; 2360 2361 case MEM: 2362 if (mem_last_set != 0) 2363 return 0; 2364 x = XEXP (x, 0); 2365 goto repeat; 2366 2367 case REG: 2368 return ! TEST_BIT (reg_set_bitmap, REGNO (x)); 2369 2370 default: 2371 break; 2372 } 2373 2374 fmt = GET_RTX_FORMAT (code); 2375 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 2376 { 2377 if (fmt[i] == 'e') 2378 { 2379 int not_set_p; 2380 /* If we are about to do the last recursive call 2381 needed at this level, change it into iteration. 2382 This function is called enough to be worth it. */ 2383 if (i == 0) 2384 { 2385 x = XEXP (x, 0); 2386 goto repeat; 2387 } 2388 not_set_p = oprs_not_set_p (XEXP (x, i), insn); 2389 if (! not_set_p) 2390 return 0; 2391 } 2392 else if (fmt[i] == 'E') 2393 { 2394 int j; 2395 for (j = 0; j < XVECLEN (x, i); j++) 2396 { 2397 int not_set_p = oprs_not_set_p (XVECEXP (x, i, j), insn); 2398 if (! not_set_p) 2399 return 0; 2400 } 2401 } 2402 } 2403 2404 return 1; 2405} 2406 2407/* Mark things set by a CALL. */ 2408 2409static void 2410mark_call (insn) 2411 rtx insn; 2412{ 2413 mem_last_set = INSN_CUID (insn); 2414} 2415 2416/* Mark things set by a SET. */ 2417 2418static void 2419mark_set (pat, insn) 2420 rtx pat, insn; 2421{ 2422 rtx dest = SET_DEST (pat); 2423 2424 while (GET_CODE (dest) == SUBREG 2425 || GET_CODE (dest) == ZERO_EXTRACT 2426 || GET_CODE (dest) == SIGN_EXTRACT 2427 || GET_CODE (dest) == STRICT_LOW_PART) 2428 dest = XEXP (dest, 0); 2429 2430 if (GET_CODE (dest) == REG) 2431 SET_BIT (reg_set_bitmap, REGNO (dest)); 2432 else if (GET_CODE (dest) == MEM) 2433 mem_last_set = INSN_CUID (insn); 2434 2435 if (GET_CODE (SET_SRC (pat)) == CALL) 2436 mark_call (insn); 2437} 2438 2439/* Record things set by a CLOBBER. */ 2440 2441static void 2442mark_clobber (pat, insn) 2443 rtx pat, insn; 2444{ 2445 rtx clob = XEXP (pat, 0); 2446 2447 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART) 2448 clob = XEXP (clob, 0); 2449 2450 if (GET_CODE (clob) == REG) 2451 SET_BIT (reg_set_bitmap, REGNO (clob)); 2452 else 2453 mem_last_set = INSN_CUID (insn); 2454} 2455 2456/* Record things set by INSN. 2457 This data is used by oprs_not_set_p. */ 2458 2459static void 2460mark_oprs_set (insn) 2461 rtx insn; 2462{ 2463 rtx pat = PATTERN (insn); 2464 2465 if (GET_CODE (pat) == SET) 2466 mark_set (pat, insn); 2467 else if (GET_CODE (pat) == PARALLEL) 2468 { 2469 int i; 2470 2471 for (i = 0; i < XVECLEN (pat, 0); i++) 2472 { 2473 rtx x = XVECEXP (pat, 0, i); 2474 2475 if (GET_CODE (x) == SET) 2476 mark_set (x, insn); 2477 else if (GET_CODE (x) == CLOBBER) 2478 mark_clobber (x, insn); 2479 else if (GET_CODE (x) == CALL) 2480 mark_call (insn); 2481 } 2482 } 2483 else if (GET_CODE (pat) == CLOBBER) 2484 mark_clobber (pat, insn); 2485 else if (GET_CODE (pat) == CALL) 2486 mark_call (insn); 2487} 2488 2489 2490/* Classic GCSE reaching definition support. */ 2491 2492/* Allocate reaching def variables. */ 2493 2494static void 2495alloc_rd_mem (n_blocks, n_insns) 2496 int n_blocks, n_insns; 2497{ 2498 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2499 sbitmap_vector_zero (rd_kill, n_basic_blocks); 2500 2501 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2502 sbitmap_vector_zero (rd_gen, n_basic_blocks); 2503 2504 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2505 sbitmap_vector_zero (reaching_defs, n_basic_blocks); 2506 2507 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2508 sbitmap_vector_zero (rd_out, n_basic_blocks); 2509} 2510 2511/* Free reaching def variables. */ 2512 2513static void 2514free_rd_mem () 2515{ 2516 free (rd_kill); 2517 free (rd_gen); 2518 free (reaching_defs); 2519 free (rd_out); 2520} 2521 2522/* Add INSN to the kills of BB. 2523 REGNO, set in BB, is killed by INSN. */ 2524 2525static void 2526handle_rd_kill_set (insn, regno, bb) 2527 rtx insn; 2528 int regno, bb; 2529{ 2530 struct reg_set *this_reg = reg_set_table[regno]; 2531 2532 while (this_reg) 2533 { 2534 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn)) 2535 SET_BIT (rd_kill[bb], INSN_CUID (this_reg->insn)); 2536 this_reg = this_reg->next; 2537 } 2538} 2539 2540/* Compute the set of kill's for reaching definitions. */ 2541 2542static void 2543compute_kill_rd () 2544{ 2545 int bb,cuid; 2546 2547 /* For each block 2548 For each set bit in `gen' of the block (i.e each insn which 2549 generates a definition in the block) 2550 Call the reg set by the insn corresponding to that bit regx 2551 Look at the linked list starting at reg_set_table[regx] 2552 For each setting of regx in the linked list, which is not in 2553 this block 2554 Set the bit in `kill' corresponding to that insn 2555 */ 2556 2557 for (bb = 0; bb < n_basic_blocks; bb++) 2558 { 2559 for (cuid = 0; cuid < max_cuid; cuid++) 2560 { 2561 if (TEST_BIT (rd_gen[bb], cuid)) 2562 { 2563 rtx insn = CUID_INSN (cuid); 2564 rtx pat = PATTERN (insn); 2565 2566 if (GET_CODE (insn) == CALL_INSN) 2567 { 2568 int regno; 2569 2570 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2571 { 2572 if ((call_used_regs[regno] 2573 && regno != STACK_POINTER_REGNUM 2574#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM 2575 && regno != HARD_FRAME_POINTER_REGNUM 2576#endif 2577#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM 2578 && ! (regno == ARG_POINTER_REGNUM 2579 && fixed_regs[regno]) 2580#endif 2581#if defined (PIC_OFFSET_TABLE_REGNUM) && !defined (PIC_OFFSET_TABLE_REG_CALL_CLOBBERED) 2582 && ! (regno == PIC_OFFSET_TABLE_REGNUM && flag_pic) 2583#endif 2584 && regno != FRAME_POINTER_REGNUM) 2585 || global_regs[regno]) 2586 handle_rd_kill_set (insn, regno, bb); 2587 } 2588 } 2589 2590 if (GET_CODE (pat) == PARALLEL) 2591 { 2592 int i; 2593 2594 /* We work backwards because ... */ 2595 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) 2596 { 2597 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i)); 2598 if ((code == SET || code == CLOBBER) 2599 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG) 2600 handle_rd_kill_set (insn, 2601 REGNO (XEXP (XVECEXP (pat, 0, i), 0)), 2602 bb); 2603 } 2604 } 2605 else if (GET_CODE (pat) == SET) 2606 { 2607 if (GET_CODE (SET_DEST (pat)) == REG) 2608 { 2609 /* Each setting of this register outside of this block 2610 must be marked in the set of kills in this block. */ 2611 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), bb); 2612 } 2613 } 2614 /* FIXME: CLOBBER? */ 2615 } 2616 } 2617 } 2618} 2619 2620/* Compute the reaching definitions as in 2621 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman, 2622 Chapter 10. It is the same algorithm as used for computing available 2623 expressions but applied to the gens and kills of reaching definitions. */ 2624 2625static void 2626compute_rd () 2627{ 2628 int bb, changed, passes; 2629 2630 for (bb = 0; bb < n_basic_blocks; bb++) 2631 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/); 2632 2633 passes = 0; 2634 changed = 1; 2635 while (changed) 2636 { 2637 changed = 0; 2638 for (bb = 0; bb < n_basic_blocks; bb++) 2639 { 2640 sbitmap_union_of_predecessors (reaching_defs[bb], rd_out, 2641 bb, s_preds); 2642 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb], 2643 reaching_defs[bb], rd_kill[bb]); 2644 } 2645 passes++; 2646 } 2647 2648 if (gcse_file) 2649 fprintf (gcse_file, "reaching def computation: %d passes\n", passes); 2650} 2651 2652/* Classic GCSE available expression support. */ 2653 2654/* Allocate memory for available expression computation. */ 2655 2656static void 2657alloc_avail_expr_mem (n_blocks, n_exprs) 2658 int n_blocks, n_exprs; 2659{ 2660 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 2661 sbitmap_vector_zero (ae_kill, n_basic_blocks); 2662 2663 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 2664 sbitmap_vector_zero (ae_gen, n_basic_blocks); 2665 2666 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 2667 sbitmap_vector_zero (ae_in, n_basic_blocks); 2668 2669 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 2670 sbitmap_vector_zero (ae_out, n_basic_blocks); 2671 2672 u_bitmap = (sbitmap) sbitmap_alloc (n_exprs); 2673 sbitmap_ones (u_bitmap); 2674} 2675 2676static void 2677free_avail_expr_mem () 2678{ 2679 free (ae_kill); 2680 free (ae_gen); 2681 free (ae_in); 2682 free (ae_out); 2683 free (u_bitmap); 2684} 2685 2686/* Compute the set of available expressions generated in each basic block. */ 2687 2688static void 2689compute_ae_gen () 2690{ 2691 int i; 2692 2693 /* For each recorded occurrence of each expression, set ae_gen[bb][expr]. 2694 This is all we have to do because an expression is not recorded if it 2695 is not available, and the only expressions we want to work with are the 2696 ones that are recorded. */ 2697 2698 for (i = 0; i < expr_hash_table_size; i++) 2699 { 2700 struct expr *expr = expr_hash_table[i]; 2701 while (expr != NULL) 2702 { 2703 struct occr *occr = expr->avail_occr; 2704 while (occr != NULL) 2705 { 2706 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index); 2707 occr = occr->next; 2708 } 2709 expr = expr->next_same_hash; 2710 } 2711 } 2712} 2713 2714/* Return non-zero if expression X is killed in BB. */ 2715 2716static int 2717expr_killed_p (x, bb) 2718 rtx x; 2719 int bb; 2720{ 2721 int i; 2722 enum rtx_code code; 2723 char *fmt; 2724 2725 /* repeat is used to turn tail-recursion into iteration. */ 2726 repeat: 2727 2728 if (x == 0) 2729 return 1; 2730 2731 code = GET_CODE (x); 2732 switch (code) 2733 { 2734 case REG: 2735 return TEST_BIT (reg_set_in_block[bb], REGNO (x)); 2736 2737 case MEM: 2738 if (mem_set_in_block[bb]) 2739 return 1; 2740 x = XEXP (x, 0); 2741 goto repeat; 2742 2743 case PC: 2744 case CC0: /*FIXME*/ 2745 case CONST: 2746 case CONST_INT: 2747 case CONST_DOUBLE: 2748 case SYMBOL_REF: 2749 case LABEL_REF: 2750 case ADDR_VEC: 2751 case ADDR_DIFF_VEC: 2752 return 0; 2753 2754 default: 2755 break; 2756 } 2757 2758 i = GET_RTX_LENGTH (code) - 1; 2759 fmt = GET_RTX_FORMAT (code); 2760 for (; i >= 0; i--) 2761 { 2762 if (fmt[i] == 'e') 2763 { 2764 rtx tem = XEXP (x, i); 2765 2766 /* If we are about to do the last recursive call 2767 needed at this level, change it into iteration. 2768 This function is called enough to be worth it. */ 2769 if (i == 0) 2770 { 2771 x = tem; 2772 goto repeat; 2773 } 2774 if (expr_killed_p (tem, bb)) 2775 return 1; 2776 } 2777 else if (fmt[i] == 'E') 2778 { 2779 int j; 2780 for (j = 0; j < XVECLEN (x, i); j++) 2781 { 2782 if (expr_killed_p (XVECEXP (x, i, j), bb)) 2783 return 1; 2784 } 2785 } 2786 } 2787 2788 return 0; 2789} 2790 2791/* Compute the set of available expressions killed in each basic block. */ 2792 2793static void 2794compute_ae_kill () 2795{ 2796 int bb,i; 2797 2798 for (bb = 0; bb < n_basic_blocks; bb++) 2799 { 2800 for (i = 0; i < expr_hash_table_size; i++) 2801 { 2802 struct expr *expr = expr_hash_table[i]; 2803 2804 for ( ; expr != NULL; expr = expr->next_same_hash) 2805 { 2806 /* Skip EXPR if generated in this block. */ 2807 if (TEST_BIT (ae_gen[bb], expr->bitmap_index)) 2808 continue; 2809 2810 if (expr_killed_p (expr->expr, bb)) 2811 SET_BIT (ae_kill[bb], expr->bitmap_index); 2812 } 2813 } 2814 } 2815} 2816 2817/* Compute available expressions. 2818 2819 Implement the algorithm to find available expressions 2820 as given in the Aho Sethi Ullman book, pages 627-631. */ 2821 2822static void 2823compute_available () 2824{ 2825 int bb, changed, passes; 2826 2827 sbitmap_zero (ae_in[0]); 2828 2829 sbitmap_copy (ae_out[0] /*dst*/, ae_gen[0] /*src*/); 2830 2831 for (bb = 1; bb < n_basic_blocks; bb++) 2832 sbitmap_difference (ae_out[bb], u_bitmap, ae_kill[bb]); 2833 2834 passes = 0; 2835 changed = 1; 2836 while (changed) 2837 { 2838 changed = 0; 2839 for (bb = 1; bb < n_basic_blocks; bb++) 2840 { 2841 sbitmap_intersect_of_predecessors (ae_in[bb], ae_out, bb, s_preds); 2842 changed |= sbitmap_union_of_diff (ae_out[bb], ae_gen[bb], 2843 ae_in[bb], ae_kill[bb]); 2844 } 2845 passes++; 2846 } 2847 2848 if (gcse_file) 2849 fprintf (gcse_file, "avail expr computation: %d passes\n", passes); 2850} 2851 2852/* Actually perform the Classic GCSE optimizations. */ 2853 2854/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB. 2855 2856 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself 2857 as a positive reach. We want to do this when there are two computations 2858 of the expression in the block. 2859 2860 VISITED is a pointer to a working buffer for tracking which BB's have 2861 been visited. It is NULL for the top-level call. 2862 2863 We treat reaching expressions that go through blocks containing the same 2864 reaching expression as "not reaching". E.g. if EXPR is generated in blocks 2865 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block 2866 2 as not reaching. The intent is to improve the probability of finding 2867 only one reaching expression and to reduce register lifetimes by picking 2868 the closest such expression. */ 2869 2870static int 2871expr_reaches_here_p (occr, expr, bb, check_self_loop, visited) 2872 struct occr *occr; 2873 struct expr *expr; 2874 int bb; 2875 int check_self_loop; 2876 char *visited; 2877{ 2878 int_list_ptr pred; 2879 2880 if (visited == NULL) 2881 { 2882 visited = (char *) alloca (n_basic_blocks); 2883 bzero (visited, n_basic_blocks); 2884 } 2885 2886 for (pred = s_preds[bb]; pred != NULL; pred = pred->next) 2887 { 2888 int pred_bb = INT_LIST_VAL (pred); 2889 2890 if (visited[pred_bb]) 2891 { 2892 /* This predecessor has already been visited. 2893 Nothing to do. */ 2894 ; 2895 } 2896 else if (pred_bb == bb) 2897 { 2898 /* BB loops on itself. */ 2899 if (check_self_loop 2900 && TEST_BIT (ae_gen[pred_bb], expr->bitmap_index) 2901 && BLOCK_NUM (occr->insn) == pred_bb) 2902 return 1; 2903 visited[pred_bb] = 1; 2904 } 2905 /* Ignore this predecessor if it kills the expression. */ 2906 else if (TEST_BIT (ae_kill[pred_bb], expr->bitmap_index)) 2907 visited[pred_bb] = 1; 2908 /* Does this predecessor generate this expression? */ 2909 else if (TEST_BIT (ae_gen[pred_bb], expr->bitmap_index)) 2910 { 2911 /* Is this the occurrence we're looking for? 2912 Note that there's only one generating occurrence per block 2913 so we just need to check the block number. */ 2914 if (BLOCK_NUM (occr->insn) == pred_bb) 2915 return 1; 2916 visited[pred_bb] = 1; 2917 } 2918 /* Neither gen nor kill. */ 2919 else 2920 { 2921 visited[pred_bb] = 1; 2922 if (expr_reaches_here_p (occr, expr, pred_bb, check_self_loop, visited)) 2923 return 1; 2924 } 2925 } 2926 2927 /* All paths have been checked. */ 2928 return 0; 2929} 2930 2931/* Return the instruction that computes EXPR that reaches INSN's basic block. 2932 If there is more than one such instruction, return NULL. 2933 2934 Called only by handle_avail_expr. */ 2935 2936static rtx 2937computing_insn (expr, insn) 2938 struct expr *expr; 2939 rtx insn; 2940{ 2941 int bb = BLOCK_NUM (insn); 2942 2943 if (expr->avail_occr->next == NULL) 2944 { 2945 if (BLOCK_NUM (expr->avail_occr->insn) == bb) 2946 { 2947 /* The available expression is actually itself 2948 (i.e. a loop in the flow graph) so do nothing. */ 2949 return NULL; 2950 } 2951 /* (FIXME) Case that we found a pattern that was created by 2952 a substitution that took place. */ 2953 return expr->avail_occr->insn; 2954 } 2955 else 2956 { 2957 /* Pattern is computed more than once. 2958 Search backwards from this insn to see how many of these 2959 computations actually reach this insn. */ 2960 struct occr *occr; 2961 rtx insn_computes_expr = NULL; 2962 int can_reach = 0; 2963 2964 for (occr = expr->avail_occr; occr != NULL; occr = occr->next) 2965 { 2966 if (BLOCK_NUM (occr->insn) == bb) 2967 { 2968 /* The expression is generated in this block. 2969 The only time we care about this is when the expression 2970 is generated later in the block [and thus there's a loop]. 2971 We let the normal cse pass handle the other cases. */ 2972 if (INSN_CUID (insn) < INSN_CUID (occr->insn)) 2973 { 2974 if (expr_reaches_here_p (occr, expr, bb, 1, NULL)) 2975 { 2976 can_reach++; 2977 if (can_reach > 1) 2978 return NULL; 2979 insn_computes_expr = occr->insn; 2980 } 2981 } 2982 } 2983 else /* Computation of the pattern outside this block. */ 2984 { 2985 if (expr_reaches_here_p (occr, expr, bb, 0, NULL)) 2986 { 2987 can_reach++; 2988 if (can_reach > 1) 2989 return NULL; 2990 insn_computes_expr = occr->insn; 2991 } 2992 } 2993 } 2994 2995 if (insn_computes_expr == NULL) 2996 abort (); 2997 return insn_computes_expr; 2998 } 2999} 3000 3001/* Return non-zero if the definition in DEF_INSN can reach INSN. 3002 Only called by can_disregard_other_sets. */ 3003 3004static int 3005def_reaches_here_p (insn, def_insn) 3006 rtx insn, def_insn; 3007{ 3008 rtx reg; 3009 3010 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn))) 3011 return 1; 3012 3013 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn)) 3014 { 3015 if (INSN_CUID (def_insn) < INSN_CUID (insn)) 3016 { 3017 if (GET_CODE (PATTERN (def_insn)) == PARALLEL) 3018 return 1; 3019 if (GET_CODE (PATTERN (def_insn)) == CLOBBER) 3020 reg = XEXP (PATTERN (def_insn), 0); 3021 else if (GET_CODE (PATTERN (def_insn)) == SET) 3022 reg = SET_DEST (PATTERN (def_insn)); 3023 else 3024 abort (); 3025 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn); 3026 } 3027 else 3028 return 0; 3029 } 3030 3031 return 0; 3032} 3033 3034/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. 3035 The value returned is the number of definitions that reach INSN. 3036 Returning a value of zero means that [maybe] more than one definition 3037 reaches INSN and the caller can't perform whatever optimization it is 3038 trying. i.e. it is always safe to return zero. */ 3039 3040static int 3041can_disregard_other_sets (addr_this_reg, insn, for_combine) 3042 struct reg_set **addr_this_reg; 3043 rtx insn; 3044 int for_combine; 3045{ 3046 int number_of_reaching_defs = 0; 3047 struct reg_set *this_reg = *addr_this_reg; 3048 3049 while (this_reg) 3050 { 3051 if (def_reaches_here_p (insn, this_reg->insn)) 3052 { 3053 number_of_reaching_defs++; 3054 /* Ignore parallels for now. */ 3055 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL) 3056 return 0; 3057 if (!for_combine 3058 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER 3059 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)), 3060 SET_SRC (PATTERN (insn))))) 3061 { 3062 /* A setting of the reg to a different value reaches INSN. */ 3063 return 0; 3064 } 3065 if (number_of_reaching_defs > 1) 3066 { 3067 /* If in this setting the value the register is being 3068 set to is equal to the previous value the register 3069 was set to and this setting reaches the insn we are 3070 trying to do the substitution on then we are ok. */ 3071 3072 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER) 3073 return 0; 3074 if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)), 3075 SET_SRC (PATTERN (insn)))) 3076 return 0; 3077 } 3078 *addr_this_reg = this_reg; 3079 } 3080 3081 /* prev_this_reg = this_reg; */ 3082 this_reg = this_reg->next; 3083 } 3084 3085 return number_of_reaching_defs; 3086} 3087 3088/* Expression computed by insn is available and the substitution is legal, 3089 so try to perform the substitution. 3090 3091 The result is non-zero if any changes were made. */ 3092 3093static int 3094handle_avail_expr (insn, expr) 3095 rtx insn; 3096 struct expr *expr; 3097{ 3098 rtx pat, insn_computes_expr; 3099 rtx to; 3100 struct reg_set *this_reg; 3101 int found_setting, use_src; 3102 int changed = 0; 3103 3104 /* We only handle the case where one computation of the expression 3105 reaches this instruction. */ 3106 insn_computes_expr = computing_insn (expr, insn); 3107 if (insn_computes_expr == NULL) 3108 return 0; 3109 3110 found_setting = 0; 3111 use_src = 0; 3112 3113 /* At this point we know only one computation of EXPR outside of this 3114 block reaches this insn. Now try to find a register that the 3115 expression is computed into. */ 3116 3117 if (GET_CODE (SET_SRC (PATTERN (insn_computes_expr))) == REG) 3118 { 3119 /* This is the case when the available expression that reaches 3120 here has already been handled as an available expression. */ 3121 int regnum_for_replacing = REGNO (SET_SRC (PATTERN (insn_computes_expr))); 3122 /* If the register was created by GCSE we can't use `reg_set_table', 3123 however we know it's set only once. */ 3124 if (regnum_for_replacing >= max_gcse_regno 3125 /* If the register the expression is computed into is set only once, 3126 or only one set reaches this insn, we can use it. */ 3127 || (((this_reg = reg_set_table[regnum_for_replacing]), 3128 this_reg->next == NULL) 3129 || can_disregard_other_sets (&this_reg, insn, 0))) 3130 { 3131 use_src = 1; 3132 found_setting = 1; 3133 } 3134 } 3135 3136 if (!found_setting) 3137 { 3138 int regnum_for_replacing = REGNO (SET_DEST (PATTERN (insn_computes_expr))); 3139 /* This shouldn't happen. */ 3140 if (regnum_for_replacing >= max_gcse_regno) 3141 abort (); 3142 this_reg = reg_set_table[regnum_for_replacing]; 3143 /* If the register the expression is computed into is set only once, 3144 or only one set reaches this insn, use it. */ 3145 if (this_reg->next == NULL 3146 || can_disregard_other_sets (&this_reg, insn, 0)) 3147 found_setting = 1; 3148 } 3149 3150 if (found_setting) 3151 { 3152 pat = PATTERN (insn); 3153 if (use_src) 3154 to = SET_SRC (PATTERN (insn_computes_expr)); 3155 else 3156 to = SET_DEST (PATTERN (insn_computes_expr)); 3157 changed = validate_change (insn, &SET_SRC (pat), to, 0); 3158 3159 /* We should be able to ignore the return code from validate_change but 3160 to play it safe we check. */ 3161 if (changed) 3162 { 3163 gcse_subst_count++; 3164 if (gcse_file != NULL) 3165 { 3166 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d %s insn %d\n", 3167 INSN_UID (insn), REGNO (to), 3168 use_src ? "from" : "set in", 3169 INSN_UID (insn_computes_expr)); 3170 } 3171 3172 } 3173 } 3174 /* The register that the expr is computed into is set more than once. */ 3175 else if (1 /*expensive_op(this_pattrn->op) && do_expensive_gcse)*/) 3176 { 3177 /* Insert an insn after insnx that copies the reg set in insnx 3178 into a new pseudo register call this new register REGN. 3179 From insnb until end of basic block or until REGB is set 3180 replace all uses of REGB with REGN. */ 3181 rtx new_insn; 3182 3183 to = gen_reg_rtx (GET_MODE (SET_DEST (PATTERN (insn_computes_expr)))); 3184 3185 /* Generate the new insn. */ 3186 /* ??? If the change fails, we return 0, even though we created 3187 an insn. I think this is ok. */ 3188 new_insn 3189 = emit_insn_after (gen_rtx_SET (VOIDmode, to, 3190 SET_DEST (PATTERN (insn_computes_expr))), 3191 insn_computes_expr); 3192 /* Keep block number table up to date. */ 3193 set_block_num (new_insn, BLOCK_NUM (insn_computes_expr)); 3194 /* Keep register set table up to date. */ 3195 record_one_set (REGNO (to), new_insn); 3196 3197 gcse_create_count++; 3198 if (gcse_file != NULL) 3199 { 3200 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d, computed in insn %d,\n", 3201 INSN_UID (NEXT_INSN (insn_computes_expr)), 3202 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr)))), 3203 INSN_UID (insn_computes_expr)); 3204 fprintf (gcse_file, " into newly allocated reg %d\n", REGNO (to)); 3205 } 3206 3207 pat = PATTERN (insn); 3208 3209 /* Do register replacement for INSN. */ 3210 changed = validate_change (insn, &SET_SRC (pat), 3211 SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr))), 3212 0); 3213 3214 /* We should be able to ignore the return code from validate_change but 3215 to play it safe we check. */ 3216 if (changed) 3217 { 3218 gcse_subst_count++; 3219 if (gcse_file != NULL) 3220 { 3221 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with reg %d set in insn %d\n", 3222 INSN_UID (insn), 3223 REGNO (SET_DEST (PATTERN (NEXT_INSN (insn_computes_expr)))), 3224 INSN_UID (insn_computes_expr)); 3225 } 3226 3227 } 3228 } 3229 3230 return changed; 3231} 3232 3233/* Perform classic GCSE. 3234 This is called by one_classic_gcse_pass after all the dataflow analysis 3235 has been done. 3236 3237 The result is non-zero if a change was made. */ 3238 3239static int 3240classic_gcse () 3241{ 3242 int bb, changed; 3243 rtx insn; 3244 3245 /* Note we start at block 1. */ 3246 3247 changed = 0; 3248 for (bb = 1; bb < n_basic_blocks; bb++) 3249 { 3250 /* Reset tables used to keep track of what's still valid [since the 3251 start of the block]. */ 3252 reset_opr_set_tables (); 3253 3254 for (insn = BLOCK_HEAD (bb); 3255 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb)); 3256 insn = NEXT_INSN (insn)) 3257 { 3258 /* Is insn of form (set (pseudo-reg) ...)? */ 3259 3260 if (GET_CODE (insn) == INSN 3261 && GET_CODE (PATTERN (insn)) == SET 3262 && GET_CODE (SET_DEST (PATTERN (insn))) == REG 3263 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER) 3264 { 3265 rtx pat = PATTERN (insn); 3266 rtx src = SET_SRC (pat); 3267 struct expr *expr; 3268 3269 if (want_to_gcse_p (src) 3270 /* Is the expression recorded? */ 3271 && ((expr = lookup_expr (src)) != NULL) 3272 /* Is the expression available [at the start of the 3273 block]? */ 3274 && TEST_BIT (ae_in[bb], expr->bitmap_index) 3275 /* Are the operands unchanged since the start of the 3276 block? */ 3277 && oprs_not_set_p (src, insn)) 3278 changed |= handle_avail_expr (insn, expr); 3279 } 3280 3281 /* Keep track of everything modified by this insn. */ 3282 /* ??? Need to be careful w.r.t. mods done to INSN. */ 3283 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 3284 mark_oprs_set (insn); 3285 } 3286 } 3287 3288 return changed; 3289} 3290 3291/* Top level routine to perform one classic GCSE pass. 3292 3293 Return non-zero if a change was made. */ 3294 3295static int 3296one_classic_gcse_pass (pass) 3297 int pass; 3298{ 3299 int changed = 0; 3300 3301 gcse_subst_count = 0; 3302 gcse_create_count = 0; 3303 3304 alloc_expr_hash_table (max_cuid); 3305 alloc_rd_mem (n_basic_blocks, max_cuid); 3306 compute_expr_hash_table (); 3307 if (gcse_file) 3308 dump_hash_table (gcse_file, "Expression", expr_hash_table, 3309 expr_hash_table_size, n_exprs); 3310 if (n_exprs > 0) 3311 { 3312 compute_kill_rd (); 3313 compute_rd (); 3314 alloc_avail_expr_mem (n_basic_blocks, n_exprs); 3315 compute_ae_gen (); 3316 compute_ae_kill (); 3317 compute_available (); 3318 changed = classic_gcse (); 3319 free_avail_expr_mem (); 3320 } 3321 free_rd_mem (); 3322 free_expr_hash_table (); 3323 3324 if (gcse_file) 3325 { 3326 fprintf (gcse_file, "\n"); 3327 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n", 3328 current_function_name, pass, 3329 bytes_used, gcse_subst_count, gcse_create_count); 3330 } 3331 3332 return changed; 3333} 3334 3335/* Compute copy/constant propagation working variables. */ 3336 3337/* Local properties of assignments. */ 3338 3339static sbitmap *cprop_pavloc; 3340static sbitmap *cprop_absaltered; 3341 3342/* Global properties of assignments (computed from the local properties). */ 3343 3344static sbitmap *cprop_avin; 3345static sbitmap *cprop_avout; 3346 3347/* Allocate vars used for copy/const propagation. 3348 N_BLOCKS is the number of basic blocks. 3349 N_SETS is the number of sets. */ 3350 3351static void 3352alloc_cprop_mem (n_blocks, n_sets) 3353 int n_blocks, n_sets; 3354{ 3355 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets); 3356 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets); 3357 3358 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets); 3359 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets); 3360} 3361 3362/* Free vars used by copy/const propagation. */ 3363 3364static void 3365free_cprop_mem () 3366{ 3367 free (cprop_pavloc); 3368 free (cprop_absaltered); 3369 free (cprop_avin); 3370 free (cprop_avout); 3371} 3372 3373/* For each block, compute whether X is transparent. 3374 X is either an expression or an assignment [though we don't care which, 3375 for this context an assignment is treated as an expression]. 3376 For each block where an element of X is modified, set (SET_P == 1) or reset 3377 (SET_P == 0) the INDX bit in BMAP. */ 3378 3379static void 3380compute_transp (x, indx, bmap, set_p) 3381 rtx x; 3382 int indx; 3383 sbitmap *bmap; 3384 int set_p; 3385{ 3386 int bb,i; 3387 enum rtx_code code; 3388 char *fmt; 3389 3390 /* repeat is used to turn tail-recursion into iteration. */ 3391 repeat: 3392 3393 if (x == 0) 3394 return; 3395 3396 code = GET_CODE (x); 3397 switch (code) 3398 { 3399 case REG: 3400 { 3401 reg_set *r; 3402 int regno = REGNO (x); 3403 3404 if (set_p) 3405 { 3406 if (regno < FIRST_PSEUDO_REGISTER) 3407 { 3408 for (bb = 0; bb < n_basic_blocks; bb++) 3409 if (TEST_BIT (reg_set_in_block[bb], regno)) 3410 SET_BIT (bmap[bb], indx); 3411 } 3412 else 3413 { 3414 for (r = reg_set_table[regno]; r != NULL; r = r->next) 3415 { 3416 bb = BLOCK_NUM (r->insn); 3417 SET_BIT (bmap[bb], indx); 3418 } 3419 } 3420 } 3421 else 3422 { 3423 if (regno < FIRST_PSEUDO_REGISTER) 3424 { 3425 for (bb = 0; bb < n_basic_blocks; bb++) 3426 if (TEST_BIT (reg_set_in_block[bb], regno)) 3427 RESET_BIT (bmap[bb], indx); 3428 } 3429 else 3430 { 3431 for (r = reg_set_table[regno]; r != NULL; r = r->next) 3432 { 3433 bb = BLOCK_NUM (r->insn); 3434 RESET_BIT (bmap[bb], indx); 3435 } 3436 } 3437 } 3438 return; 3439 } 3440 3441 case MEM: 3442 if (set_p) 3443 { 3444 for (bb = 0; bb < n_basic_blocks; bb++) 3445 if (mem_set_in_block[bb]) 3446 SET_BIT (bmap[bb], indx); 3447 } 3448 else 3449 { 3450 for (bb = 0; bb < n_basic_blocks; bb++) 3451 if (mem_set_in_block[bb]) 3452 RESET_BIT (bmap[bb], indx); 3453 } 3454 x = XEXP (x, 0); 3455 goto repeat; 3456 3457 case PC: 3458 case CC0: /*FIXME*/ 3459 case CONST: 3460 case CONST_INT: 3461 case CONST_DOUBLE: 3462 case SYMBOL_REF: 3463 case LABEL_REF: 3464 case ADDR_VEC: 3465 case ADDR_DIFF_VEC: 3466 return; 3467 3468 default: 3469 break; 3470 } 3471 3472 i = GET_RTX_LENGTH (code) - 1; 3473 fmt = GET_RTX_FORMAT (code); 3474 for (; i >= 0; i--) 3475 { 3476 if (fmt[i] == 'e') 3477 { 3478 rtx tem = XEXP (x, i); 3479 3480 /* If we are about to do the last recursive call 3481 needed at this level, change it into iteration. 3482 This function is called enough to be worth it. */ 3483 if (i == 0) 3484 { 3485 x = tem; 3486 goto repeat; 3487 } 3488 compute_transp (tem, indx, bmap, set_p); 3489 } 3490 else if (fmt[i] == 'E') 3491 { 3492 int j; 3493 for (j = 0; j < XVECLEN (x, i); j++) 3494 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p); 3495 } 3496 } 3497} 3498 3499/* Compute the available expressions at the start and end of each 3500 basic block for cprop. This particular dataflow equation is 3501 used often enough that we might want to generalize it and make 3502 as a subroutine for other global optimizations that need available 3503 in/out information. */ 3504static void 3505compute_cprop_avinout () 3506{ 3507 int bb, changed, passes; 3508 3509 sbitmap_zero (cprop_avin[0]); 3510 sbitmap_vector_ones (cprop_avout, n_basic_blocks); 3511 3512 passes = 0; 3513 changed = 1; 3514 while (changed) 3515 { 3516 changed = 0; 3517 for (bb = 0; bb < n_basic_blocks; bb++) 3518 { 3519 if (bb != 0) 3520 sbitmap_intersect_of_predecessors (cprop_avin[bb], 3521 cprop_avout, bb, s_preds); 3522 changed |= sbitmap_union_of_diff (cprop_avout[bb], 3523 cprop_pavloc[bb], 3524 cprop_avin[bb], 3525 cprop_absaltered[bb]); 3526 } 3527 passes++; 3528 } 3529 3530 if (gcse_file) 3531 fprintf (gcse_file, "cprop avail expr computation: %d passes\n", passes); 3532} 3533 3534/* Top level routine to do the dataflow analysis needed by copy/const 3535 propagation. */ 3536 3537static void 3538compute_cprop_data () 3539{ 3540 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1); 3541 compute_cprop_avinout (); 3542} 3543 3544/* Copy/constant propagation. */ 3545 3546struct reg_use { 3547 rtx reg_rtx; 3548}; 3549 3550/* Maximum number of register uses in an insn that we handle. */ 3551#define MAX_USES 8 3552 3553/* Table of uses found in an insn. 3554 Allocated statically to avoid alloc/free complexity and overhead. */ 3555static struct reg_use reg_use_table[MAX_USES]; 3556 3557/* Index into `reg_use_table' while building it. */ 3558static int reg_use_count; 3559 3560/* Set up a list of register numbers used in INSN. 3561 The found uses are stored in `reg_use_table'. 3562 `reg_use_count' is initialized to zero before entry, and 3563 contains the number of uses in the table upon exit. 3564 3565 ??? If a register appears multiple times we will record it multiple 3566 times. This doesn't hurt anything but it will slow things down. */ 3567 3568static void 3569find_used_regs (x) 3570 rtx x; 3571{ 3572 int i; 3573 enum rtx_code code; 3574 char *fmt; 3575 3576 /* repeat is used to turn tail-recursion into iteration. */ 3577 repeat: 3578 3579 if (x == 0) 3580 return; 3581 3582 code = GET_CODE (x); 3583 switch (code) 3584 { 3585 case REG: 3586 if (reg_use_count == MAX_USES) 3587 return; 3588 reg_use_table[reg_use_count].reg_rtx = x; 3589 reg_use_count++; 3590 return; 3591 3592 case MEM: 3593 x = XEXP (x, 0); 3594 goto repeat; 3595 3596 case PC: 3597 case CC0: 3598 case CONST: 3599 case CONST_INT: 3600 case CONST_DOUBLE: 3601 case SYMBOL_REF: 3602 case LABEL_REF: 3603 case CLOBBER: 3604 case ADDR_VEC: 3605 case ADDR_DIFF_VEC: 3606 case ASM_INPUT: /*FIXME*/ 3607 return; 3608 3609 case SET: 3610 if (GET_CODE (SET_DEST (x)) == MEM) 3611 find_used_regs (SET_DEST (x)); 3612 x = SET_SRC (x); 3613 goto repeat; 3614 3615 default: 3616 break; 3617 } 3618 3619 /* Recursively scan the operands of this expression. */ 3620 3621 fmt = GET_RTX_FORMAT (code); 3622 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 3623 { 3624 if (fmt[i] == 'e') 3625 { 3626 /* If we are about to do the last recursive call 3627 needed at this level, change it into iteration. 3628 This function is called enough to be worth it. */ 3629 if (i == 0) 3630 { 3631 x = XEXP (x, 0); 3632 goto repeat; 3633 } 3634 find_used_regs (XEXP (x, i)); 3635 } 3636 else if (fmt[i] == 'E') 3637 { 3638 int j; 3639 for (j = 0; j < XVECLEN (x, i); j++) 3640 find_used_regs (XVECEXP (x, i, j)); 3641 } 3642 } 3643} 3644 3645/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO. 3646 Returns non-zero is successful. */ 3647 3648static int 3649try_replace_reg (from, to, insn) 3650 rtx from, to, insn; 3651{ 3652 /* If this fails we could try to simplify the result of the 3653 replacement and attempt to recognize the simplified insn. 3654 3655 But we need a general simplify_rtx that doesn't have pass 3656 specific state variables. I'm not aware of one at the moment. */ 3657 return validate_replace_src (from, to, insn); 3658} 3659 3660/* Find a set of REGNO that is available on entry to INSN's block. 3661 Returns NULL if not found. */ 3662 3663static struct expr * 3664find_avail_set (regno, insn) 3665 int regno; 3666 rtx insn; 3667{ 3668 struct expr *set = lookup_set (regno, NULL_RTX); 3669 3670 while (set) 3671 { 3672 if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index)) 3673 break; 3674 set = next_set (regno, set); 3675 } 3676 3677 return set; 3678} 3679 3680/* Perform constant and copy propagation on INSN. 3681 The result is non-zero if a change was made. */ 3682 3683static int 3684cprop_insn (insn, alter_jumps) 3685 rtx insn; 3686 int alter_jumps; 3687{ 3688 struct reg_use *reg_used; 3689 int changed = 0; 3690 3691 /* Only propagate into SETs. Note that a conditional jump is a 3692 SET with pc_rtx as the destination. */ 3693 if ((GET_CODE (insn) != INSN 3694 && GET_CODE (insn) != JUMP_INSN) 3695 || GET_CODE (PATTERN (insn)) != SET) 3696 return 0; 3697 3698 reg_use_count = 0; 3699 find_used_regs (PATTERN (insn)); 3700 3701 reg_used = ®_use_table[0]; 3702 for ( ; reg_use_count > 0; reg_used++, reg_use_count--) 3703 { 3704 rtx pat, src; 3705 struct expr *set; 3706 int regno = REGNO (reg_used->reg_rtx); 3707 3708 /* Ignore registers created by GCSE. 3709 We do this because ... */ 3710 if (regno >= max_gcse_regno) 3711 continue; 3712 3713 /* If the register has already been set in this block, there's 3714 nothing we can do. */ 3715 if (! oprs_not_set_p (reg_used->reg_rtx, insn)) 3716 continue; 3717 3718 /* Find an assignment that sets reg_used and is available 3719 at the start of the block. */ 3720 set = find_avail_set (regno, insn); 3721 if (! set) 3722 continue; 3723 3724 pat = set->expr; 3725 /* ??? We might be able to handle PARALLELs. Later. */ 3726 if (GET_CODE (pat) != SET) 3727 abort (); 3728 src = SET_SRC (pat); 3729 3730 /* Constant propagation. */ 3731 if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE) 3732 { 3733 /* Handle normal insns first. */ 3734 if (GET_CODE (insn) == INSN 3735 && try_replace_reg (reg_used->reg_rtx, src, insn)) 3736 { 3737 changed = 1; 3738 const_prop_count++; 3739 if (gcse_file != NULL) 3740 { 3741 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ", 3742 regno, INSN_UID (insn)); 3743 print_rtl (gcse_file, src); 3744 fprintf (gcse_file, "\n"); 3745 } 3746 3747 /* The original insn setting reg_used may or may not now be 3748 deletable. We leave the deletion to flow. */ 3749 } 3750 3751 /* Try to propagate a CONST_INT into a conditional jump. 3752 We're pretty specific about what we will handle in this 3753 code, we can extend this as necessary over time. 3754 3755 Right now the insn in question must look like 3756 3757 (set (pc) (if_then_else ...)) 3758 3759 Note this does not currently handle machines which use cc0. */ 3760 else if (alter_jumps 3761 && GET_CODE (insn) == JUMP_INSN 3762 && condjump_p (insn) 3763 && ! simplejump_p (insn)) 3764 { 3765 /* We want a copy of the JUMP_INSN so we can modify it 3766 in-place as needed without effecting the original. */ 3767 rtx copy = copy_rtx (insn); 3768 rtx set = PATTERN (copy); 3769 rtx temp; 3770 3771 /* Replace the register with the appropriate constant. */ 3772 replace_rtx (SET_SRC (set), reg_used->reg_rtx, src); 3773 3774 temp = simplify_ternary_operation (GET_CODE (SET_SRC (set)), 3775 GET_MODE (SET_SRC (set)), 3776 GET_MODE (XEXP (SET_SRC (set), 0)), 3777 XEXP (SET_SRC (set), 0), 3778 XEXP (SET_SRC (set), 1), 3779 XEXP (SET_SRC (set), 2)); 3780 3781 /* If no simplification can be made, then try the next 3782 register. */ 3783 if (temp) 3784 SET_SRC (set) = temp; 3785 else 3786 continue; 3787 3788 /* That may have changed the structure of TEMP, so 3789 force it to be rerecognized if it has not turned 3790 into a nop or unconditional jump. */ 3791 3792 INSN_CODE (copy) = -1; 3793 if ((SET_DEST (set) == pc_rtx 3794 && (SET_SRC (set) == pc_rtx 3795 || GET_CODE (SET_SRC (set)) == LABEL_REF)) 3796 || recog (PATTERN (copy), copy, NULL) >= 0) 3797 { 3798 /* This has either become an unconditional jump 3799 or a nop-jump. We'd like to delete nop jumps 3800 here, but doing so confuses gcse. So we just 3801 make the replacement and let later passes 3802 sort things out. */ 3803 PATTERN (insn) = set; 3804 INSN_CODE (insn) = -1; 3805 3806 /* One less use of the label this insn used to jump to 3807 if we turned this into a NOP jump. */ 3808 if (SET_SRC (set) == pc_rtx && JUMP_LABEL (insn) != 0) 3809 --LABEL_NUSES (JUMP_LABEL (insn)); 3810 3811 /* If this has turned into an unconditional jump, 3812 then put a barrier after it so that the unreachable 3813 code will be deleted. */ 3814 if (GET_CODE (SET_SRC (set)) == LABEL_REF) 3815 emit_barrier_after (insn); 3816 3817 run_jump_opt_after_gcse = 1; 3818 3819 changed = 1; 3820 const_prop_count++; 3821 if (gcse_file != NULL) 3822 { 3823 fprintf (gcse_file, "CONST-PROP: Replacing reg %d in insn %d with constant ", 3824 regno, INSN_UID (insn)); 3825 print_rtl (gcse_file, src); 3826 fprintf (gcse_file, "\n"); 3827 } 3828 } 3829 } 3830 } 3831 else if (GET_CODE (src) == REG 3832 && REGNO (src) >= FIRST_PSEUDO_REGISTER 3833 && REGNO (src) != regno) 3834 { 3835 /* We know the set is available. 3836 Now check that SET_SRC is ANTLOC (i.e. none of the source operands 3837 have changed since the start of the block). */ 3838 if (oprs_not_set_p (src, insn)) 3839 { 3840 if (try_replace_reg (reg_used->reg_rtx, src, insn)) 3841 { 3842 changed = 1; 3843 copy_prop_count++; 3844 if (gcse_file != NULL) 3845 { 3846 fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d with reg %d\n", 3847 regno, INSN_UID (insn), REGNO (src)); 3848 } 3849 3850 /* The original insn setting reg_used may or may not now be 3851 deletable. We leave the deletion to flow. */ 3852 /* FIXME: If it turns out that the insn isn't deletable, 3853 then we may have unnecessarily extended register lifetimes 3854 and made things worse. */ 3855 } 3856 } 3857 } 3858 } 3859 3860 return changed; 3861} 3862 3863/* Forward propagate copies. 3864 This includes copies and constants. 3865 Return non-zero if a change was made. */ 3866 3867static int 3868cprop (alter_jumps) 3869 int alter_jumps; 3870{ 3871 int bb, changed; 3872 rtx insn; 3873 3874 /* Note we start at block 1. */ 3875 3876 changed = 0; 3877 for (bb = 1; bb < n_basic_blocks; bb++) 3878 { 3879 /* Reset tables used to keep track of what's still valid [since the 3880 start of the block]. */ 3881 reset_opr_set_tables (); 3882 3883 for (insn = BLOCK_HEAD (bb); 3884 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb)); 3885 insn = NEXT_INSN (insn)) 3886 { 3887 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 3888 { 3889 changed |= cprop_insn (insn, alter_jumps); 3890 3891 /* Keep track of everything modified by this insn. */ 3892 /* ??? Need to be careful w.r.t. mods done to INSN. */ 3893 mark_oprs_set (insn); 3894 } 3895 } 3896 } 3897 3898 if (gcse_file != NULL) 3899 fprintf (gcse_file, "\n"); 3900 3901 return changed; 3902} 3903 3904/* Perform one copy/constant propagation pass. 3905 F is the first insn in the function. 3906 PASS is the pass count. */ 3907 3908static int 3909one_cprop_pass (pass, alter_jumps) 3910 int pass; 3911 int alter_jumps; 3912{ 3913 int changed = 0; 3914 3915 const_prop_count = 0; 3916 copy_prop_count = 0; 3917 3918 alloc_set_hash_table (max_cuid); 3919 compute_set_hash_table (); 3920 if (gcse_file) 3921 dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size, 3922 n_sets); 3923 if (n_sets > 0) 3924 { 3925 alloc_cprop_mem (n_basic_blocks, n_sets); 3926 compute_cprop_data (); 3927 changed = cprop (alter_jumps); 3928 free_cprop_mem (); 3929 } 3930 free_set_hash_table (); 3931 3932 if (gcse_file) 3933 { 3934 fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, %d const props, %d copy props\n", 3935 current_function_name, pass, 3936 bytes_used, const_prop_count, copy_prop_count); 3937 fprintf (gcse_file, "\n"); 3938 } 3939 3940 return changed; 3941} 3942 3943/* Compute PRE+LCM working variables. */ 3944 3945/* Local properties of expressions. */ 3946/* Nonzero for expressions that are transparent in the block. */ 3947static sbitmap *transp; 3948 3949/* Nonzero for expressions that are transparent at the end of the block. 3950 This is only zero for expressions killed by abnormal critical edge 3951 created by a calls. */ 3952static sbitmap *transpout; 3953 3954/* Nonzero for expressions that are computed (available) in the block. */ 3955static sbitmap *comp; 3956 3957/* Nonzero for expressions that are locally anticipatable in the block. */ 3958static sbitmap *antloc; 3959 3960/* Nonzero for expressions where this block is an optimal computation 3961 point. */ 3962static sbitmap *pre_optimal; 3963 3964/* Nonzero for expressions which are redundant in a particular block. */ 3965static sbitmap *pre_redundant; 3966 3967static sbitmap *temp_bitmap; 3968 3969/* Redundant insns. */ 3970static sbitmap pre_redundant_insns; 3971 3972/* Allocate vars used for PRE analysis. */ 3973 3974static void 3975alloc_pre_mem (n_blocks, n_exprs) 3976 int n_blocks, n_exprs; 3977{ 3978 transp = sbitmap_vector_alloc (n_blocks, n_exprs); 3979 comp = sbitmap_vector_alloc (n_blocks, n_exprs); 3980 antloc = sbitmap_vector_alloc (n_blocks, n_exprs); 3981 3982 temp_bitmap = sbitmap_vector_alloc (n_blocks, n_exprs); 3983 pre_optimal = sbitmap_vector_alloc (n_blocks, n_exprs); 3984 pre_redundant = sbitmap_vector_alloc (n_blocks, n_exprs); 3985 transpout = sbitmap_vector_alloc (n_blocks, n_exprs); 3986} 3987 3988/* Free vars used for PRE analysis. */ 3989 3990static void 3991free_pre_mem () 3992{ 3993 free (transp); 3994 free (comp); 3995 free (antloc); 3996 3997 free (pre_optimal); 3998 free (pre_redundant); 3999 free (transpout); 4000} 4001 4002/* Top level routine to do the dataflow analysis needed by PRE. */ 4003 4004static void 4005compute_pre_data () 4006{ 4007 compute_local_properties (transp, comp, antloc, 0); 4008 compute_transpout (); 4009 pre_lcm (n_basic_blocks, n_exprs, s_preds, s_succs, transp, 4010 antloc, pre_redundant, pre_optimal); 4011} 4012 4013 4014/* PRE utilities */ 4015 4016/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach 4017 block BB. 4018 4019 VISITED is a pointer to a working buffer for tracking which BB's have 4020 been visited. It is NULL for the top-level call. 4021 4022 CHECK_PRE_COMP controls whether or not we check for a computation of 4023 EXPR in OCCR_BB. 4024 4025 We treat reaching expressions that go through blocks containing the same 4026 reaching expression as "not reaching". E.g. if EXPR is generated in blocks 4027 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block 4028 2 as not reaching. The intent is to improve the probability of finding 4029 only one reaching expression and to reduce register lifetimes by picking 4030 the closest such expression. */ 4031 4032static int 4033pre_expr_reaches_here_p (occr_bb, expr, bb, check_pre_comp, visited) 4034 int occr_bb; 4035 struct expr *expr; 4036 int bb; 4037 int check_pre_comp; 4038 char *visited; 4039{ 4040 int_list_ptr pred; 4041 4042 if (visited == NULL) 4043 { 4044 visited = (char *) alloca (n_basic_blocks); 4045 bzero (visited, n_basic_blocks); 4046 } 4047 4048 for (pred = s_preds[bb]; pred != NULL; pred = pred->next) 4049 { 4050 int pred_bb = INT_LIST_VAL (pred); 4051 4052 if (pred_bb == ENTRY_BLOCK 4053 /* Has predecessor has already been visited? */ 4054 || visited[pred_bb]) 4055 { 4056 /* Nothing to do. */ 4057 } 4058 /* Does this predecessor generate this expression? */ 4059 else if ((!check_pre_comp && occr_bb == pred_bb) 4060 || TEST_BIT (comp[pred_bb], expr->bitmap_index)) 4061 { 4062 /* Is this the occurrence we're looking for? 4063 Note that there's only one generating occurrence per block 4064 so we just need to check the block number. */ 4065 if (occr_bb == pred_bb) 4066 return 1; 4067 visited[pred_bb] = 1; 4068 } 4069 /* Ignore this predecessor if it kills the expression. */ 4070 else if (! TEST_BIT (transp[pred_bb], expr->bitmap_index)) 4071 visited[pred_bb] = 1; 4072 /* Neither gen nor kill. */ 4073 else 4074 { 4075 visited[pred_bb] = 1; 4076 if (pre_expr_reaches_here_p (occr_bb, expr, pred_bb, 4077 check_pre_comp, visited)) 4078 return 1; 4079 } 4080 } 4081 4082 /* All paths have been checked. */ 4083 return 0; 4084} 4085 4086/* Add EXPR to the end of basic block BB. 4087 4088 This is used by both the PRE and code hoisting. 4089 4090 For PRE, we want to verify that the expr is either transparent 4091 or locally anticipatable in the target block. This check makes 4092 no sense for code hoisting. */ 4093 4094static void 4095insert_insn_end_bb (expr, bb, pre) 4096 struct expr *expr; 4097 int bb; 4098 int pre; 4099{ 4100 rtx insn = BLOCK_END (bb); 4101 rtx new_insn; 4102 rtx reg = expr->reaching_reg; 4103 int regno = REGNO (reg); 4104 rtx pat, copied_expr; 4105 rtx first_new_insn; 4106 4107 start_sequence (); 4108 copied_expr = copy_rtx (expr->expr); 4109 emit_move_insn (reg, copied_expr); 4110 first_new_insn = get_insns (); 4111 pat = gen_sequence (); 4112 end_sequence (); 4113 4114 /* If the last insn is a jump, insert EXPR in front [taking care to 4115 handle cc0, etc. properly]. */ 4116 4117 if (GET_CODE (insn) == JUMP_INSN) 4118 { 4119#ifdef HAVE_cc0 4120 rtx note; 4121#endif 4122 4123 /* If this is a jump table, then we can't insert stuff here. Since 4124 we know the previous real insn must be the tablejump, we insert 4125 the new instruction just before the tablejump. */ 4126 if (GET_CODE (PATTERN (insn)) == ADDR_VEC 4127 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC) 4128 insn = prev_real_insn (insn); 4129 4130#ifdef HAVE_cc0 4131 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts 4132 if cc0 isn't set. */ 4133 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX); 4134 if (note) 4135 insn = XEXP (note, 0); 4136 else 4137 { 4138 rtx maybe_cc0_setter = prev_nonnote_insn (insn); 4139 if (maybe_cc0_setter 4140 && GET_RTX_CLASS (GET_CODE (maybe_cc0_setter)) == 'i' 4141 && sets_cc0_p (PATTERN (maybe_cc0_setter))) 4142 insn = maybe_cc0_setter; 4143 } 4144#endif 4145 /* FIXME: What if something in cc0/jump uses value set in new insn? */ 4146 new_insn = emit_insn_before (pat, insn); 4147 if (BLOCK_HEAD (bb) == insn) 4148 BLOCK_HEAD (bb) = new_insn; 4149 } 4150 /* Likewise if the last insn is a call, as will happen in the presence 4151 of exception handling. */ 4152 else if (GET_CODE (insn) == CALL_INSN) 4153 { 4154 HARD_REG_SET parm_regs; 4155 int nparm_regs; 4156 rtx p; 4157 4158 /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers, 4159 we search backward and place the instructions before the first 4160 parameter is loaded. Do this for everyone for consistency and a 4161 presumtion that we'll get better code elsewhere as well. */ 4162 4163 /* It should always be the case that we can put these instructions 4164 anywhere in the basic block with performing PRE optimizations. 4165 Check this. */ 4166 if (pre 4167 && !TEST_BIT (antloc[bb], expr->bitmap_index) 4168 && !TEST_BIT (transp[bb], expr->bitmap_index)) 4169 abort (); 4170 4171 /* Since different machines initialize their parameter registers 4172 in different orders, assume nothing. Collect the set of all 4173 parameter registers. */ 4174 CLEAR_HARD_REG_SET (parm_regs); 4175 nparm_regs = 0; 4176 for (p = CALL_INSN_FUNCTION_USAGE (insn); p ; p = XEXP (p, 1)) 4177 if (GET_CODE (XEXP (p, 0)) == USE 4178 && GET_CODE (XEXP (XEXP (p, 0), 0)) == REG) 4179 { 4180 int regno = REGNO (XEXP (XEXP (p, 0), 0)); 4181 if (regno >= FIRST_PSEUDO_REGISTER) 4182 abort (); 4183 SET_HARD_REG_BIT (parm_regs, regno); 4184 nparm_regs++; 4185 } 4186 4187 /* Search backward for the first set of a register in this set. */ 4188 while (nparm_regs && BLOCK_HEAD (bb) != insn) 4189 { 4190 insn = PREV_INSN (insn); 4191 p = single_set (insn); 4192 if (p && GET_CODE (SET_DEST (p)) == REG 4193 && REGNO (SET_DEST (p)) < FIRST_PSEUDO_REGISTER 4194 && TEST_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p)))) 4195 { 4196 CLEAR_HARD_REG_BIT (parm_regs, REGNO (SET_DEST (p))); 4197 nparm_regs--; 4198 } 4199 } 4200 4201 /* If we found all the parameter loads, then we want to insert 4202 before the first parameter load. 4203 4204 If we did not find all the parameter loads, then we might have 4205 stopped on the head of the block, which could be a CODE_LABEL. 4206 If we inserted before the CODE_LABEL, then we would be putting 4207 the insn in the wrong basic block. In that case, put the insn 4208 after the CODE_LABEL. 4209 4210 ?!? Do we need to account for NOTE_INSN_BASIC_BLOCK here? */ 4211 if (GET_CODE (insn) != CODE_LABEL) 4212 { 4213 new_insn = emit_insn_before (pat, insn); 4214 if (BLOCK_HEAD (bb) == insn) 4215 BLOCK_HEAD (bb) = new_insn; 4216 } 4217 else 4218 { 4219 new_insn = emit_insn_after (pat, insn); 4220 } 4221 } 4222 else 4223 { 4224 new_insn = emit_insn_after (pat, insn); 4225 BLOCK_END (bb) = new_insn; 4226 } 4227 4228 /* Keep block number table up to date. 4229 Note, PAT could be a multiple insn sequence, we have to make 4230 sure that each insn in the sequence is handled. */ 4231 if (GET_CODE (pat) == SEQUENCE) 4232 { 4233 int i; 4234 4235 for (i = 0; i < XVECLEN (pat, 0); i++) 4236 { 4237 rtx insn = XVECEXP (pat, 0, i); 4238 set_block_num (insn, bb); 4239 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i') 4240 add_label_notes (PATTERN (insn), new_insn); 4241 record_set_insn = insn; 4242 note_stores (PATTERN (insn), record_set_info); 4243 } 4244 } 4245 else 4246 { 4247 add_label_notes (SET_SRC (pat), new_insn); 4248 set_block_num (new_insn, bb); 4249 /* Keep register set table up to date. */ 4250 record_one_set (regno, new_insn); 4251 } 4252 4253 gcse_create_count++; 4254 4255 if (gcse_file) 4256 { 4257 fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, copying expression %d to reg %d\n", 4258 bb, INSN_UID (new_insn), expr->bitmap_index, regno); 4259 } 4260} 4261 4262/* Insert partially redundant expressions at the ends of appropriate basic 4263 blocks making them fully redundant. */ 4264 4265static void 4266pre_insert (index_map) 4267 struct expr **index_map; 4268{ 4269 int bb, i, set_size; 4270 sbitmap *inserted; 4271 4272 /* Compute INSERT = PRE_OPTIMAL & ~PRE_REDUNDANT. 4273 Where INSERT is nonzero, we add the expression at the end of the basic 4274 block if it reaches any of the deleted expressions. */ 4275 4276 set_size = pre_optimal[0]->size; 4277 inserted = sbitmap_vector_alloc (n_basic_blocks, n_exprs); 4278 sbitmap_vector_zero (inserted, n_basic_blocks); 4279 4280 for (bb = 0; bb < n_basic_blocks; bb++) 4281 { 4282 int indx; 4283 4284 /* This computes the number of potential insertions we need. */ 4285 sbitmap_not (temp_bitmap[bb], pre_redundant[bb]); 4286 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_optimal[bb]); 4287 4288 /* TEMP_BITMAP[bb] now contains a bitmap of the expressions that we need 4289 to insert at the end of this basic block. */ 4290 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) 4291 { 4292 SBITMAP_ELT_TYPE insert = temp_bitmap[bb]->elms[i]; 4293 int j; 4294 4295 for (j = indx; insert && j < n_exprs; j++, insert >>= 1) 4296 { 4297 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX) 4298 { 4299 struct expr *expr = index_map[j]; 4300 struct occr *occr; 4301 4302 /* Now look at each deleted occurence of this expression. */ 4303 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 4304 { 4305 if (! occr->deleted_p) 4306 continue; 4307 4308 /* Insert this expression at the end of BB if it would 4309 reach the deleted occurence. */ 4310 if (!TEST_BIT (inserted[bb], j) 4311 && pre_expr_reaches_here_p (bb, expr, 4312 BLOCK_NUM (occr->insn), 0, 4313 NULL)) 4314 { 4315 SET_BIT (inserted[bb], j); 4316 insert_insn_end_bb (index_map[j], bb, 1); 4317 } 4318 } 4319 } 4320 } 4321 } 4322 } 4323} 4324 4325/* Copy the result of INSN to REG. 4326 INDX is the expression number. */ 4327 4328static void 4329pre_insert_copy_insn (expr, insn) 4330 struct expr *expr; 4331 rtx insn; 4332{ 4333 rtx reg = expr->reaching_reg; 4334 int regno = REGNO (reg); 4335 int indx = expr->bitmap_index; 4336 rtx set = single_set (insn); 4337 rtx new_insn; 4338 4339 if (!set) 4340 abort (); 4341 new_insn = emit_insn_after (gen_rtx_SET (VOIDmode, reg, SET_DEST (set)), 4342 insn); 4343 /* Keep block number table up to date. */ 4344 set_block_num (new_insn, BLOCK_NUM (insn)); 4345 /* Keep register set table up to date. */ 4346 record_one_set (regno, new_insn); 4347 4348 gcse_create_count++; 4349 4350 if (gcse_file) 4351 { 4352 fprintf (gcse_file, "PRE: bb %d, insn %d, copying expression %d in insn %d to reg %d\n", 4353 BLOCK_NUM (insn), INSN_UID (new_insn), indx, INSN_UID (insn), regno); 4354 } 4355} 4356 4357/* Copy available expressions that reach the redundant expression 4358 to `reaching_reg'. */ 4359 4360static void 4361pre_insert_copies () 4362{ 4363 int i, bb; 4364 4365 for (bb = 0; bb < n_basic_blocks; bb++) 4366 { 4367 sbitmap_a_and_b (temp_bitmap[bb], pre_optimal[bb], pre_redundant[bb]); 4368 } 4369 4370 /* For each available expression in the table, copy the result to 4371 `reaching_reg' if the expression reaches a deleted one. 4372 4373 ??? The current algorithm is rather brute force. 4374 Need to do some profiling. */ 4375 4376 for (i = 0; i < expr_hash_table_size; i++) 4377 { 4378 struct expr *expr; 4379 4380 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash) 4381 { 4382 struct occr *occr; 4383 4384 /* If the basic block isn't reachable, PPOUT will be TRUE. 4385 However, we don't want to insert a copy here because the 4386 expression may not really be redundant. So only insert 4387 an insn if the expression was deleted. 4388 This test also avoids further processing if the expression 4389 wasn't deleted anywhere. */ 4390 if (expr->reaching_reg == NULL) 4391 continue; 4392 4393 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 4394 { 4395 struct occr *avail; 4396 4397 if (! occr->deleted_p) 4398 continue; 4399 4400 for (avail = expr->avail_occr; avail != NULL; avail = avail->next) 4401 { 4402 rtx insn = avail->insn; 4403 int bb = BLOCK_NUM (insn); 4404 4405 if (!TEST_BIT (temp_bitmap[bb], expr->bitmap_index)) 4406 continue; 4407 4408 /* No need to handle this one if handled already. */ 4409 if (avail->copied_p) 4410 continue; 4411 /* Don't handle this one if it's a redundant one. */ 4412 if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn))) 4413 continue; 4414 /* Or if the expression doesn't reach the deleted one. */ 4415 if (! pre_expr_reaches_here_p (BLOCK_NUM (avail->insn), expr, 4416 BLOCK_NUM (occr->insn), 4417 1, NULL)) 4418 continue; 4419 4420 /* Copy the result of avail to reaching_reg. */ 4421 pre_insert_copy_insn (expr, insn); 4422 avail->copied_p = 1; 4423 } 4424 } 4425 } 4426 } 4427} 4428 4429/* Delete redundant computations. 4430 Deletion is done by changing the insn to copy the `reaching_reg' of 4431 the expression into the result of the SET. It is left to later passes 4432 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it. 4433 4434 Returns non-zero if a change is made. */ 4435 4436static int 4437pre_delete () 4438{ 4439 int i, bb, changed; 4440 4441 /* Compute the expressions which are redundant and need to be replaced by 4442 copies from the reaching reg to the target reg. */ 4443 for (bb = 0; bb < n_basic_blocks; bb++) 4444 { 4445 sbitmap_not (temp_bitmap[bb], pre_optimal[bb]); 4446 sbitmap_a_and_b (temp_bitmap[bb], temp_bitmap[bb], pre_redundant[bb]); 4447 } 4448 4449 changed = 0; 4450 for (i = 0; i < expr_hash_table_size; i++) 4451 { 4452 struct expr *expr; 4453 4454 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash) 4455 { 4456 struct occr *occr; 4457 int indx = expr->bitmap_index; 4458 4459 /* We only need to search antic_occr since we require 4460 ANTLOC != 0. */ 4461 4462 for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 4463 { 4464 rtx insn = occr->insn; 4465 rtx set; 4466 int bb = BLOCK_NUM (insn); 4467 4468 if (TEST_BIT (temp_bitmap[bb], indx)) 4469 { 4470 set = single_set (insn); 4471 if (! set) 4472 abort (); 4473 4474 /* Create a pseudo-reg to store the result of reaching 4475 expressions into. Get the mode for the new pseudo 4476 from the mode of the original destination pseudo. */ 4477 if (expr->reaching_reg == NULL) 4478 expr->reaching_reg 4479 = gen_reg_rtx (GET_MODE (SET_DEST (set))); 4480 4481 /* In theory this should never fail since we're creating 4482 a reg->reg copy. 4483 4484 However, on the x86 some of the movXX patterns actually 4485 contain clobbers of scratch regs. This may cause the 4486 insn created by validate_change to not match any pattern 4487 and thus cause validate_change to fail. */ 4488 if (validate_change (insn, &SET_SRC (set), 4489 expr->reaching_reg, 0)) 4490 { 4491 occr->deleted_p = 1; 4492 SET_BIT (pre_redundant_insns, INSN_CUID (insn)); 4493 changed = 1; 4494 gcse_subst_count++; 4495 } 4496 4497 if (gcse_file) 4498 { 4499 fprintf (gcse_file, 4500 "PRE: redundant insn %d (expression %d) in bb %d, reaching reg is %d\n", 4501 INSN_UID (insn), indx, bb, REGNO (expr->reaching_reg)); 4502 } 4503 } 4504 } 4505 } 4506 } 4507 4508 return changed; 4509} 4510 4511/* Perform GCSE optimizations using PRE. 4512 This is called by one_pre_gcse_pass after all the dataflow analysis 4513 has been done. 4514 4515 This is based on the original Morel-Renvoise paper Fred Chow's thesis, 4516 and lazy code motion from Knoop, Ruthing and Steffen as described in 4517 Advanced Compiler Design and Implementation. 4518 4519 ??? A new pseudo reg is created to hold the reaching expression. 4520 The nice thing about the classical approach is that it would try to 4521 use an existing reg. If the register can't be adequately optimized 4522 [i.e. we introduce reload problems], one could add a pass here to 4523 propagate the new register through the block. 4524 4525 ??? We don't handle single sets in PARALLELs because we're [currently] 4526 not able to copy the rest of the parallel when we insert copies to create 4527 full redundancies from partial redundancies. However, there's no reason 4528 why we can't handle PARALLELs in the cases where there are no partial 4529 redundancies. */ 4530 4531static int 4532pre_gcse () 4533{ 4534 int i; 4535 int changed; 4536 struct expr **index_map; 4537 4538 /* Compute a mapping from expression number (`bitmap_index') to 4539 hash table entry. */ 4540 4541 index_map = (struct expr **) alloca (n_exprs * sizeof (struct expr *)); 4542 bzero ((char *) index_map, n_exprs * sizeof (struct expr *)); 4543 for (i = 0; i < expr_hash_table_size; i++) 4544 { 4545 struct expr *expr; 4546 4547 for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash) 4548 index_map[expr->bitmap_index] = expr; 4549 } 4550 4551 /* Reset bitmap used to track which insns are redundant. */ 4552 pre_redundant_insns = sbitmap_alloc (max_cuid); 4553 sbitmap_zero (pre_redundant_insns); 4554 4555 /* Delete the redundant insns first so that 4556 - we know what register to use for the new insns and for the other 4557 ones with reaching expressions 4558 - we know which insns are redundant when we go to create copies */ 4559 changed = pre_delete (); 4560 4561 /* Insert insns in places that make partially redundant expressions 4562 fully redundant. */ 4563 pre_insert (index_map); 4564 4565 /* In other places with reaching expressions, copy the expression to the 4566 specially allocated pseudo-reg that reaches the redundant expression. */ 4567 pre_insert_copies (); 4568 4569 free (pre_redundant_insns); 4570 4571 return changed; 4572} 4573 4574/* Top level routine to perform one PRE GCSE pass. 4575 4576 Return non-zero if a change was made. */ 4577 4578static int 4579one_pre_gcse_pass (pass) 4580 int pass; 4581{ 4582 int changed = 0; 4583 4584 gcse_subst_count = 0; 4585 gcse_create_count = 0; 4586 4587 alloc_expr_hash_table (max_cuid); 4588 compute_expr_hash_table (); 4589 if (gcse_file) 4590 dump_hash_table (gcse_file, "Expression", expr_hash_table, 4591 expr_hash_table_size, n_exprs); 4592 if (n_exprs > 0) 4593 { 4594 alloc_pre_mem (n_basic_blocks, n_exprs); 4595 compute_pre_data (); 4596 changed |= pre_gcse (); 4597 free_pre_mem (); 4598 } 4599 free_expr_hash_table (); 4600 4601 if (gcse_file) 4602 { 4603 fprintf (gcse_file, "\n"); 4604 fprintf (gcse_file, "PRE GCSE of %s, pass %d: %d bytes needed, %d substs, %d insns created\n", 4605 current_function_name, pass, 4606 bytes_used, gcse_subst_count, gcse_create_count); 4607 } 4608 4609 return changed; 4610} 4611 4612/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN. 4613 We have to add REG_LABEL notes, because the following loop optimization 4614 pass requires them. */ 4615 4616/* ??? This is very similar to the loop.c add_label_notes function. We 4617 could probably share code here. */ 4618 4619/* ??? If there was a jump optimization pass after gcse and before loop, 4620 then we would not need to do this here, because jump would add the 4621 necessary REG_LABEL notes. */ 4622 4623static void 4624add_label_notes (x, insn) 4625 rtx x; 4626 rtx insn; 4627{ 4628 enum rtx_code code = GET_CODE (x); 4629 int i, j; 4630 char *fmt; 4631 4632 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) 4633 { 4634 /* This code used to ignore labels that referred to dispatch tables to 4635 avoid flow generating (slighly) worse code. 4636 4637 We no longer ignore such label references (see LABEL_REF handling in 4638 mark_jump_label for additional information). */ 4639 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_LABEL, XEXP (x, 0), 4640 REG_NOTES (insn)); 4641 return; 4642 } 4643 4644 fmt = GET_RTX_FORMAT (code); 4645 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 4646 { 4647 if (fmt[i] == 'e') 4648 add_label_notes (XEXP (x, i), insn); 4649 else if (fmt[i] == 'E') 4650 for (j = XVECLEN (x, i) - 1; j >= 0; j--) 4651 add_label_notes (XVECEXP (x, i, j), insn); 4652 } 4653} 4654 4655/* Compute transparent outgoing information for each block. 4656 4657 An expression is transparent to an edge unless it is killed by 4658 the edge itself. This can only happen with abnormal control flow, 4659 when the edge is traversed through a call. This happens with 4660 non-local labels and exceptions. 4661 4662 This would not be necessary if we split the edge. While this is 4663 normally impossible for abnormal critical edges, with some effort 4664 it should be possible with exception handling, since we still have 4665 control over which handler should be invoked. But due to increased 4666 EH table sizes, this may not be worthwhile. */ 4667 4668static void 4669compute_transpout () 4670{ 4671 int bb; 4672 4673 sbitmap_vector_ones (transpout, n_basic_blocks); 4674 4675 for (bb = 0; bb < n_basic_blocks; ++bb) 4676 { 4677 int i; 4678 4679 /* Note that flow inserted a nop a the end of basic blocks that 4680 end in call instructions for reasons other than abnormal 4681 control flow. */ 4682 if (GET_CODE (BLOCK_END (bb)) != CALL_INSN) 4683 continue; 4684 4685 for (i = 0; i < expr_hash_table_size; i++) 4686 { 4687 struct expr *expr; 4688 for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash) 4689 if (GET_CODE (expr->expr) == MEM) 4690 { 4691 rtx addr = XEXP (expr->expr, 0); 4692 4693 if (GET_CODE (addr) == SYMBOL_REF 4694 && CONSTANT_POOL_ADDRESS_P (addr)) 4695 continue; 4696 4697 /* ??? Optimally, we would use interprocedural alias 4698 analysis to determine if this mem is actually killed 4699 by this call. */ 4700 RESET_BIT (transpout[bb], expr->bitmap_index); 4701 } 4702 } 4703 } 4704} 4705