/* gcse.c revision 90075 */
/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do rough calc of how many regs are needed in each block, and a rough
     calc of how many regs are available in each class and use that to
     throttle back the code in cases where RTX_COST is minimal.
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
   - ability to realloc sbitmap vectors would allow one initial computation
     of reg_set_in_block with only subsequent additions, rather than
     recomputing it for each pass

*/

/* References searched while implementing this.

   Compilers Principles, Techniques and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "toplev.h"

#include "rtl.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "real.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "ggc.h"
#include "params.h"

#include "obstack.h"
#define obstack_chunk_alloc gmalloc
#define obstack_chunk_free free

/* Propagate flow information through back edges and thus enable PRE's
   moving loop invariant calculations out of loops.

   Originally this tended to create worse overall code, but several
   improvements during the development of PRE seem to have made following
   back edges generally a win.

   Note much of the loop invariant code motion done here would normally
   be done by loop.c, which has more heuristics for when to move invariants
   out of loops.  At some point we might need to move some of those
   heuristics into gcse.c.  */
#define FOLLOW_BACK_EDGES 1

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by GCSE.

   We perform the following steps:

   1) Compute basic block information.

   2) Compute table of places where registers are set.

   3) Perform copy/constant propagation.

   4) Perform global cse.

   5) Perform another pass of copy/constant propagation.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for Classic because Classic
   GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which Classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   Eventually it would be nice to replace cse.c/gcse.c with SSA (static single
   assignment) based GVN (global value numbering).  L. T. Simpson's paper
   (Rice University) on value numbering is a useful reference for this.

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of spec92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found doing copy propagation between each pass enables further
   substitutions.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.  The parameter
   max-gcse-passes can be modified if one wants to experiment.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CSE pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing register.

   **********************

   A fair bit of simplicity is created by creating small functions for simple
   tasks, even when the function is only called in one place.  This may
   measurably slow things down [or may not] by creating more function call
   overhead than is necessary.  The source is laid out so that it's trivial
   to make the affected functions inline so that one can measure what speed
   up, if any, can be achieved, and maybe later when things settle things can
   be rearranged.

   Help stamp out big monolithic functions!  */
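
/* A concrete illustration of the partial redundancy handled above
   (a hand-worked sketch for exposition only, not code used by this pass):
   in

	if (cond)
	  x = a + b;
	y = a + b;

   the second computation of a + b is partially redundant: it is available
   on the path through the THEN arm but not on the other path.  PRE inserts
   a copy of a + b on the path where it was not available, records the value
   in a new pseudo-reg T, and rewrites the redundant computation:

	if (cond)
	  { x = a + b; t = x; }
	else
	  t = a + b;
	y = t;

   After the follow-up copy propagation cleans up T, no path evaluates
   a + b twice.  */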
/* GCSE global vars.  */

/* -dG dump file.  */
static FILE *gcse_file;

/* Note whether or not we should run jump optimization after gcse.  We
   want to do this for two cases.

    * If we changed any jumps via cprop.

    * If we added any labels via edge splitting.  */

static int run_jump_opt_after_gcse;

/* Bitmaps are normally not included in debugging dumps.
   However it's useful to be able to print them from GDB.
   We could create special functions for this, but it's simpler to
   just allow passing stderr to the dump_foo fns.  Since stderr can
   be a macro, we store a copy here.  */
static FILE *debug_stderr;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Non-zero for each mode that supports (set (reg) (reg)).
   This is trivially true for integer and floating point values.
   It may or may not be true for condition codes.  */
static char can_copy_p[(int) NUM_MACHINE_MODES];

/* Non-zero if can_copy_p has been initialized.  */
static int can_copy_init_p;

struct reg_use { rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Non-zero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Non-zero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */
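
/* To make the bucket layout concrete (illustrative values only, not a
   structure built by this pass): if two distinct expressions happen to
   hash to bucket 3, the chains look like

	expr_hash_table[3] --> expr A --> expr B --> NULL
				(linked through next_same_hash)

   and each expr's antic_occr/avail_occr lists hold at most one
   occurrence per basic block, as described above.  */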
/* Total size of the expression hash table, in elements.  */
static unsigned int expr_hash_table_size;

/* The table itself.
   This is an array of `expr_hash_table_size' elements.  */
static struct expr **expr_hash_table;

/* Total size of the copy propagation hash table, in elements.  */
static unsigned int set_hash_table_size;

/* The table itself.
   This is an array of `set_hash_table_size' elements.  */
static struct expr **set_hash_table;

/* Mapping of uids to cuids.
   Only real insns get cuids.  */
static int *uid_cuid;

/* Highest UID in UID_CUID.  */
static int max_uid;

/* Get the cuid of an insn.  */
#ifdef ENABLE_CHECKING
#define INSN_CUID(INSN) (INSN_UID (INSN) > max_uid ? (abort (), 0) : uid_cuid[INSN_UID (INSN)])
#else
#define INSN_CUID(INSN) (uid_cuid[INSN_UID (INSN)])
#endif

/* Number of cuids.  */
static int max_cuid;

/* Mapping of cuids to insns.  */
static rtx *cuid_insn;

/* Get insn from cuid.  */
#define CUID_INSN(CUID) (cuid_insn[CUID])

/* Maximum register number in function prior to doing gcse + 1.
   Registers created during this pass have regno >= max_gcse_regno.
   This is named with "gcse" to not collide with global of same name.  */
static unsigned int max_gcse_regno;

/* Maximum number of cse-able expressions found.  */
static int n_exprs;

/* Maximum number of assignments for copy propagation found.  */
static int n_sets;

/* Table of registers that are modified.

   For each register, each element is a list of places where the pseudo-reg
   is set.

   For simplicity, GCSE is done on sets of pseudo-regs only.  PRE GCSE only
   requires knowledge of which blocks kill which regs [and thus could use
   a bitmap instead of the lists `reg_set_table' uses].

   `reg_set_table' could be turned into an array of bitmaps (num-bbs x
   num-regs) [however perhaps it may be useful to keep the data as is].  One
   advantage of recording things this way is that `reg_set_table' is fairly
   sparse with respect to pseudo regs but for hard regs could be fairly dense
   [relatively speaking].  And recording sets of pseudo-regs in lists speeds
   up functions like compute_transp since in the case of pseudo-regs we only
   need to iterate over the number of times a pseudo-reg is set, not over the
   number of basic blocks [clearly there is a bit of a slow down in the cases
   where a pseudo is set more than once in a block, however it is believed
   that the net effect is to speed things up].  This isn't done for hard-regs
   because recording call-clobbered hard-regs in `reg_set_table' at each
   function call can consume a fair bit of memory, and iterating over
   hard-regs stored this way in compute_transp will be more expensive.  */

typedef struct reg_set
{
  /* The next setting of this register.  */
  struct reg_set *next;
  /* The insn where it was set.  */
  rtx insn;
} reg_set;

static reg_set **reg_set_table;

/* Size of `reg_set_table'.
   The table starts out at max_gcse_regno + slop, and is enlarged as
   necessary.  */
static int reg_set_table_size;

/* Amount to grow `reg_set_table' by when it's full.  */
#define REG_SET_TABLE_SLOP 100
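
/* Illustration of the list layout above (made-up insns and regno):
   if pseudo-reg 66 is set by insns I3 and I9 and nowhere else, then

	reg_set_table[66] --> { insn = I9 } --> { insn = I3 } --> NULL

   since record_one_set below pushes each new reg_set onto the head of
   the list.  compute_transp can then clear the transparency bit for
   just the two blocks containing I3 and I9 rather than scanning every
   basic block.  */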
/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by
   anything except itself (i.e., loads and stores to a single location).
   We can then allow movement of these MEM refs with a little special
   allowance (all stores copy the same value to the reaching reg used
   for the loads).  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.
   Store Motion uses this structure as an expression table to track stores
   which look interesting, and might be moveable towards the exit block.  */

struct ls_expr
{
  struct expr *expr;		/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx loads;			/* INSN list of loads seen.  */
  rtx stores;			/* INSN list of stores seen.  */
  struct ls_expr *next;		/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  int hash_index;		/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};

/* Head of the list of load/store memory refs.  */
static struct ls_expr *pre_ldst_mems = NULL;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* For each block, a bitmap of registers set in the block.
   This is used by expr_killed_p and compute_transp.
   It is computed during hash table computation and not by compute_sets
   as it includes registers added since the last pass (or between cprop and
   gcse) and it's currently not easy to realloc sbitmap vectors.  */
static sbitmap *reg_set_in_block;

/* Array, indexed by basic block number for a list of insns which modify
   memory within that block.  */
static rtx *modify_mem_list;
bitmap modify_mem_list_set;

/* This array parallels modify_mem_list, but is kept canonicalized.  */
static rtx *canon_modify_mem_list;
bitmap canon_modify_mem_list_set;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of constants propagated.  */
static int const_prop_count;
/* Number of copies propagated.  */
static int copy_prop_count;

/* These variables are used by classic GCSE.
   Normally they'd be defined a bit later, but `rd_gen' needs to
   be declared sooner.  */

/* Each block has a bitmap of each type.
   The length of each block's bitmap is:

       max_cuid  - for reaching definitions
       n_exprs   - for available expressions

   Thus we view the bitmaps as 2 dimensional arrays, i.e.

       rd_kill[block_num][cuid_num]
       ae_kill[block_num][expr_num]  */

/* For reaching defs */
static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out;

/* For available exprs */
static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out;

/* Objects of this type are passed around by the null-pointer check
   removal routines.  */
struct null_pointer_info
{
  /* The basic block being processed.  */
  int current_block;
  /* The first register to be handled in this pass.  */
  unsigned int min_reg;
  /* One greater than the last register to be handled in this pass.  */
  unsigned int max_reg;
  sbitmap *nonnull_local;
  sbitmap *nonnull_killed;
};

static void compute_can_copy PARAMS ((void));
static char *gmalloc PARAMS ((unsigned int));
static char *grealloc PARAMS ((char *, unsigned int));
static char *gcse_alloc PARAMS ((unsigned long));
static void alloc_gcse_mem PARAMS ((rtx));
static void free_gcse_mem PARAMS ((void));
static void alloc_reg_set_mem PARAMS ((int));
static void free_reg_set_mem PARAMS ((void));
static int get_bitmap_width PARAMS ((int, int, int));
static void record_one_set PARAMS ((int, rtx));
static void record_set_info PARAMS ((rtx, rtx, void *));
static void compute_sets PARAMS ((rtx));
static void hash_scan_insn PARAMS ((rtx, int, int));
static void hash_scan_set PARAMS ((rtx, rtx, int));
static void hash_scan_clobber PARAMS ((rtx, rtx));
static void hash_scan_call PARAMS ((rtx, rtx));
static int want_to_gcse_p PARAMS ((rtx));
static int oprs_unchanged_p PARAMS ((rtx, rtx, int));
static int oprs_anticipatable_p PARAMS ((rtx, rtx));
static int oprs_available_p PARAMS ((rtx, rtx));
static void insert_expr_in_table PARAMS ((rtx, enum machine_mode, rtx,
					  int, int));
static void insert_set_in_table PARAMS ((rtx, rtx));
static unsigned int hash_expr PARAMS ((rtx, enum machine_mode, int *, int));
static unsigned int hash_expr_1 PARAMS ((rtx, enum machine_mode, int *));
static unsigned int hash_string_1 PARAMS ((const char *));
static unsigned int hash_set PARAMS ((int, int));
static int expr_equiv_p PARAMS ((rtx, rtx));
static void record_last_reg_set_info PARAMS ((rtx, int));
static void record_last_mem_set_info PARAMS ((rtx));
static void record_last_set_info PARAMS ((rtx, rtx, void *));
static void compute_hash_table PARAMS ((int));
static void alloc_set_hash_table PARAMS ((int));
static void free_set_hash_table PARAMS ((void));
static void compute_set_hash_table PARAMS ((void));
static void alloc_expr_hash_table PARAMS ((unsigned int));
static void free_expr_hash_table PARAMS ((void));
static void compute_expr_hash_table PARAMS ((void));
static void dump_hash_table PARAMS ((FILE *, const char *, struct expr **,
				     int, int));
static struct expr *lookup_expr PARAMS ((rtx));
static struct expr *lookup_set PARAMS ((unsigned int, rtx));
static struct expr *next_set PARAMS ((unsigned int, struct expr *));
static void reset_opr_set_tables PARAMS ((void));
static int oprs_not_set_p PARAMS ((rtx, rtx));
static void mark_call PARAMS ((rtx));
static void mark_set PARAMS ((rtx, rtx));
static void mark_clobber PARAMS ((rtx, rtx));
static void mark_oprs_set PARAMS ((rtx));
static void alloc_cprop_mem PARAMS ((int, int));
static void free_cprop_mem PARAMS ((void));
static void compute_transp PARAMS ((rtx, int, sbitmap *, int));
static void compute_transpout PARAMS ((void));
static void compute_local_properties PARAMS ((sbitmap *, sbitmap *, sbitmap *,
					      int));
static void compute_cprop_data PARAMS ((void));
static void find_used_regs PARAMS ((rtx *, void *));
static int try_replace_reg PARAMS ((rtx, rtx, rtx));
static struct expr *find_avail_set PARAMS ((int, rtx));
static int cprop_jump PARAMS ((basic_block, rtx, rtx, rtx));
#ifdef HAVE_cc0
static int cprop_cc0_jump PARAMS ((basic_block, rtx, struct reg_use *, rtx));
#endif
static void mems_conflict_for_gcse_p PARAMS ((rtx, rtx, void *));
static int load_killed_in_block_p PARAMS ((basic_block, int, rtx, int));
static void canon_list_insert PARAMS ((rtx, rtx, void *));
static int cprop_insn PARAMS ((basic_block, rtx, int));
static int cprop PARAMS ((int));
static int one_cprop_pass PARAMS ((int, int));
static void alloc_pre_mem PARAMS ((int, int));
static void free_pre_mem PARAMS ((void));
static void compute_pre_data PARAMS ((void));
static int pre_expr_reaches_here_p PARAMS ((basic_block, struct expr *,
					    basic_block));
static void insert_insn_end_bb PARAMS ((struct expr *, basic_block, int));
static void pre_insert_copy_insn PARAMS ((struct expr *, rtx));
static void pre_insert_copies PARAMS ((void));
static int pre_delete PARAMS ((void));
static int pre_gcse PARAMS ((void));
static int one_pre_gcse_pass PARAMS ((int));
static void add_label_notes PARAMS ((rtx, rtx));
static void alloc_code_hoist_mem PARAMS ((int, int));
static void free_code_hoist_mem PARAMS ((void));
static void compute_code_hoist_vbeinout PARAMS ((void));
static void compute_code_hoist_data PARAMS ((void));
static int hoist_expr_reaches_here_p PARAMS ((basic_block, int, basic_block,
					      char *));
static void hoist_code PARAMS ((void));
static int one_code_hoisting_pass PARAMS ((void));
static void alloc_rd_mem PARAMS ((int, int));
static void free_rd_mem PARAMS ((void));
static void handle_rd_kill_set PARAMS ((rtx, int, basic_block));
static void compute_kill_rd PARAMS ((void));
static void compute_rd PARAMS ((void));
static void alloc_avail_expr_mem PARAMS ((int, int));
static void free_avail_expr_mem PARAMS ((void));
static void compute_ae_gen PARAMS ((void));
static int expr_killed_p PARAMS ((rtx, basic_block));
static void compute_ae_kill PARAMS ((sbitmap *, sbitmap *));
static int expr_reaches_here_p PARAMS ((struct occr *, struct expr *,
					basic_block, int));
static rtx computing_insn PARAMS ((struct expr *, rtx));
static int def_reaches_here_p PARAMS ((rtx, rtx));
static int can_disregard_other_sets PARAMS ((struct reg_set **, rtx, int));
static int handle_avail_expr PARAMS ((rtx, struct expr *));
static int classic_gcse PARAMS ((void));
static int one_classic_gcse_pass PARAMS ((int));
static void invalidate_nonnull_info PARAMS ((rtx, rtx, void *));
static void delete_null_pointer_checks_1 PARAMS ((unsigned int *,
						  sbitmap *, sbitmap *,
						  struct null_pointer_info *));
static rtx process_insert_insn PARAMS ((struct expr *));
static int pre_edge_insert PARAMS ((struct edge_list *, struct expr **));
static int expr_reaches_here_p_work PARAMS ((struct occr *, struct expr *,
					     basic_block, int, char *));
static int pre_expr_reaches_here_p_work PARAMS ((basic_block, struct expr *,
						 basic_block, char *));
static struct ls_expr *ldst_entry PARAMS ((rtx));
static void free_ldst_entry PARAMS ((struct ls_expr *));
static void free_ldst_mems PARAMS ((void));
static void print_ldst_list PARAMS ((FILE *));
static struct ls_expr *find_rtx_in_ldst PARAMS ((rtx));
static int enumerate_ldsts PARAMS ((void));
static inline struct ls_expr *first_ls_expr PARAMS ((void));
static inline struct ls_expr *next_ls_expr PARAMS ((struct ls_expr *));
static int simple_mem PARAMS ((rtx));
static void invalidate_any_buried_refs PARAMS ((rtx));
static void compute_ld_motion_mems PARAMS ((void));
static void trim_ld_motion_mems PARAMS ((void));
static void update_ld_motion_stores PARAMS ((struct expr *));
static void reg_set_info PARAMS ((rtx, rtx, void *));
static int store_ops_ok PARAMS ((rtx, basic_block));
static void find_moveable_store PARAMS ((rtx));
static int compute_store_table PARAMS ((void));
static int load_kills_store PARAMS ((rtx, rtx));
static int find_loads PARAMS ((rtx, rtx));
static int store_killed_in_insn PARAMS ((rtx, rtx));
static int store_killed_after PARAMS ((rtx, rtx, basic_block));
static int store_killed_before PARAMS ((rtx, rtx, basic_block));
static void build_store_vectors PARAMS ((void));
static void insert_insn_start_bb PARAMS ((rtx, basic_block));
static int insert_store PARAMS ((struct ls_expr *, edge));
static void replace_store_insn PARAMS ((rtx, rtx, basic_block));
static void delete_store PARAMS ((struct ls_expr *, basic_block));
static void free_store_memory PARAMS ((void));
static void store_motion PARAMS ((void));
static void clear_modify_mem_tables PARAMS ((void));
static void free_modify_mem_tables PARAMS ((void));

/* Entry point for global common subexpression elimination.
   F is the first instruction in the function.  */

int
gcse_main (f, file)
     rtx f;
     FILE *file;
{
  int changed, pass;
  /* Bytes used at start of pass.  */
  int initial_bytes_used;
  /* Maximum number of bytes used by a pass.  */
  int max_pass_bytes;
  /* Point to release obstack data from for each pass.  */
  char *gcse_obstack_bottom;

  /* Insertion of instructions on edges can create new basic blocks; we
     need the original basic block count so that we can properly deallocate
     arrays sized on the number of basic blocks originally in the cfg.  */
  int orig_bb_count;

  /* We do not construct an accurate cfg in functions which call
     setjmp, so just punt to be safe.  */
  if (current_function_calls_setjmp)
    return 0;

  /* Assume that we do not need to run jump optimizations after gcse.  */
  run_jump_opt_after_gcse = 0;

  /* For calling dump_foo fns from gdb.  */
  debug_stderr = stderr;
  gcse_file = file;

  /* Identify the basic block information for this function, including
     successors and predecessors.  */
  max_gcse_regno = max_reg_num ();

  if (file)
    dump_flow_info (file);

  orig_bb_count = n_basic_blocks;
  /* Return if there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return 0;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.  */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d > 1000 basic blocks and %d >= 20 edges/basic block",
		 n_basic_blocks, n_edges / n_basic_blocks);
      return 0;
    }
  /* If allocating memory for the cprop bitmap would take up too much
     storage it's better just to disable the optimization.  */
  if ((n_basic_blocks
       * SBITMAP_SET_SIZE (max_gcse_regno)
       * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
    {
      if (warn_disabled_optimization)
	warning ("GCSE disabled: %d basic blocks and %d registers",
		 n_basic_blocks, max_gcse_regno);

      return 0;
    }

  /* See what modes support reg/reg copy operations.  */
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = 1;
    }

  gcc_obstack_init (&gcse_obstack);
  bytes_used = 0;

  /* We need alias.  */
  init_alias_analysis ();

  /* Record where pseudo-registers are set.  This data is kept accurate
     during each pass.  ??? We could also record hard-reg information here
     [since it's unchanging], however it is currently done during hash table
     computation.

     It may be tempting to compute MEM set information here too, but MEM sets
     will be subject to code motion one day and thus we need to compute
     information about memory sets when we build the hash tables.  */

  alloc_reg_set_mem (max_gcse_regno);
  compute_sets (f);

  pass = 0;
  initial_bytes_used = bytes_used;
  max_pass_bytes = 0;
  gcse_obstack_bottom = gcse_alloc (1);
  changed = 1;
  while (changed && pass < MAX_GCSE_PASSES)
    {
      changed = 0;
      if (file)
	fprintf (file, "GCSE pass %d\n\n", pass + 1);

      /* Initialize bytes_used to the space for the pred/succ lists,
	 and the reg_set_table data.  */
      bytes_used = initial_bytes_used;

      /* Each pass may create new registers, so recalculate each time.  */
      max_gcse_regno = max_reg_num ();

      alloc_gcse_mem (f);

      /* Don't allow constant propagation to modify jumps
	 during this pass.  */
      changed = one_cprop_pass (pass + 1, 0);

      if (optimize_size)
	changed |= one_classic_gcse_pass (pass + 1);
      else
	{
	  changed |= one_pre_gcse_pass (pass + 1);

	  /* We may have just created new basic blocks.  Release and
	     recompute various things which are sized on the number of
	     basic blocks.  */
	  if (changed)
	    {
	      free_modify_mem_tables ();
	      modify_mem_list
		= (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
	      canon_modify_mem_list
		= (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
	      memset ((char *) modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
	      memset ((char *) canon_modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
	      orig_bb_count = n_basic_blocks;
	    }

	  free_reg_set_mem ();
	  alloc_reg_set_mem (max_reg_num ());
	  compute_sets (f);
	  run_jump_opt_after_gcse = 1;
	}

      if (max_pass_bytes < bytes_used)
	max_pass_bytes = bytes_used;

      /* Free up memory, then reallocate for code hoisting.  We can
	 not re-use the existing allocated memory because the tables
	 will not have info for the insns or registers created by
	 partial redundancy elimination.  */
      free_gcse_mem ();
      /* It does not make sense to run code hoisting unless we are optimizing
	 for code size -- it rarely makes programs faster, and can make
	 them bigger if we did partial redundancy elimination (when optimizing
	 for space, we use a classic gcse algorithm instead of partial
	 redundancy algorithms).  */
      if (optimize_size)
	{
	  max_gcse_regno = max_reg_num ();
	  alloc_gcse_mem (f);
	  changed |= one_code_hoisting_pass ();
	  free_gcse_mem ();

	  if (max_pass_bytes < bytes_used)
	    max_pass_bytes = bytes_used;
	}

      if (file)
	{
	  fprintf (file, "\n");
	  fflush (file);
	}

      obstack_free (&gcse_obstack, gcse_obstack_bottom);
      pass++;
    }

  /* Do one last pass of copy propagation, including cprop into
     conditional jumps.  */

  max_gcse_regno = max_reg_num ();
  alloc_gcse_mem (f);

  /* This time, go ahead and allow cprop to alter jumps.  */
  one_cprop_pass (pass + 1, 1);
  free_gcse_mem ();

  if (file)
    {
      fprintf (file, "GCSE of %s: %d basic blocks, ",
	       current_function_name, n_basic_blocks);
      fprintf (file, "%d pass%s, %d bytes\n\n",
	       pass, pass > 1 ? "es" : "", max_pass_bytes);
    }

  obstack_free (&gcse_obstack, NULL);
  free_reg_set_mem ();

  /* We are finished with alias.  */
  end_alias_analysis ();
  allocate_reg_info (max_reg_num (), FALSE, FALSE);

  if (!optimize_size && flag_gcse_sm)
    store_motion ();

  return run_jump_opt_after_gcse;
}

/* Misc. utilities.  */

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy ()
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif

  memset (can_copy_p, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy_p[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy_p[i] = 1;
#endif
      }
    else
      can_copy_p[i] = 1;

  end_sequence ();
}

/* Cover function to xmalloc to record bytes allocated.  */

static char *
gmalloc (size)
     unsigned int size;
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xrealloc.
   We don't record the additional size since we don't know it.
   It won't affect memory usage stats much anyway.  */

static char *
grealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  return xrealloc (ptr, size);
}

/* Cover function to obstack_alloc.
   We don't need to record the bytes allocated here since
   obstack_chunk_alloc is set to gmalloc.  */

static char *
gcse_alloc (size)
     unsigned long size;
{
  return (char *) obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the cuid mapping array,
   and reg/memory set tracking tables.

   This is called at the start of each pass.  */

static void
alloc_gcse_mem (f)
     rtx f;
{
  int i, n;
  rtx insn;

  /* Find the largest UID and create a mapping from UIDs to CUIDs.
     CUIDs are like UIDs except they increase monotonically, have no gaps,
     and only apply to real insns.  */

  max_uid = get_max_uid ();
  n = (max_uid + 1) * sizeof (int);
  uid_cuid = (int *) gmalloc (n);
  memset ((char *) uid_cuid, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	uid_cuid[INSN_UID (insn)] = i++;
      else
	uid_cuid[INSN_UID (insn)] = i;
    }
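  /* For example (illustrative UIDs only): given an insn stream with
     UIDs 5 (insn), 7 (note), 9 (insn), the loop above produces
     uid_cuid[5] = 0, uid_cuid[7] = 1, uid_cuid[9] = 1; a non-insn maps
     to the cuid of the next real insn, so INSN_CUID comparisons on real
     insns stay monotonic and gap-free.  */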
  /* Create a table mapping cuids to insns.  */

  max_cuid = i;
  n = (max_cuid + 1) * sizeof (rtx);
  cuid_insn = (rtx *) gmalloc (n);
  memset ((char *) cuid_insn, 0, n);
  for (insn = f, i = 0; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      CUID_INSN (i++) = insn;

  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = BITMAP_XMALLOC ();

  /* Allocate vars to track sets of regs, memory per block.  */
  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
						       max_gcse_regno);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  */
  modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
  canon_modify_mem_list = (rtx *) gmalloc (n_basic_blocks * sizeof (rtx));
  memset ((char *) modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
  memset ((char *) canon_modify_mem_list, 0, n_basic_blocks * sizeof (rtx));
  modify_mem_list_set = BITMAP_XMALLOC ();
  canon_modify_mem_list_set = BITMAP_XMALLOC ();
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem ()
{
  free (uid_cuid);
  free (cuid_insn);

  BITMAP_XFREE (reg_set_bitmap);

  sbitmap_vector_free (reg_set_in_block);
  free_modify_mem_tables ();
  BITMAP_XFREE (modify_mem_list_set);
  BITMAP_XFREE (canon_modify_mem_list_set);
}

/* Many of the global optimization algorithms work by solving dataflow
   equations for various expressions.  Initially, some local value is
   computed for each expression in each block.  Then, the values across the
   various blocks are combined (by following flow graph edges) to arrive at
   global values.  Conceptually, each set of equations is independent.  We
   may therefore solve all the equations in parallel, solve them one at a
   time, or pick any intermediate approach.

   When you're going to need N two-dimensional bitmaps, each X (say, the
   number of blocks) by Y (say, the number of expressions), call this
   function.  It's not important what X and Y represent; only that Y
   correspond to the things that can be done in parallel.  This function will
   return an appropriate chunking factor C; you should solve C sets of
   equations in parallel.  By going through this function, we can easily
   trade space against time; by solving fewer equations in parallel we use
   less space.  */

static int
get_bitmap_width (n, x, y)
     int n;
     int x;
     int y;
{
  /* It's not really worth figuring out *exactly* how much memory will
     be used by a particular choice.  The important thing is to get
     something approximately right.  */
  size_t max_bitmap_memory = 10 * 1024 * 1024;

  /* The number of bytes we'd use for a single column of minimum
     width.  */
  size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE);

  /* Often, it's reasonable just to solve all the equations in
     parallel.  */
  if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory)
    return y;

  /* Otherwise, pick the largest width we can, without going over the
     limit.  */
  return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1)
			     / column_size);
}
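
/* A worked instance of the chunking above (illustrative numbers,
   assuming 8-byte sbitmap elements, so SBITMAP_ELT_BITS == 64): with
   N = 4 bitmaps and X = 1000 blocks, column_size is 4 * 1000 * 8 =
   32000 bytes.  For Y = 50000 expressions, column_size *
   SBITMAP_SET_SIZE (Y) is roughly 25MB, over the 10MB budget, so the
   fallback returns 64 * ((10485760 + 31999) / 32000) = 64 * 328 =
   20992; the caller then solves about 21000 of the 50000 equations
   per chunk instead of all of them at once.  */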
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   SETP controls which hash table to look at.  If zero, this routine looks at
   the expr hash table; if nonzero this routine looks at the set hash table.
   Additionally, TRANSP is computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (transp, comp, antloc, setp)
     sbitmap *transp;
     sbitmap *comp;
     sbitmap *antloc;
     int setp;
{
  unsigned int i, hash_table_size;
  struct expr **hash_table;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (setp)
	sbitmap_vector_zero (transp, n_basic_blocks);
      else
	sbitmap_vector_ones (transp, n_basic_blocks);
    }

  if (comp)
    sbitmap_vector_zero (comp, n_basic_blocks);
  if (antloc)
    sbitmap_vector_zero (antloc, n_basic_blocks);

  /* We use the same code for cprop, pre and hoisting.  For cprop
     we care about the set hash table, for pre and hoisting we
     care about the expr hash table.  */
  hash_table_size = setp ? set_hash_table_size : expr_hash_table_size;
  hash_table = setp ? set_hash_table : expr_hash_table;

  for (i = 0; i < hash_table_size; i++)
    {
      struct expr *expr;

      for (expr = hash_table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, setp);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to non-zero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to non-zero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_NUM (occr->insn)], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
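
/* To make the three local properties concrete (a hand-worked example,
   not data produced by this pass): in a block containing, in order,

	r5 = r1 + r2
	r1 = <something>
	r6 = r1 + r2

   the expression r1 + r2 is NOT transparent (r1 is modified within the
   block), it IS locally anticipatable (the first computation precedes
   any modification of its operands), and it IS locally available (the
   last computation follows the final set of r1, so moving it to the
   block end would yield the same value).  */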
/* Register set information.

   `reg_set_table' records where each register is set or otherwise
   modified.  */

static struct obstack reg_set_obstack;

static void
alloc_reg_set_mem (n_regs)
     int n_regs;
{
  unsigned int n;

  reg_set_table_size = n_regs + REG_SET_TABLE_SLOP;
  n = reg_set_table_size * sizeof (struct reg_set *);
  reg_set_table = (struct reg_set **) gmalloc (n);
  memset ((char *) reg_set_table, 0, n);

  gcc_obstack_init (&reg_set_obstack);
}

static void
free_reg_set_mem ()
{
  free (reg_set_table);
  obstack_free (&reg_set_obstack, NULL);
}

/* Record REGNO in the reg_set table.  */

static void
record_one_set (regno, insn)
     int regno;
     rtx insn;
{
  /* Allocate a new reg_set element and link it onto the list.  */
  struct reg_set *new_reg_info;

  /* If the table isn't big enough, enlarge it.  */
  if (regno >= reg_set_table_size)
    {
      int new_size = regno + REG_SET_TABLE_SLOP;

      reg_set_table
	= (struct reg_set **) grealloc ((char *) reg_set_table,
					new_size * sizeof (struct reg_set *));
      memset ((char *) (reg_set_table + reg_set_table_size), 0,
	      (new_size - reg_set_table_size) * sizeof (struct reg_set *));
      reg_set_table_size = new_size;
    }

  new_reg_info = (struct reg_set *) obstack_alloc (&reg_set_obstack,
						   sizeof (struct reg_set));
  bytes_used += sizeof (struct reg_set);
  new_reg_info->insn = insn;
  new_reg_info->next = reg_set_table[regno];
  reg_set_table[regno] = new_reg_info;
}

/* Called from compute_sets via note_stores to handle one SET or CLOBBER in
   an insn.  The DATA is really the instruction in which the SET is
   occurring.  */

static void
record_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data;
{
  rtx record_set_insn = (rtx) data;

  if (GET_CODE (dest) == REG && REGNO (dest) >= FIRST_PSEUDO_REGISTER)
    record_one_set (REGNO (dest), record_set_insn);
}

/* Scan the function and record each set of each pseudo-register.

   This is called once, at the start of the gcse pass.  See the comments for
   `reg_set_table' for further documentation.  */

static void
compute_sets (f)
     rtx f;
{
  rtx insn;

  for (insn = f; insn != 0; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      note_stores (PATTERN (insn), record_set_info, insn);
}

/* Hash table support.  */

/* For each register, the cuid of the first/last insn in the block
   that set it, or -1 if not set.  */
#define NEVER_SET -1

struct reg_avail_info
{
  int last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static int current_bb;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (x)
     rtx x;
{
  static rtx test_insn = 0;
  int num_clobbers = 0;
  int icode;

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CONST_INT:
    case CONST_DOUBLE:
    case CALL:
      return 0;

    default:
      break;
    }

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, GET_MODE (x)))
    return 1;
  else if (GET_MODE (x) == VOIDmode)
    return 0;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (VOIDmode,
				      gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
      ggc_add_rtx_root (&test_insn, 1);
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
  SET_SRC (PATTERN (test_insn)) = x;
  return ((icode = recog (PATTERN (test_insn), test_insn, &num_clobbers)) >= 0
	  && (num_clobbers == 0 || ! added_clobbers_hard_reg_p (icode)));
}

/* Return non-zero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (x, insn, avail_p)
     rtx x, insn;
     int avail_p;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

	if (info->last_bb != current_bb)
	  return 1;
	if (avail_p)
	  return info->last_set < INSN_CUID (insn);
	else
	  return info->first_set >= INSN_CUID (insn);
      }

    case MEM:
      if (load_killed_in_block_p (BASIC_BLOCK (current_bb), INSN_CUID (insn),
				  x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
   conflict between two memory references.  */
static int gcse_mems_conflict_p;

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  A memory reference for a load instruction,
   mems_conflict_for_gcse_p will see if a memory store conflicts with
   this memory load.  */
static rtx gcse_mem_operand;
/* DEST is the output of an instruction.  If it is a memory reference, and
   possibly conflicts with the load found in gcse_mem_operand, then set
   gcse_mems_conflict_p to a nonzero value.  */

static void
mems_conflict_for_gcse_p (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void *data ATTRIBUTE_UNUSED;
{
  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == SIGN_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (GET_CODE (dest) != MEM)
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */

  if (dest == gcse_mem_operand && pre_ldst_mems != NULL)
    {
      if (!find_rtx_in_ldst (dest))
	gcse_mems_conflict_p = 1;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
		       rtx_addr_varies_p))
    gcse_mems_conflict_p = 1;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the CUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */

static int
load_killed_in_block_p (bb, uid_limit, x, avail_p)
     basic_block bb;
     int uid_limit;
     rtx x;
     int avail_p;
{
  rtx list_entry = modify_mem_list[bb->index];

  while (list_entry)
    {
      rtx setter;
      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && INSN_CUID (XEXP (list_entry, 0)) < uid_limit)
	  || (! avail_p
	      && INSN_CUID (XEXP (list_entry, 0)) > uid_limit))
	{
	  list_entry = XEXP (list_entry, 1);
	  continue;
	}

      setter = XEXP (list_entry, 0);

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (GET_CODE (setter) == CALL_INSN)
	return 1;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.

	 The note_stores interface is pretty limited, so we have to
	 communicate via global variables.  Yuk.  */
      gcse_mem_operand = x;
      gcse_mems_conflict_p = 0;
      note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
      if (gcse_mems_conflict_p)
	return 1;
      list_entry = XEXP (list_entry, 1);
    }
  return 0;
}

/* Return non-zero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return non-zero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (x, insn)
     rtx x, insn;
{
  return oprs_unchanged_p (x, insn, 1);
}
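
/* The two queries above look in opposite directions.  A hand-worked
   example (illustrative cuids only): in a block containing, in order,

	I1: r7 = r1 + r2	(cuid 10)
	I2: r1 = <something>	(cuid 11)
	I3: r8 = r1 + r2	(cuid 12)

   r1 + r2 is anticipatable at I1 (the first set of r1 has cuid 11,
   which is not before cuid 10) but not available at I1, since r1 is
   set again before the end of the block; at I3 the reverse holds:
   available (the last set of r1, cuid 11, precedes cuid 12) but not
   anticipatable.  */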
/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.

   ??? One might want to merge this with canon_hash.  Later.  */

static unsigned int
hash_expr (x, mode, do_not_record_p, hash_table_size)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
     int hash_table_size;
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_expr_1 (x, mode, do_not_record_p);
  return hash % hash_table_size;
}

/* Hash a string.  Just add its bytes up.  */

static inline unsigned
hash_string_1 (ps)
     const char *ps;
{
  unsigned hash = 0;
  const unsigned char *p = (const unsigned char *) ps;

  if (p)
    while (*p)
      hash += *p++;

  return hash;
}

/* Subroutine of hash_expr to do the actual work.  */

static unsigned int
hash_expr_1 (x, mode, do_not_record_p)
     rtx x;
     enum machine_mode mode;
     int *do_not_record_p;
{
  int i, j;
  unsigned hash = 0;
  enum rtx_code code;
  const char *fmt;

  /* Used to turn recursion into iteration.  We can't rely on GCC's
     tail-recursion elimination since we need to keep accumulating values
     in HASH.  */

  if (x == 0)
    return hash;

 repeat:
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      hash += ((unsigned int) REG << 7) + REGNO (x);
      return hash;

    case CONST_INT:
      hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode
	       + (unsigned int) INTVAL (x));
      return hash;

    case CONST_DOUBLE:
      /* This is like the general case, except that it only counts
	 the integers representing the constant.  */
      hash += (unsigned int) code + (unsigned int) GET_MODE (x);
      if (GET_MODE (x) != VOIDmode)
	for (i = 2; i < GET_RTX_LENGTH (CONST_DOUBLE); i++)
	  hash += (unsigned int) XWINT (x, i);
      else
	hash += ((unsigned int) CONST_DOUBLE_LOW (x)
		 + (unsigned int) CONST_DOUBLE_HIGH (x));
      return hash;

    /* Assume there is only one rtx object for any given label.  */
    case LABEL_REF:
      /* We don't hash on the address of the CODE_LABEL to avoid bootstrap
	 differences and differences between each stage's debugging dumps.  */
      hash += (((unsigned int) LABEL_REF << 7)
	       + CODE_LABEL_NUMBER (XEXP (x, 0)));
      return hash;

    case SYMBOL_REF:
      {
	/* Don't hash on the symbol's address to avoid bootstrap differences.
	   Different hash values may cause expressions to be recorded in
	   different orders and thus different registers to be used in the
	   final assembler.  This also avoids differences in the dump files
	   between various stages.  */
	unsigned int h = 0;
	const unsigned char *p = (const unsigned char *) XSTR (x, 0);

	while (*p)
	  h += (h << 7) + *p++; /* ??? revisit */

	hash += ((unsigned int) SYMBOL_REF << 7) + h;
	return hash;
      }

    case MEM:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}

      hash += (unsigned int) MEM;
      hash += MEM_ALIAS_SET (x);
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PC:
    case CC0:
    case CALL:
    case UNSPEC_VOLATILE:
      *do_not_record_p = 1;
      return 0;

    case ASM_OPERANDS:
      if (MEM_VOLATILE_P (x))
	{
	  *do_not_record_p = 1;
	  return 0;
	}
      else
	{
	  /* We don't want to take the filename and line into account.  */
*/ 1695 hash += (unsigned) code + (unsigned) GET_MODE (x) 1696 + hash_string_1 (ASM_OPERANDS_TEMPLATE (x)) 1697 + hash_string_1 (ASM_OPERANDS_OUTPUT_CONSTRAINT (x)) 1698 + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x); 1699 1700 if (ASM_OPERANDS_INPUT_LENGTH (x)) 1701 { 1702 for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++) 1703 { 1704 hash += (hash_expr_1 (ASM_OPERANDS_INPUT (x, i), 1705 GET_MODE (ASM_OPERANDS_INPUT (x, i)), 1706 do_not_record_p) 1707 + hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT 1708 (x, i))); 1709 } 1710 1711 hash += hash_string_1 (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); 1712 x = ASM_OPERANDS_INPUT (x, 0); 1713 mode = GET_MODE (x); 1714 goto repeat; 1715 } 1716 return hash; 1717 } 1718 1719 default: 1720 break; 1721 } 1722 1723 hash += (unsigned) code + (unsigned) GET_MODE (x); 1724 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 1725 { 1726 if (fmt[i] == 'e') 1727 { 1728 /* If we are about to do the last recursive call 1729 needed at this level, change it into iteration. 1730 This function is called enough to be worth it. */ 1731 if (i == 0) 1732 { 1733 x = XEXP (x, i); 1734 goto repeat; 1735 } 1736 1737 hash += hash_expr_1 (XEXP (x, i), 0, do_not_record_p); 1738 if (*do_not_record_p) 1739 return 0; 1740 } 1741 1742 else if (fmt[i] == 'E') 1743 for (j = 0; j < XVECLEN (x, i); j++) 1744 { 1745 hash += hash_expr_1 (XVECEXP (x, i, j), 0, do_not_record_p); 1746 if (*do_not_record_p) 1747 return 0; 1748 } 1749 1750 else if (fmt[i] == 's') 1751 hash += hash_string_1 (XSTR (x, i)); 1752 else if (fmt[i] == 'i') 1753 hash += (unsigned int) XINT (x, i); 1754 else 1755 abort (); 1756 } 1757 1758 return hash; 1759} 1760 1761/* Hash a set of register REGNO. 1762 1763 Sets are hashed on the register that is set. This simplifies the PRE copy 1764 propagation code. 1765 1766 ??? May need to make things more elaborate. Later, as necessary. */ 1767 1768static unsigned int 1769hash_set (regno, hash_table_size) 1770 int regno; 1771 int hash_table_size; 1772{ 1773 unsigned int hash; 1774 1775 hash = regno; 1776 return hash % hash_table_size; 1777} 1778 1779/* Return non-zero if exp1 is equivalent to exp2. 1780 ??? Borrowed from cse.c. Might want to remerge with cse.c. Later. */ 1781 1782static int 1783expr_equiv_p (x, y) 1784 rtx x, y; 1785{ 1786 int i, j; 1787 enum rtx_code code; 1788 const char *fmt; 1789 1790 if (x == y) 1791 return 1; 1792 1793 if (x == 0 || y == 0) 1794 return x == y; 1795 1796 code = GET_CODE (x); 1797 if (code != GET_CODE (y)) 1798 return 0; 1799 1800 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. */ 1801 if (GET_MODE (x) != GET_MODE (y)) 1802 return 0; 1803 1804 switch (code) 1805 { 1806 case PC: 1807 case CC0: 1808 return x == y; 1809 1810 case CONST_INT: 1811 return INTVAL (x) == INTVAL (y); 1812 1813 case LABEL_REF: 1814 return XEXP (x, 0) == XEXP (y, 0); 1815 1816 case SYMBOL_REF: 1817 return XSTR (x, 0) == XSTR (y, 0); 1818 1819 case REG: 1820 return REGNO (x) == REGNO (y); 1821 1822 case MEM: 1823 /* Can't merge two expressions in different alias sets, since we can 1824 decide that the expression is transparent in a block when it isn't, 1825 due to it being set with the different alias set. */ 1826 if (MEM_ALIAS_SET (x) != MEM_ALIAS_SET (y)) 1827 return 0; 1828 break; 1829 1830 /* For commutative operations, check both orders. 
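E.g. (plus:SI (reg 1) (reg 2)) must be treated as equivalent to (plus:SI (reg 2) (reg 1)). Note that hash_expr_1 simply sums the operand hashes, so both orders already land in the same hash chain; only this equivalence test needs the extra work.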
*/ 1831 case PLUS: 1832 case MULT: 1833 case AND: 1834 case IOR: 1835 case XOR: 1836 case NE: 1837 case EQ: 1838 return ((expr_equiv_p (XEXP (x, 0), XEXP (y, 0)) 1839 && expr_equiv_p (XEXP (x, 1), XEXP (y, 1))) 1840 || (expr_equiv_p (XEXP (x, 0), XEXP (y, 1)) 1841 && expr_equiv_p (XEXP (x, 1), XEXP (y, 0)))); 1842 1843 case ASM_OPERANDS: 1844 /* We don't use the generic code below because we want to 1845 disregard filename and line numbers. */ 1846 1847 /* A volatile asm isn't equivalent to any other. */ 1848 if (MEM_VOLATILE_P (x) || MEM_VOLATILE_P (y)) 1849 return 0; 1850 1851 if (GET_MODE (x) != GET_MODE (y) 1852 || strcmp (ASM_OPERANDS_TEMPLATE (x), ASM_OPERANDS_TEMPLATE (y)) 1853 || strcmp (ASM_OPERANDS_OUTPUT_CONSTRAINT (x), 1854 ASM_OPERANDS_OUTPUT_CONSTRAINT (y)) 1855 || ASM_OPERANDS_OUTPUT_IDX (x) != ASM_OPERANDS_OUTPUT_IDX (y) 1856 || ASM_OPERANDS_INPUT_LENGTH (x) != ASM_OPERANDS_INPUT_LENGTH (y)) 1857 return 0; 1858 1859 if (ASM_OPERANDS_INPUT_LENGTH (x)) 1860 { 1861 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) 1862 if (! expr_equiv_p (ASM_OPERANDS_INPUT (x, i), 1863 ASM_OPERANDS_INPUT (y, i)) 1864 || strcmp (ASM_OPERANDS_INPUT_CONSTRAINT (x, i), 1865 ASM_OPERANDS_INPUT_CONSTRAINT (y, i))) 1866 return 0; 1867 } 1868 1869 return 1; 1870 1871 default: 1872 break; 1873 } 1874 1875 /* Compare the elements. If any pair of corresponding elements 1876 fail to match, return 0 for the whole thing. */ 1877 1878 fmt = GET_RTX_FORMAT (code); 1879 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 1880 { 1881 switch (fmt[i]) 1882 { 1883 case 'e': 1884 if (! expr_equiv_p (XEXP (x, i), XEXP (y, i))) 1885 return 0; 1886 break; 1887 1888 case 'E': 1889 if (XVECLEN (x, i) != XVECLEN (y, i)) 1890 return 0; 1891 for (j = 0; j < XVECLEN (x, i); j++) 1892 if (! expr_equiv_p (XVECEXP (x, i, j), XVECEXP (y, i, j))) 1893 return 0; 1894 break; 1895 1896 case 's': 1897 if (strcmp (XSTR (x, i), XSTR (y, i))) 1898 return 0; 1899 break; 1900 1901 case 'i': 1902 if (XINT (x, i) != XINT (y, i)) 1903 return 0; 1904 break; 1905 1906 case 'w': 1907 if (XWINT (x, i) != XWINT (y, i)) 1908 return 0; 1909 break; 1910 1911 case '0': 1912 break; 1913 1914 default: 1915 abort (); 1916 } 1917 } 1918 1919 return 1; 1920} 1921 1922/* Insert expression X in INSN in the hash table. 1923 If it is already present, record it as the last occurrence in INSN's 1924 basic block. 1925 1926 MODE is the mode of the value X is being stored into. 1927 It is only used if X is a CONST_INT. 1928 1929 ANTIC_P is non-zero if X is an anticipatable expression. 1930 AVAIL_P is non-zero if X is an available expression. */ 1931 1932static void 1933insert_expr_in_table (x, mode, insn, antic_p, avail_p) 1934 rtx x; 1935 enum machine_mode mode; 1936 rtx insn; 1937 int antic_p, avail_p; 1938{ 1939 int found, do_not_record_p; 1940 unsigned int hash; 1941 struct expr *cur_expr, *last_expr = NULL; 1942 struct occr *antic_occr, *avail_occr; 1943 struct occr *last_occr = NULL; 1944 1945 hash = hash_expr (x, mode, &do_not_record_p, expr_hash_table_size); 1946 1947 /* Do not insert expression in table if it contains volatile operands, 1948 or if hash_expr determines the expression is something we don't want 1949 to or can't handle. */ 1950 if (do_not_record_p) 1951 return; 1952 1953 cur_expr = expr_hash_table[hash]; 1954 found = 0; 1955 1956 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x))) 1957 { 1958 /* If the expression isn't found, save a pointer to the end of 1959 the list. 
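(This lets the new entry be appended below without walking the chain a second time.)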
*/ 1960 last_expr = cur_expr; 1961 cur_expr = cur_expr->next_same_hash; 1962 } 1963 1964 if (! found) 1965 { 1966 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr)); 1967 bytes_used += sizeof (struct expr); 1968 if (expr_hash_table[hash] == NULL) 1969 /* This is the first pattern that hashed to this index. */ 1970 expr_hash_table[hash] = cur_expr; 1971 else 1972 /* Add EXPR to end of this hash chain. */ 1973 last_expr->next_same_hash = cur_expr; 1974 1975 /* Set the fields of the expr element. */ 1976 cur_expr->expr = x; 1977 cur_expr->bitmap_index = n_exprs++; 1978 cur_expr->next_same_hash = NULL; 1979 cur_expr->antic_occr = NULL; 1980 cur_expr->avail_occr = NULL; 1981 } 1982 1983 /* Now record the occurrence(s). */ 1984 if (antic_p) 1985 { 1986 antic_occr = cur_expr->antic_occr; 1987 1988 /* Search for another occurrence in the same basic block. */ 1989 while (antic_occr && BLOCK_NUM (antic_occr->insn) != BLOCK_NUM (insn)) 1990 { 1991 /* If an occurrence isn't found, save a pointer to the end of 1992 the list. */ 1993 last_occr = antic_occr; 1994 antic_occr = antic_occr->next; 1995 } 1996 1997 if (antic_occr) 1998 /* Found another instance of the expression in the same basic block. 1999 Prefer the currently recorded one. We want the first one in the 2000 block and the block is scanned from start to end. */ 2001 ; /* nothing to do */ 2002 else 2003 { 2004 /* First occurrence of this expression in this basic block. */ 2005 antic_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 2006 bytes_used += sizeof (struct occr); 2007 /* First occurrence of this expression in any block? */ 2008 if (cur_expr->antic_occr == NULL) 2009 cur_expr->antic_occr = antic_occr; 2010 else 2011 last_occr->next = antic_occr; 2012 2013 antic_occr->insn = insn; 2014 antic_occr->next = NULL; 2015 } 2016 } 2017 2018 if (avail_p) 2019 { 2020 avail_occr = cur_expr->avail_occr; 2021 2022 /* Search for another occurrence in the same basic block. */ 2023 while (avail_occr && BLOCK_NUM (avail_occr->insn) != BLOCK_NUM (insn)) 2024 { 2025 /* If an occurrence isn't found, save a pointer to the end of 2026 the list. */ 2027 last_occr = avail_occr; 2028 avail_occr = avail_occr->next; 2029 } 2030 2031 if (avail_occr) 2032 /* Found another instance of the expression in the same basic block. 2033 Prefer this occurrence to the currently recorded one. We want 2034 the last one in the block and the block is scanned from start 2035 to end. */ 2036 avail_occr->insn = insn; 2037 else 2038 { 2039 /* First occurrence of this expression in this basic block. */ 2040 avail_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 2041 bytes_used += sizeof (struct occr); 2042 2043 /* First occurrence of this expression in any block? */ 2044 if (cur_expr->avail_occr == NULL) 2045 cur_expr->avail_occr = avail_occr; 2046 else 2047 last_occr->next = avail_occr; 2048 2049 avail_occr->insn = insn; 2050 avail_occr->next = NULL; 2051 } 2052 } 2053} 2054 2055/* Insert pattern X in INSN in the hash table. 2056 X is a SET of a reg to either another reg or a constant. 2057 If it is already present, record it as the last occurrence in INSN's 2058 basic block. 
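For example, (set (reg 117) (const_int 7)) is hashed purely on register number 117 via hash_set, so every recorded set of reg 117 lands on the same chain; lookup_set and next_set below rely on that to walk all sets of a given register.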
*/ 2059 2060static void 2061insert_set_in_table (x, insn) 2062 rtx x; 2063 rtx insn; 2064{ 2065 int found; 2066 unsigned int hash; 2067 struct expr *cur_expr, *last_expr = NULL; 2068 struct occr *cur_occr, *last_occr = NULL; 2069 2070 if (GET_CODE (x) != SET 2071 || GET_CODE (SET_DEST (x)) != REG) 2072 abort (); 2073 2074 hash = hash_set (REGNO (SET_DEST (x)), set_hash_table_size); 2075 2076 cur_expr = set_hash_table[hash]; 2077 found = 0; 2078 2079 while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x))) 2080 { 2081 /* If the expression isn't found, save a pointer to the end of 2082 the list. */ 2083 last_expr = cur_expr; 2084 cur_expr = cur_expr->next_same_hash; 2085 } 2086 2087 if (! found) 2088 { 2089 cur_expr = (struct expr *) gcse_alloc (sizeof (struct expr)); 2090 bytes_used += sizeof (struct expr); 2091 if (set_hash_table[hash] == NULL) 2092 /* This is the first pattern that hashed to this index. */ 2093 set_hash_table[hash] = cur_expr; 2094 else 2095 /* Add EXPR to end of this hash chain. */ 2096 last_expr->next_same_hash = cur_expr; 2097 2098 /* Set the fields of the expr element. 2099 We must copy X because it can be modified when copy propagation is 2100 performed on its operands. */ 2101 cur_expr->expr = copy_rtx (x); 2102 cur_expr->bitmap_index = n_sets++; 2103 cur_expr->next_same_hash = NULL; 2104 cur_expr->antic_occr = NULL; 2105 cur_expr->avail_occr = NULL; 2106 } 2107 2108 /* Now record the occurrence. */ 2109 cur_occr = cur_expr->avail_occr; 2110 2111 /* Search for another occurrence in the same basic block. */ 2112 while (cur_occr && BLOCK_NUM (cur_occr->insn) != BLOCK_NUM (insn)) 2113 { 2114 /* If an occurrence isn't found, save a pointer to the end of 2115 the list. */ 2116 last_occr = cur_occr; 2117 cur_occr = cur_occr->next; 2118 } 2119 2120 if (cur_occr) 2121 /* Found another instance of the expression in the same basic block. 2122 Prefer this occurrence to the currently recorded one. We want the 2123 last one in the block and the block is scanned from start to end. */ 2124 cur_occr->insn = insn; 2125 else 2126 { 2127 /* First occurrence of this expression in this basic block. */ 2128 cur_occr = (struct occr *) gcse_alloc (sizeof (struct occr)); 2129 bytes_used += sizeof (struct occr); 2130 2131 /* First occurrence of this expression in any block? */ 2132 if (cur_expr->avail_occr == NULL) 2133 cur_expr->avail_occr = cur_occr; 2134 else 2135 last_occr->next = cur_occr; 2136 2137 cur_occr->insn = insn; 2138 cur_occr->next = NULL; 2139 } 2140} 2141 2142/* Scan pattern PAT of INSN and add an entry to the hash table. If SET_P is 2143 non-zero, this is for the assignment hash table, otherwise it is for the 2144 expression hash table. */ 2145 2146static void 2147hash_scan_set (pat, insn, set_p) 2148 rtx pat, insn; 2149 int set_p; 2150{ 2151 rtx src = SET_SRC (pat); 2152 rtx dest = SET_DEST (pat); 2153 rtx note; 2154 2155 if (GET_CODE (src) == CALL) 2156 hash_scan_call (src, insn); 2157 2158 else if (GET_CODE (dest) == REG) 2159 { 2160 unsigned int regno = REGNO (dest); 2161 rtx tmp; 2162 2163 /* If this is a single set and we are doing constant propagation, 2164 see if a REG_NOTE shows this equivalent to a constant. */ 2165 if (set_p && (note = find_reg_equal_equiv_note (insn)) != 0 2166 && CONSTANT_P (XEXP (note, 0))) 2167 src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src); 2168 2169 /* Only record sets of pseudo-regs in the hash table. */ 2170 if (! 
set_p 2171 && regno >= FIRST_PSEUDO_REGISTER 2172 /* Don't GCSE something if we can't do a reg/reg copy. */ 2173 && can_copy_p [GET_MODE (dest)] 2174 /* Is SET_SRC something we want to gcse? */ 2175 && want_to_gcse_p (src) 2176 /* Don't CSE a nop. */ 2177 && ! set_noop_p (pat) 2178 /* Don't GCSE if it has an attached REG_EQUIV note. 2179 At this point only function parameters should have 2180 REG_EQUIV notes, and if the argument slot is used somewhere 2181 explicitly, it means the address of the parameter has been taken, 2182 so we should not extend the lifetime of the pseudo. */ 2183 && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0 2184 || GET_CODE (XEXP (note, 0)) != MEM)) 2185 { 2186 /* An expression is not anticipatable if its operands are 2187 modified before this insn or if this is not the only SET in 2188 this insn. */ 2189 int antic_p = oprs_anticipatable_p (src, insn) && single_set (insn); 2190 /* An expression is not available if its operands are 2191 subsequently modified, including this insn. It's also not 2192 available if this is a branch, because we can't insert 2193 a set after the branch. */ 2194 int avail_p = (oprs_available_p (src, insn) 2195 && ! JUMP_P (insn)); 2196 2197 insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p); 2198 } 2199 2200 /* Record sets for constant/copy propagation. */ 2201 else if (set_p 2202 && regno >= FIRST_PSEUDO_REGISTER 2203 && ((GET_CODE (src) == REG 2204 && REGNO (src) >= FIRST_PSEUDO_REGISTER 2205 && can_copy_p [GET_MODE (dest)] 2206 && REGNO (src) != regno) 2207 || CONSTANT_P (src)) 2208 /* A copy is not available if its src or dest is subsequently 2209 modified. Here we want to search from INSN+1 on, but 2210 oprs_available_p searches from INSN on. */ 2211 && (insn == BLOCK_END (BLOCK_NUM (insn)) 2212 || ((tmp = next_nonnote_insn (insn)) != NULL_RTX 2213 && oprs_available_p (pat, tmp)))) 2214 insert_set_in_table (pat, insn); 2215 } 2216} 2217 2218static void 2219hash_scan_clobber (x, insn) 2220 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED; 2221{ 2222 /* Currently nothing to do. */ 2223} 2224 2225static void 2226hash_scan_call (x, insn) 2227 rtx x ATTRIBUTE_UNUSED, insn ATTRIBUTE_UNUSED; 2228{ 2229 /* Currently nothing to do. */ 2230} 2231 2232/* Process INSN and add hash table entries as appropriate. 2233 2234 Only available expressions that set a single pseudo-reg are recorded. 2235 2236 Single sets in a PARALLEL could be handled, but it's an extra complication 2237 that isn't dealt with right now. The trick is handling the CLOBBERs that 2238 are also in the PARALLEL. Later. 2239 2240 If SET_P is non-zero, this is for the assignment hash table, 2241 otherwise it is for the expression hash table. 2242 If IN_LIBCALL_BLOCK is nonzero, we are in a libcall block, and should 2243 not record any expressions. */ 2244 2245static void 2246hash_scan_insn (insn, set_p, in_libcall_block) 2247 rtx insn; 2248 int set_p; 2249 int in_libcall_block; 2250{ 2251 rtx pat = PATTERN (insn); 2252 int i; 2253 2254 if (in_libcall_block) 2255 return; 2256 2257 /* Pick out the sets of INSN and for other forms of instructions record 2258 what's been modified.
*/ 2259 2260 if (GET_CODE (pat) == SET) 2261 hash_scan_set (pat, insn, set_p); 2262 else if (GET_CODE (pat) == PARALLEL) 2263 for (i = 0; i < XVECLEN (pat, 0); i++) 2264 { 2265 rtx x = XVECEXP (pat, 0, i); 2266 2267 if (GET_CODE (x) == SET) 2268 hash_scan_set (x, insn, set_p); 2269 else if (GET_CODE (x) == CLOBBER) 2270 hash_scan_clobber (x, insn); 2271 else if (GET_CODE (x) == CALL) 2272 hash_scan_call (x, insn); 2273 } 2274 2275 else if (GET_CODE (pat) == CLOBBER) 2276 hash_scan_clobber (pat, insn); 2277 else if (GET_CODE (pat) == CALL) 2278 hash_scan_call (pat, insn); 2279} 2280 2281static void 2282dump_hash_table (file, name, table, table_size, total_size) 2283 FILE *file; 2284 const char *name; 2285 struct expr **table; 2286 int table_size, total_size; 2287{ 2288 int i; 2289 /* Flattened out table, so it's printed in proper order. */ 2290 struct expr **flat_table; 2291 unsigned int *hash_val; 2292 struct expr *expr; 2293 2294 flat_table 2295 = (struct expr **) xcalloc (total_size, sizeof (struct expr *)); 2296 hash_val = (unsigned int *) xmalloc (total_size * sizeof (unsigned int)); 2297 2298 for (i = 0; i < table_size; i++) 2299 for (expr = table[i]; expr != NULL; expr = expr->next_same_hash) 2300 { 2301 flat_table[expr->bitmap_index] = expr; 2302 hash_val[expr->bitmap_index] = i; 2303 } 2304 2305 fprintf (file, "%s hash table (%d buckets, %d entries)\n", 2306 name, table_size, total_size); 2307 2308 for (i = 0; i < total_size; i++) 2309 if (flat_table[i] != 0) 2310 { 2311 expr = flat_table[i]; 2312 fprintf (file, "Index %d (hash value %d)\n ", 2313 expr->bitmap_index, hash_val[i]); 2314 print_rtl (file, expr->expr); 2315 fprintf (file, "\n"); 2316 } 2317 2318 fprintf (file, "\n"); 2319 2320 free (flat_table); 2321 free (hash_val); 2322} 2323 2324/* Record register first/last/block set information for REGNO in INSN. 2325 2326 first_set records the first place in the block where the register 2327 is set and is used to compute "anticipatability". 2328 2329 last_set records the last place in the block where the register 2330 is set and is used to compute "availability". 2331 2332 last_bb records the block for which first_set and last_set are 2333 valid, as a quick test to invalidate them. 2334 2335 reg_set_in_block records whether the register is set in the block 2336 and is used to compute "transparency". */ 2337 2338static void 2339record_last_reg_set_info (insn, regno) 2340 rtx insn; 2341 int regno; 2342{ 2343 struct reg_avail_info *info = &reg_avail_info[regno]; 2344 int cuid = INSN_CUID (insn); 2345 2346 info->last_set = cuid; 2347 if (info->last_bb != current_bb) 2348 { 2349 info->last_bb = current_bb; 2350 info->first_set = cuid; 2351 SET_BIT (reg_set_in_block[current_bb], regno); 2352 } 2353} 2354 2355 2356/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn. 2357 Note we store a pair of elements in the list, so they have to be 2358 taken off pairwise. */ 2359 2360static void 2361canon_list_insert (dest, unused1, v_insn) 2362 rtx dest ATTRIBUTE_UNUSED; 2363 rtx unused1 ATTRIBUTE_UNUSED; 2364 void * v_insn; 2365{ 2366 rtx dest_addr, insn; 2367 2368 while (GET_CODE (dest) == SUBREG 2369 || GET_CODE (dest) == ZERO_EXTRACT 2370 || GET_CODE (dest) == SIGN_EXTRACT 2371 || GET_CODE (dest) == STRICT_LOW_PART) 2372 dest = XEXP (dest, 0); 2373 2374 /* If DEST is not a MEM, then it will not conflict with a load. Note 2375 that function calls are assumed to clobber memory, but are handled 2376 elsewhere.
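For a MEM we push two nodes onto the list below: the canonicalized address first and the MEM itself second, leaving the MEM at the head of the pair. compute_transp later reads them off pairwise, MEM then address, and hands both to canon_true_dependence.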
*/ 2377 2378 if (GET_CODE (dest) != MEM) 2379 return; 2380 2381 dest_addr = get_addr (XEXP (dest, 0)); 2382 dest_addr = canon_rtx (dest_addr); 2383 insn = (rtx) v_insn; 2384 2385 canon_modify_mem_list[BLOCK_NUM (insn)] = 2386 alloc_INSN_LIST (dest_addr, canon_modify_mem_list[BLOCK_NUM (insn)]); 2387 canon_modify_mem_list[BLOCK_NUM (insn)] = 2388 alloc_INSN_LIST (dest, canon_modify_mem_list[BLOCK_NUM (insn)]); 2389 bitmap_set_bit (canon_modify_mem_list_set, BLOCK_NUM (insn)); 2390} 2391 2392/* Record memory modification information for INSN. We do not actually care 2393 about the memory location(s) that are set, or even how they are set (consider 2394 a CALL_INSN). We merely need to record which insns modify memory. */ 2395 2396static void 2397record_last_mem_set_info (insn) 2398 rtx insn; 2399{ 2400 /* load_killed_in_block_p will handle the case of calls clobbering 2401 everything. */ 2402 modify_mem_list[BLOCK_NUM (insn)] = 2403 alloc_INSN_LIST (insn, modify_mem_list[BLOCK_NUM (insn)]); 2404 bitmap_set_bit (modify_mem_list_set, BLOCK_NUM (insn)); 2405 2406 if (GET_CODE (insn) == CALL_INSN) 2407 { 2408 /* Note that traversals of this list (other than for freeing) 2409 will break after encountering a CALL_INSN. So, there's no 2410 need to insert a pair of items, as canon_list_insert does. */ 2411 canon_modify_mem_list[BLOCK_NUM (insn)] = 2412 alloc_INSN_LIST (insn, canon_modify_mem_list[BLOCK_NUM (insn)]); 2413 bitmap_set_bit (canon_modify_mem_list_set, BLOCK_NUM (insn)); 2414 } 2415 else 2416 note_stores (PATTERN (insn), canon_list_insert, (void*) insn ); 2417} 2418 2419/* Called from compute_hash_table via note_stores to handle one 2420 SET or CLOBBER in an insn. DATA is really the instruction in which 2421 the SET is taking place. */ 2422 2423static void 2424record_last_set_info (dest, setter, data) 2425 rtx dest, setter ATTRIBUTE_UNUSED; 2426 void *data; 2427{ 2428 rtx last_set_insn = (rtx) data; 2429 2430 if (GET_CODE (dest) == SUBREG) 2431 dest = SUBREG_REG (dest); 2432 2433 if (GET_CODE (dest) == REG) 2434 record_last_reg_set_info (last_set_insn, REGNO (dest)); 2435 else if (GET_CODE (dest) == MEM 2436 /* Ignore pushes, they clobber nothing. */ 2437 && ! push_operand (dest, GET_MODE (dest))) 2438 record_last_mem_set_info (last_set_insn); 2439} 2440 2441/* Top level function to create an expression or assignment hash table. 2442 2443 Expression entries are placed in the hash table if 2444 - they are of the form (set (pseudo-reg) src), 2445 - src is something we want to perform GCSE on, 2446 - none of the operands are subsequently modified in the block 2447 2448 Assignment entries are placed in the hash table if 2449 - they are of the form (set (pseudo-reg) src), 2450 - src is something we want to perform const/copy propagation on, 2451 - none of the operands or target are subsequently modified in the block 2452 2453 Currently src must be a pseudo-reg or a const_int. 2454 2455 SET_P is non-zero for computing the assignment hash table. */ 2456 2457 2458static void 2459compute_hash_table (set_p) 2460 int set_p; 2461{ 2462 unsigned int i; 2463 2464 /* While we compute the hash table we also compute a bit array of which 2465 registers are set in which blocks. 2466 ??? This isn't needed during const/copy propagation, but it's cheap to 2467 compute. Later. */ 2468 sbitmap_vector_zero (reg_set_in_block, n_basic_blocks); 2469 2470 /* Re-cache any INSN_LIST nodes we have allocated.
*/ 2471 clear_modify_mem_tables (); 2472 /* Some working arrays used to track first and last set in each block. */ 2473 reg_avail_info = (struct reg_avail_info*) 2474 gmalloc (max_gcse_regno * sizeof (struct reg_avail_info)); 2475 2476 for (i = 0; i < max_gcse_regno; ++i) 2477 reg_avail_info[i].last_bb = NEVER_SET; 2478 2479 for (current_bb = 0; current_bb < n_basic_blocks; current_bb++) 2480 { 2481 rtx insn; 2482 unsigned int regno; 2483 int in_libcall_block; 2484 2485 /* First pass over the instructions records information used to 2486 determine when registers and memory are first and last set. 2487 ??? hard-reg reg_set_in_block computation 2488 could be moved to compute_sets since they currently don't change. */ 2489 2490 for (insn = BLOCK_HEAD (current_bb); 2491 insn && insn != NEXT_INSN (BLOCK_END (current_bb)); 2492 insn = NEXT_INSN (insn)) 2493 { 2494 if (! INSN_P (insn)) 2495 continue; 2496 2497 if (GET_CODE (insn) == CALL_INSN) 2498 { 2499 bool clobbers_all = false; 2500#ifdef NON_SAVING_SETJMP 2501 if (NON_SAVING_SETJMP 2502 && find_reg_note (insn, REG_SETJMP, NULL_RTX)) 2503 clobbers_all = true; 2504#endif 2505 2506 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2507 if (clobbers_all 2508 || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) 2509 record_last_reg_set_info (insn, regno); 2510 2511 mark_call (insn); 2512 } 2513 2514 note_stores (PATTERN (insn), record_last_set_info, insn); 2515 } 2516 2517 /* The next pass builds the hash table. */ 2518 2519 for (insn = BLOCK_HEAD (current_bb), in_libcall_block = 0; 2520 insn && insn != NEXT_INSN (BLOCK_END (current_bb)); 2521 insn = NEXT_INSN (insn)) 2522 if (INSN_P (insn)) 2523 { 2524 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX)) 2525 in_libcall_block = 1; 2526 else if (set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX)) 2527 in_libcall_block = 0; 2528 hash_scan_insn (insn, set_p, in_libcall_block); 2529 if (!set_p && find_reg_note (insn, REG_RETVAL, NULL_RTX)) 2530 in_libcall_block = 0; 2531 } 2532 } 2533 2534 free (reg_avail_info); 2535 reg_avail_info = NULL; 2536} 2537 2538/* Allocate space for the set hash table. 2539 N_INSNS is the number of instructions in the function. 2540 It is used to determine the number of buckets to use. */ 2541 2542static void 2543alloc_set_hash_table (n_insns) 2544 int n_insns; 2545{ 2546 int n; 2547 2548 set_hash_table_size = n_insns / 4; 2549 if (set_hash_table_size < 11) 2550 set_hash_table_size = 11; 2551 2552 /* Attempt to maintain efficient use of hash table. 2553 Making it an odd number is simplest for now. 2554 ??? Later take some measurements. */ 2555 set_hash_table_size |= 1; 2556 n = set_hash_table_size * sizeof (struct expr *); 2557 set_hash_table = (struct expr **) gmalloc (n); 2558} 2559 2560/* Free things allocated by alloc_set_hash_table. */ 2561 2562static void 2563free_set_hash_table () 2564{ 2565 free (set_hash_table); 2566} 2567 2568/* Compute the hash table for doing copy/const propagation. */ 2569 2570static void 2571compute_set_hash_table () 2572{ 2573 /* Initialize count of number of entries in hash table. */ 2574 n_sets = 0; 2575 memset ((char *) set_hash_table, 0, 2576 set_hash_table_size * sizeof (struct expr *)); 2577 2578 compute_hash_table (1); 2579} 2580 2581/* Allocate space for the expression hash table. 2582 N_INSNS is the number of instructions in the function. 2583 It is used to determine the number of buckets to use. 
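For example, a function of 1000 insns gets 1000/2 = 500 buckets, forced odd to 501 below; very small functions are clamped to the 11-bucket minimum.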
*/ 2584 2585static void 2586alloc_expr_hash_table (n_insns) 2587 unsigned int n_insns; 2588{ 2589 int n; 2590 2591 expr_hash_table_size = n_insns / 2; 2592 /* Make sure the amount is usable. */ 2593 if (expr_hash_table_size < 11) 2594 expr_hash_table_size = 11; 2595 2596 /* Attempt to maintain efficient use of hash table. 2597 Making it an odd number is simplest for now. 2598 ??? Later take some measurements. */ 2599 expr_hash_table_size |= 1; 2600 n = expr_hash_table_size * sizeof (struct expr *); 2601 expr_hash_table = (struct expr **) gmalloc (n); 2602} 2603 2604/* Free things allocated by alloc_expr_hash_table. */ 2605 2606static void 2607free_expr_hash_table () 2608{ 2609 free (expr_hash_table); 2610} 2611 2612/* Compute the hash table for doing GCSE. */ 2613 2614static void 2615compute_expr_hash_table () 2616{ 2617 /* Initialize count of number of entries in hash table. */ 2618 n_exprs = 0; 2619 memset ((char *) expr_hash_table, 0, 2620 expr_hash_table_size * sizeof (struct expr *)); 2621 2622 compute_hash_table (0); 2623} 2624 2625/* Expression tracking support. */ 2626 2627/* Lookup pattern PAT in the expression table. 2628 The result is a pointer to the table entry, or NULL if not found. */ 2629 2630static struct expr * 2631lookup_expr (pat) 2632 rtx pat; 2633{ 2634 int do_not_record_p; 2635 unsigned int hash = hash_expr (pat, GET_MODE (pat), &do_not_record_p, 2636 expr_hash_table_size); 2637 struct expr *expr; 2638 2639 if (do_not_record_p) 2640 return NULL; 2641 2642 expr = expr_hash_table[hash]; 2643 2644 while (expr && ! expr_equiv_p (expr->expr, pat)) 2645 expr = expr->next_same_hash; 2646 2647 return expr; 2648} 2649 2650/* Lookup REGNO in the set table. If PAT is non-NULL look for the entry that 2651 matches it, otherwise return the first entry for REGNO. The result is a 2652 pointer to the table entry, or NULL if not found. */ 2653 2654static struct expr * 2655lookup_set (regno, pat) 2656 unsigned int regno; 2657 rtx pat; 2658{ 2659 unsigned int hash = hash_set (regno, set_hash_table_size); 2660 struct expr *expr; 2661 2662 expr = set_hash_table[hash]; 2663 2664 if (pat) 2665 { 2666 while (expr && ! expr_equiv_p (expr->expr, pat)) 2667 expr = expr->next_same_hash; 2668 } 2669 else 2670 { 2671 while (expr && REGNO (SET_DEST (expr->expr)) != regno) 2672 expr = expr->next_same_hash; 2673 } 2674 2675 return expr; 2676} 2677 2678/* Return the next entry for REGNO in list EXPR. */ 2679 2680static struct expr * 2681next_set (regno, expr) 2682 unsigned int regno; 2683 struct expr *expr; 2684{ 2685 do 2686 expr = expr->next_same_hash; 2687 while (expr && REGNO (SET_DEST (expr->expr)) != regno); 2688 2689 return expr; 2690} 2691 2692/* Clear canon_modify_mem_list and modify_mem_list tables. */ 2693static void 2694clear_modify_mem_tables () 2695{ 2696 int i; 2697 2698 EXECUTE_IF_SET_IN_BITMAP 2699 (modify_mem_list_set, 0, i, 2700 free_INSN_LIST_list (modify_mem_list + i)); 2701 bitmap_clear (modify_mem_list_set); 2702 2703 EXECUTE_IF_SET_IN_BITMAP 2704 (canon_modify_mem_list_set, 0, i, 2705 free_INSN_LIST_list (canon_modify_mem_list + i)); 2706 bitmap_clear (canon_modify_mem_list_set); 2707} 2708 2709/* Release memory used by modify_mem_list_set and canon_modify_mem_list_set.
*/ 2710 2711static void 2712free_modify_mem_tables () 2713{ 2714 clear_modify_mem_tables (); 2715 free (modify_mem_list); 2716 free (canon_modify_mem_list); 2717 modify_mem_list = 0; 2718 canon_modify_mem_list = 0; 2719} 2720 2721/* Reset tables used to keep track of what's still available [since the 2722 start of the block]. */ 2723 2724static void 2725reset_opr_set_tables () 2726{ 2727 /* Maintain a bitmap of which regs have been set since beginning of 2728 the block. */ 2729 CLEAR_REG_SET (reg_set_bitmap); 2730 2731 /* Also keep a record of the last instruction to modify memory. 2732 For now this is very trivial, we only record whether any memory 2733 location has been modified. */ 2734 clear_modify_mem_tables (); 2735} 2736 2737/* Return non-zero if the operands of X are not set before INSN in 2738 INSN's basic block. */ 2739 2740static int 2741oprs_not_set_p (x, insn) 2742 rtx x, insn; 2743{ 2744 int i, j; 2745 enum rtx_code code; 2746 const char *fmt; 2747 2748 if (x == 0) 2749 return 1; 2750 2751 code = GET_CODE (x); 2752 switch (code) 2753 { 2754 case PC: 2755 case CC0: 2756 case CONST: 2757 case CONST_INT: 2758 case CONST_DOUBLE: 2759 case SYMBOL_REF: 2760 case LABEL_REF: 2761 case ADDR_VEC: 2762 case ADDR_DIFF_VEC: 2763 return 1; 2764 2765 case MEM: 2766 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn), 2767 INSN_CUID (insn), x, 0)) 2768 return 0; 2769 else 2770 return oprs_not_set_p (XEXP (x, 0), insn); 2771 2772 case REG: 2773 return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x)); 2774 2775 default: 2776 break; 2777 } 2778 2779 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 2780 { 2781 if (fmt[i] == 'e') 2782 { 2783 /* If we are about to do the last recursive call 2784 needed at this level, change it into iteration. 2785 This function is called enough to be worth it. */ 2786 if (i == 0) 2787 return oprs_not_set_p (XEXP (x, i), insn); 2788 2789 if (! oprs_not_set_p (XEXP (x, i), insn)) 2790 return 0; 2791 } 2792 else if (fmt[i] == 'E') 2793 for (j = 0; j < XVECLEN (x, i); j++) 2794 if (! oprs_not_set_p (XVECEXP (x, i, j), insn)) 2795 return 0; 2796 } 2797 2798 return 1; 2799} 2800 2801/* Mark things set by a CALL. */ 2802 2803static void 2804mark_call (insn) 2805 rtx insn; 2806{ 2807 if (! CONST_OR_PURE_CALL_P (insn)) 2808 record_last_mem_set_info (insn); 2809} 2810 2811/* Mark things set by a SET. */ 2812 2813static void 2814mark_set (pat, insn) 2815 rtx pat, insn; 2816{ 2817 rtx dest = SET_DEST (pat); 2818 2819 while (GET_CODE (dest) == SUBREG 2820 || GET_CODE (dest) == ZERO_EXTRACT 2821 || GET_CODE (dest) == SIGN_EXTRACT 2822 || GET_CODE (dest) == STRICT_LOW_PART) 2823 dest = XEXP (dest, 0); 2824 2825 if (GET_CODE (dest) == REG) 2826 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest)); 2827 else if (GET_CODE (dest) == MEM) 2828 record_last_mem_set_info (insn); 2829 2830 if (GET_CODE (SET_SRC (pat)) == CALL) 2831 mark_call (insn); 2832} 2833 2834/* Record things set by a CLOBBER. */ 2835 2836static void 2837mark_clobber (pat, insn) 2838 rtx pat, insn; 2839{ 2840 rtx clob = XEXP (pat, 0); 2841 2842 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART) 2843 clob = XEXP (clob, 0); 2844 2845 if (GET_CODE (clob) == REG) 2846 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob)); 2847 else 2848 record_last_mem_set_info (insn); 2849} 2850 2851/* Record things set by INSN. 2852 This data is used by oprs_not_set_p. 
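E.g. once (set (reg 5) (const_int 0)) has been scanned here, REGNO 5 is in reg_set_bitmap, and any later oprs_not_set_p query in this block returns 0 for an expression mentioning (reg 5).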
*/ 2853 2854static void 2855mark_oprs_set (insn) 2856 rtx insn; 2857{ 2858 rtx pat = PATTERN (insn); 2859 int i; 2860 2861 if (GET_CODE (pat) == SET) 2862 mark_set (pat, insn); 2863 else if (GET_CODE (pat) == PARALLEL) 2864 for (i = 0; i < XVECLEN (pat, 0); i++) 2865 { 2866 rtx x = XVECEXP (pat, 0, i); 2867 2868 if (GET_CODE (x) == SET) 2869 mark_set (x, insn); 2870 else if (GET_CODE (x) == CLOBBER) 2871 mark_clobber (x, insn); 2872 else if (GET_CODE (x) == CALL) 2873 mark_call (insn); 2874 } 2875 2876 else if (GET_CODE (pat) == CLOBBER) 2877 mark_clobber (pat, insn); 2878 else if (GET_CODE (pat) == CALL) 2879 mark_call (insn); 2880} 2881 2882 2883/* Classic GCSE reaching definition support. */ 2884 2885/* Allocate reaching def variables. */ 2886 2887static void 2888alloc_rd_mem (n_blocks, n_insns) 2889 int n_blocks, n_insns; 2890{ 2891 rd_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2892 sbitmap_vector_zero (rd_kill, n_basic_blocks); 2893 2894 rd_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2895 sbitmap_vector_zero (rd_gen, n_basic_blocks); 2896 2897 reaching_defs = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2898 sbitmap_vector_zero (reaching_defs, n_basic_blocks); 2899 2900 rd_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_insns); 2901 sbitmap_vector_zero (rd_out, n_basic_blocks); 2902} 2903 2904/* Free reaching def variables. */ 2905 2906static void 2907free_rd_mem () 2908{ 2909 sbitmap_vector_free (rd_kill); 2910 sbitmap_vector_free (rd_gen); 2911 sbitmap_vector_free (reaching_defs); 2912 sbitmap_vector_free (rd_out); 2913} 2914 2915/* Add INSN to the kills of BB. REGNO, set in BB, is killed by INSN. */ 2916 2917static void 2918handle_rd_kill_set (insn, regno, bb) 2919 rtx insn; 2920 int regno; 2921 basic_block bb; 2922{ 2923 struct reg_set *this_reg; 2924 2925 for (this_reg = reg_set_table[regno]; this_reg; this_reg = this_reg->next) 2926 if (BLOCK_NUM (this_reg->insn) != BLOCK_NUM (insn)) 2927 SET_BIT (rd_kill[bb->index], INSN_CUID (this_reg->insn)); 2928} 2929 2930/* Compute the set of kills for reaching definitions. */ 2931 2932static void 2933compute_kill_rd () 2934{ 2935 int bb, cuid; 2936 unsigned int regno; 2937 int i; 2938 2939 /* For each block 2940 For each set bit in `gen' of the block (i.e. each insn which 2941 generates a definition in the block) 2942 Call the reg set by the insn corresponding to that bit regx 2943 Look at the linked list starting at reg_set_table[regx] 2944 For each setting of regx in the linked list, which is not in 2945 this block 2946 Set the bit in `kill' corresponding to that insn.
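As a concrete (hypothetical) illustration: if reg 5 is set by the insn with CUID 10 in block 2 and by CUID 30 in block 7, then rd_gen[2] has bit 10 set, so this loop sets bit 30 in rd_kill[2]; symmetrically, bit 10 gets set in rd_kill[7].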
*/ 2947 for (bb = 0; bb < n_basic_blocks; bb++) 2948 for (cuid = 0; cuid < max_cuid; cuid++) 2949 if (TEST_BIT (rd_gen[bb], cuid)) 2950 { 2951 rtx insn = CUID_INSN (cuid); 2952 rtx pat = PATTERN (insn); 2953 2954 if (GET_CODE (insn) == CALL_INSN) 2955 { 2956 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 2957 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno)) 2958 handle_rd_kill_set (insn, regno, BASIC_BLOCK (bb)); 2959 } 2960 2961 if (GET_CODE (pat) == PARALLEL) 2962 { 2963 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) 2964 { 2965 enum rtx_code code = GET_CODE (XVECEXP (pat, 0, i)); 2966 2967 if ((code == SET || code == CLOBBER) 2968 && GET_CODE (XEXP (XVECEXP (pat, 0, i), 0)) == REG) 2969 handle_rd_kill_set (insn, 2970 REGNO (XEXP (XVECEXP (pat, 0, i), 0)), 2971 BASIC_BLOCK (bb)); 2972 } 2973 } 2974 else if (GET_CODE (pat) == SET && GET_CODE (SET_DEST (pat)) == REG) 2975 /* Each setting of this register outside of this block 2976 must be marked in the set of kills in this block. */ 2977 handle_rd_kill_set (insn, REGNO (SET_DEST (pat)), BASIC_BLOCK (bb)); 2978 } 2979} 2980 2981/* Compute the reaching definitions as in 2982 Compilers Principles, Techniques, and Tools. Aho, Sethi, Ullman, 2983 Chapter 10. It is the same algorithm as used for computing available 2984 expressions but applied to the gens and kills of reaching definitions. */ 2985 2986static void 2987compute_rd () 2988{ 2989 int bb, changed, passes; 2990 2991 for (bb = 0; bb < n_basic_blocks; bb++) 2992 sbitmap_copy (rd_out[bb] /*dst*/, rd_gen[bb] /*src*/); 2993 2994 passes = 0; 2995 changed = 1; 2996 while (changed) 2997 { 2998 changed = 0; 2999 for (bb = 0; bb < n_basic_blocks; bb++) 3000 { 3001 sbitmap_union_of_preds (reaching_defs[bb], rd_out, bb); 3002 changed |= sbitmap_union_of_diff (rd_out[bb], rd_gen[bb], 3003 reaching_defs[bb], rd_kill[bb]); 3004 } 3005 passes++; 3006 } 3007 3008 if (gcse_file) 3009 fprintf (gcse_file, "reaching def computation: %d passes\n", passes); 3010} 3011 3012/* Classic GCSE available expression support. */ 3013 3014/* Allocate memory for available expression computation. */ 3015 3016static void 3017alloc_avail_expr_mem (n_blocks, n_exprs) 3018 int n_blocks, n_exprs; 3019{ 3020 ae_kill = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 3021 sbitmap_vector_zero (ae_kill, n_basic_blocks); 3022 3023 ae_gen = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 3024 sbitmap_vector_zero (ae_gen, n_basic_blocks); 3025 3026 ae_in = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 3027 sbitmap_vector_zero (ae_in, n_basic_blocks); 3028 3029 ae_out = (sbitmap *) sbitmap_vector_alloc (n_blocks, n_exprs); 3030 sbitmap_vector_zero (ae_out, n_basic_blocks); 3031} 3032 3033static void 3034free_avail_expr_mem () 3035{ 3036 sbitmap_vector_free (ae_kill); 3037 sbitmap_vector_free (ae_gen); 3038 sbitmap_vector_free (ae_in); 3039 sbitmap_vector_free (ae_out); 3040} 3041 3042/* Compute the set of available expressions generated in each basic block. */ 3043 3044static void 3045compute_ae_gen () 3046{ 3047 unsigned int i; 3048 struct expr *expr; 3049 struct occr *occr; 3050 3051 /* For each recorded occurrence of each expression, set ae_gen[bb][expr]. 3052 This is all we have to do because an expression is not recorded if it 3053 is not available, and the only expressions we want to work with are the 3054 ones that are recorded. 
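The global phase is left to compute_available (invoked from one_classic_gcse_pass), which solves the usual forward dataflow problem, roughly:

 ae_in[b] = intersection of ae_out[p] over all predecessors p
 ae_out[b] = ae_gen[b] | (ae_in[b] & ~ae_kill[b])

 iterated to a fixed point, with ae_in of the entry block empty.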
*/ 3055 for (i = 0; i < expr_hash_table_size; i++) 3056 for (expr = expr_hash_table[i]; expr != 0; expr = expr->next_same_hash) 3057 for (occr = expr->avail_occr; occr != 0; occr = occr->next) 3058 SET_BIT (ae_gen[BLOCK_NUM (occr->insn)], expr->bitmap_index); 3059} 3060 3061/* Return non-zero if expression X is killed in BB. */ 3062 3063static int 3064expr_killed_p (x, bb) 3065 rtx x; 3066 basic_block bb; 3067{ 3068 int i, j; 3069 enum rtx_code code; 3070 const char *fmt; 3071 3072 if (x == 0) 3073 return 1; 3074 3075 code = GET_CODE (x); 3076 switch (code) 3077 { 3078 case REG: 3079 return TEST_BIT (reg_set_in_block[bb->index], REGNO (x)); 3080 3081 case MEM: 3082 if (load_killed_in_block_p (bb, get_max_uid () + 1, x, 0)) 3083 return 1; 3084 else 3085 return expr_killed_p (XEXP (x, 0), bb); 3086 3087 case PC: 3088 case CC0: /*FIXME*/ 3089 case CONST: 3090 case CONST_INT: 3091 case CONST_DOUBLE: 3092 case SYMBOL_REF: 3093 case LABEL_REF: 3094 case ADDR_VEC: 3095 case ADDR_DIFF_VEC: 3096 return 0; 3097 3098 default: 3099 break; 3100 } 3101 3102 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 3103 { 3104 if (fmt[i] == 'e') 3105 { 3106 /* If we are about to do the last recursive call 3107 needed at this level, change it into iteration. 3108 This function is called enough to be worth it. */ 3109 if (i == 0) 3110 return expr_killed_p (XEXP (x, i), bb); 3111 else if (expr_killed_p (XEXP (x, i), bb)) 3112 return 1; 3113 } 3114 else if (fmt[i] == 'E') 3115 for (j = 0; j < XVECLEN (x, i); j++) 3116 if (expr_killed_p (XVECEXP (x, i, j), bb)) 3117 return 1; 3118 } 3119 3120 return 0; 3121} 3122 3123/* Compute the set of available expressions killed in each basic block. */ 3124 3125static void 3126compute_ae_kill (ae_gen, ae_kill) 3127 sbitmap *ae_gen, *ae_kill; 3128{ 3129 int bb; 3130 unsigned int i; 3131 struct expr *expr; 3132 3133 for (bb = 0; bb < n_basic_blocks; bb++) 3134 for (i = 0; i < expr_hash_table_size; i++) 3135 for (expr = expr_hash_table[i]; expr; expr = expr->next_same_hash) 3136 { 3137 /* Skip EXPR if generated in this block. */ 3138 if (TEST_BIT (ae_gen[bb], expr->bitmap_index)) 3139 continue; 3140 3141 if (expr_killed_p (expr->expr, BASIC_BLOCK (bb))) 3142 SET_BIT (ae_kill[bb], expr->bitmap_index); 3143 } 3144} 3145 3146/* Actually perform the Classic GCSE optimizations. */ 3147 3148/* Return non-zero if occurrence OCCR of expression EXPR reaches block BB. 3149 3150 CHECK_SELF_LOOP is non-zero if we should consider a block reaching itself 3151 as a positive reach. We want to do this when there are two computations 3152 of the expression in the block. 3153 3154 VISITED is a pointer to a working buffer for tracking which BB's have 3155 been visited. It is NULL for the top-level call. 3156 3157 We treat reaching expressions that go through blocks containing the same 3158 reaching expression as "not reaching". E.g. if EXPR is generated in blocks 3159 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block 3160 2 as not reaching. The intent is to improve the probability of finding 3161 only one reaching expression and to reduce register lifetimes by picking 3162 the closest such expression. 
*/ 3163 3164static int 3165expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited) 3166 struct occr *occr; 3167 struct expr *expr; 3168 basic_block bb; 3169 int check_self_loop; 3170 char *visited; 3171{ 3172 edge pred; 3173 3174 for (pred = bb->pred; pred != NULL; pred = pred->pred_next) 3175 { 3176 basic_block pred_bb = pred->src; 3177 3178 if (visited[pred_bb->index]) 3179 /* This predecessor has already been visited. Nothing to do. */ 3180 ; 3181 else if (pred_bb == bb) 3182 { 3183 /* BB loops on itself. */ 3184 if (check_self_loop 3185 && TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index) 3186 && BLOCK_NUM (occr->insn) == pred_bb->index) 3187 return 1; 3188 3189 visited[pred_bb->index] = 1; 3190 } 3191 3192 /* Ignore this predecessor if it kills the expression. */ 3193 else if (TEST_BIT (ae_kill[pred_bb->index], expr->bitmap_index)) 3194 visited[pred_bb->index] = 1; 3195 3196 /* Does this predecessor generate this expression? */ 3197 else if (TEST_BIT (ae_gen[pred_bb->index], expr->bitmap_index)) 3198 { 3199 /* Is this the occurrence we're looking for? 3200 Note that there's only one generating occurrence per block 3201 so we just need to check the block number. */ 3202 if (BLOCK_NUM (occr->insn) == pred_bb->index) 3203 return 1; 3204 3205 visited[pred_bb->index] = 1; 3206 } 3207 3208 /* Neither gen nor kill. */ 3209 else 3210 { 3211 visited[pred_bb->index] = 1; 3212 if (expr_reaches_here_p_work (occr, expr, pred_bb, check_self_loop, 3213 visited)) 3214 3215 return 1; 3216 } 3217 } 3218 3219 /* All paths have been checked. */ 3220 return 0; 3221} 3222 3223/* This wrapper for expr_reaches_here_p_work() is to ensure that any 3224 memory allocated for that function is returned. */ 3225 3226static int 3227expr_reaches_here_p (occr, expr, bb, check_self_loop) 3228 struct occr *occr; 3229 struct expr *expr; 3230 basic_block bb; 3231 int check_self_loop; 3232{ 3233 int rval; 3234 char *visited = (char *) xcalloc (n_basic_blocks, 1); 3235 3236 rval = expr_reaches_here_p_work (occr, expr, bb, check_self_loop, visited); 3237 3238 free (visited); 3239 return rval; 3240} 3241 3242/* Return the instruction that computes EXPR that reaches INSN's basic block. 3243 If there is more than one such instruction, return NULL. 3244 3245 Called only by handle_avail_expr. */ 3246 3247static rtx 3248computing_insn (expr, insn) 3249 struct expr *expr; 3250 rtx insn; 3251{ 3252 basic_block bb = BLOCK_FOR_INSN (insn); 3253 3254 if (expr->avail_occr->next == NULL) 3255 { 3256 if (BLOCK_FOR_INSN (expr->avail_occr->insn) == bb) 3257 /* The available expression is actually itself 3258 (i.e. a loop in the flow graph) so do nothing. */ 3259 return NULL; 3260 3261 /* (FIXME) Case that we found a pattern that was created by 3262 a substitution that took place. */ 3263 return expr->avail_occr->insn; 3264 } 3265 else 3266 { 3267 /* Pattern is computed more than once. 3268 Search backwards from this insn to see how many of these 3269 computations actually reach this insn. */ 3270 struct occr *occr; 3271 rtx insn_computes_expr = NULL; 3272 int can_reach = 0; 3273 3274 for (occr = expr->avail_occr; occr != NULL; occr = occr->next) 3275 { 3276 if (BLOCK_FOR_INSN (occr->insn) == bb) 3277 { 3278 /* The expression is generated in this block. 3279 The only time we care about this is when the expression 3280 is generated later in the block [and thus there's a loop]. 3281 We let the normal cse pass handle the other cases. 
*/ 3282 if (INSN_CUID (insn) < INSN_CUID (occr->insn) 3283 && expr_reaches_here_p (occr, expr, bb, 1)) 3284 { 3285 can_reach++; 3286 if (can_reach > 1) 3287 return NULL; 3288 3289 insn_computes_expr = occr->insn; 3290 } 3291 } 3292 else if (expr_reaches_here_p (occr, expr, bb, 0)) 3293 { 3294 can_reach++; 3295 if (can_reach > 1) 3296 return NULL; 3297 3298 insn_computes_expr = occr->insn; 3299 } 3300 } 3301 3302 if (insn_computes_expr == NULL) 3303 abort (); 3304 3305 return insn_computes_expr; 3306 } 3307} 3308 3309/* Return non-zero if the definition in DEF_INSN can reach INSN. 3310 Only called by can_disregard_other_sets. */ 3311 3312static int 3313def_reaches_here_p (insn, def_insn) 3314 rtx insn, def_insn; 3315{ 3316 rtx reg; 3317 3318 if (TEST_BIT (reaching_defs[BLOCK_NUM (insn)], INSN_CUID (def_insn))) 3319 return 1; 3320 3321 if (BLOCK_NUM (insn) == BLOCK_NUM (def_insn)) 3322 { 3323 if (INSN_CUID (def_insn) < INSN_CUID (insn)) 3324 { 3325 if (GET_CODE (PATTERN (def_insn)) == PARALLEL) 3326 return 1; 3327 else if (GET_CODE (PATTERN (def_insn)) == CLOBBER) 3328 reg = XEXP (PATTERN (def_insn), 0); 3329 else if (GET_CODE (PATTERN (def_insn)) == SET) 3330 reg = SET_DEST (PATTERN (def_insn)); 3331 else 3332 abort (); 3333 3334 return ! reg_set_between_p (reg, NEXT_INSN (def_insn), insn); 3335 } 3336 else 3337 return 0; 3338 } 3339 3340 return 0; 3341} 3342 3343/* Return non-zero if *ADDR_THIS_REG can only have one value at INSN. The 3344 value returned is the number of definitions that reach INSN. Returning a 3345 value of zero means that [maybe] more than one definition reaches INSN and 3346 the caller can't perform whatever optimization it is trying. i.e. it is 3347 always safe to return zero. */ 3348 3349static int 3350can_disregard_other_sets (addr_this_reg, insn, for_combine) 3351 struct reg_set **addr_this_reg; 3352 rtx insn; 3353 int for_combine; 3354{ 3355 int number_of_reaching_defs = 0; 3356 struct reg_set *this_reg; 3357 3358 for (this_reg = *addr_this_reg; this_reg != 0; this_reg = this_reg->next) 3359 if (def_reaches_here_p (insn, this_reg->insn)) 3360 { 3361 number_of_reaching_defs++; 3362 /* Ignore parallels for now. */ 3363 if (GET_CODE (PATTERN (this_reg->insn)) == PARALLEL) 3364 return 0; 3365 3366 if (!for_combine 3367 && (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER 3368 || ! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)), 3369 SET_SRC (PATTERN (insn))))) 3370 /* A setting of the reg to a different value reaches INSN. */ 3371 return 0; 3372 3373 if (number_of_reaching_defs > 1) 3374 { 3375 /* If in this setting the value the register is being set to is 3376 equal to the previous value the register was set to and this 3377 setting reaches the insn we are trying to do the substitution 3378 on then we are ok. */ 3379 if (GET_CODE (PATTERN (this_reg->insn)) == CLOBBER) 3380 return 0; 3381 else if (! rtx_equal_p (SET_SRC (PATTERN (this_reg->insn)), 3382 SET_SRC (PATTERN (insn)))) 3383 return 0; 3384 } 3385 3386 *addr_this_reg = this_reg; 3387 } 3388 3389 return number_of_reaching_defs; 3390} 3391 3392/* Expression computed by insn is available and the substitution is legal, 3393 so try to perform the substitution. 3394 3395 The result is non-zero if any changes were made. 
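For example, if the only computation reaching INSN is an earlier (set (reg 5) (plus (reg 1) (reg 2))) and INSN is (set (reg 9) (plus (reg 1) (reg 2))), we try to rewrite INSN's source to (reg 5). If reg 5 is set more than once, we instead copy it into a fresh pseudo right after the computing insn and use that pseudo.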
*/ 3396 3397static int 3398handle_avail_expr (insn, expr) 3399 rtx insn; 3400 struct expr *expr; 3401{ 3402 rtx pat, insn_computes_expr, expr_set; 3403 rtx to; 3404 struct reg_set *this_reg; 3405 int found_setting, use_src; 3406 int changed = 0; 3407 3408 /* We only handle the case where one computation of the expression 3409 reaches this instruction. */ 3410 insn_computes_expr = computing_insn (expr, insn); 3411 if (insn_computes_expr == NULL) 3412 return 0; 3413 expr_set = single_set (insn_computes_expr); 3414 if (!expr_set) 3415 abort (); 3416 3417 found_setting = 0; 3418 use_src = 0; 3419 3420 /* At this point we know only one computation of EXPR outside of this 3421 block reaches this insn. Now try to find a register that the 3422 expression is computed into. */ 3423 if (GET_CODE (SET_SRC (expr_set)) == REG) 3424 { 3425 /* This is the case when the available expression that reaches 3426 here has already been handled as an available expression. */ 3427 unsigned int regnum_for_replacing 3428 = REGNO (SET_SRC (expr_set)); 3429 3430 /* If the register was created by GCSE we can't use `reg_set_table', 3431 however we know it's set only once. */ 3432 if (regnum_for_replacing >= max_gcse_regno 3433 /* If the register the expression is computed into is set only once, 3434 or only one set reaches this insn, we can use it. */ 3435 || (((this_reg = reg_set_table[regnum_for_replacing]), 3436 this_reg->next == NULL) 3437 || can_disregard_other_sets (&this_reg, insn, 0))) 3438 { 3439 use_src = 1; 3440 found_setting = 1; 3441 } 3442 } 3443 3444 if (!found_setting) 3445 { 3446 unsigned int regnum_for_replacing 3447 = REGNO (SET_DEST (expr_set)); 3448 3449 /* This shouldn't happen. */ 3450 if (regnum_for_replacing >= max_gcse_regno) 3451 abort (); 3452 3453 this_reg = reg_set_table[regnum_for_replacing]; 3454 3455 /* If the register the expression is computed into is set only once, 3456 or only one set reaches this insn, use it. */ 3457 if (this_reg->next == NULL 3458 || can_disregard_other_sets (&this_reg, insn, 0)) 3459 found_setting = 1; 3460 } 3461 3462 if (found_setting) 3463 { 3464 pat = PATTERN (insn); 3465 if (use_src) 3466 to = SET_SRC (expr_set); 3467 else 3468 to = SET_DEST (expr_set); 3469 changed = validate_change (insn, &SET_SRC (pat), to, 0); 3470 3471 /* We should be able to ignore the return code from validate_change but 3472 to play it safe we check. */ 3473 if (changed) 3474 { 3475 gcse_subst_count++; 3476 if (gcse_file != NULL) 3477 { 3478 fprintf (gcse_file, "GCSE: Replacing the source in insn %d with", 3479 INSN_UID (insn)); 3480 fprintf (gcse_file, " reg %d %s insn %d\n", 3481 REGNO (to), use_src ? "from" : "set in", 3482 INSN_UID (insn_computes_expr)); 3483 } 3484 } 3485 } 3486 3487 /* The register that the expr is computed into is set more than once. */ 3488 else if (1 /*expensive_op(this_pattern->op) && do_expensive_gcse)*/) 3489 { 3490 /* Insert an insn after INSN_COMPUTES_EXPR that copies the register 3491 set there into a new pseudo register; call this new register REGN. 3492 Below, INSN's source is then replaced with REGN, so INSN 3493 reads the copied value. */ 3494 rtx new_insn; 3495 3496 to = gen_reg_rtx (GET_MODE (SET_DEST (expr_set))); 3497 3498 /* Generate the new insn. */ 3499 /* ??? If the change fails, we return 0, even though we created 3500 an insn. I think this is ok. */ 3501 new_insn 3502 = emit_insn_after (gen_rtx_SET (VOIDmode, to, 3503 SET_DEST (expr_set)), 3504 insn_computes_expr); 3505 3506 /* Keep register set table up to date.
*/ 3507 record_one_set (REGNO (to), new_insn); 3508 3509 gcse_create_count++; 3510 if (gcse_file != NULL) 3511 { 3512 fprintf (gcse_file, "GCSE: Creating insn %d to copy value of reg %d", 3513 INSN_UID (NEXT_INSN (insn_computes_expr)), 3514 REGNO (SET_SRC (PATTERN (NEXT_INSN (insn_computes_expr))))); 3515 fprintf (gcse_file, ", computed in insn %d,\n", 3516 INSN_UID (insn_computes_expr)); 3517 fprintf (gcse_file, " into newly allocated reg %d\n", 3518 REGNO (to)); 3519 } 3520 3521 pat = PATTERN (insn); 3522 3523 /* Do register replacement for INSN. */ 3524 changed = validate_change (insn, &SET_SRC (pat), 3525 SET_DEST (PATTERN 3526 (NEXT_INSN (insn_computes_expr))), 3527 0); 3528 3529 /* We should be able to ignore the return code from validate_change but 3530 to play it safe we check. */ 3531 if (changed) 3532 { 3533 gcse_subst_count++; 3534 if (gcse_file != NULL) 3535 { 3536 fprintf (gcse_file, 3537 "GCSE: Replacing the source in insn %d with reg %d ", 3538 INSN_UID (insn), 3539 REGNO (SET_DEST (PATTERN (NEXT_INSN 3540 (insn_computes_expr))))); 3541 fprintf (gcse_file, "set in insn %d\n", 3542 INSN_UID (insn_computes_expr)); 3543 } 3544 } 3545 } 3546 3547 return changed; 3548} 3549 3550/* Perform classic GCSE. This is called by one_classic_gcse_pass after all 3551 the dataflow analysis has been done. 3552 3553 The result is non-zero if a change was made. */ 3554 3555static int 3556classic_gcse () 3557{ 3558 int bb, changed; 3559 rtx insn; 3560 3561 /* Note we start at block 1. */ 3562 3563 changed = 0; 3564 for (bb = 1; bb < n_basic_blocks; bb++) 3565 { 3566 /* Reset tables used to keep track of what's still valid [since the 3567 start of the block]. */ 3568 reset_opr_set_tables (); 3569 3570 for (insn = BLOCK_HEAD (bb); 3571 insn != NULL && insn != NEXT_INSN (BLOCK_END (bb)); 3572 insn = NEXT_INSN (insn)) 3573 { 3574 /* Is insn of form (set (pseudo-reg) ...)? */ 3575 if (GET_CODE (insn) == INSN 3576 && GET_CODE (PATTERN (insn)) == SET 3577 && GET_CODE (SET_DEST (PATTERN (insn))) == REG 3578 && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_PSEUDO_REGISTER) 3579 { 3580 rtx pat = PATTERN (insn); 3581 rtx src = SET_SRC (pat); 3582 struct expr *expr; 3583 3584 if (want_to_gcse_p (src) 3585 /* Is the expression recorded? */ 3586 && ((expr = lookup_expr (src)) != NULL) 3587 /* Is the expression available [at the start of the 3588 block]? */ 3589 && TEST_BIT (ae_in[bb], expr->bitmap_index) 3590 /* Are the operands unchanged since the start of the 3591 block? */ 3592 && oprs_not_set_p (src, insn)) 3593 changed |= handle_avail_expr (insn, expr); 3594 } 3595 3596 /* Keep track of everything modified by this insn. */ 3597 /* ??? Need to be careful w.r.t. mods done to INSN. */ 3598 if (INSN_P (insn)) 3599 mark_oprs_set (insn); 3600 } 3601 } 3602 3603 return changed; 3604} 3605 3606/* Top level routine to perform one classic GCSE pass. 3607 3608 Return non-zero if a change was made. 
*/ 3609 3610static int 3611one_classic_gcse_pass (pass) 3612 int pass; 3613{ 3614 int changed = 0; 3615 3616 gcse_subst_count = 0; 3617 gcse_create_count = 0; 3618 3619 alloc_expr_hash_table (max_cuid); 3620 alloc_rd_mem (n_basic_blocks, max_cuid); 3621 compute_expr_hash_table (); 3622 if (gcse_file) 3623 dump_hash_table (gcse_file, "Expression", expr_hash_table, 3624 expr_hash_table_size, n_exprs); 3625 3626 if (n_exprs > 0) 3627 { 3628 compute_kill_rd (); 3629 compute_rd (); 3630 alloc_avail_expr_mem (n_basic_blocks, n_exprs); 3631 compute_ae_gen (); 3632 compute_ae_kill (ae_gen, ae_kill); 3633 compute_available (ae_gen, ae_kill, ae_out, ae_in); 3634 changed = classic_gcse (); 3635 free_avail_expr_mem (); 3636 } 3637 3638 free_rd_mem (); 3639 free_expr_hash_table (); 3640 3641 if (gcse_file) 3642 { 3643 fprintf (gcse_file, "\n"); 3644 fprintf (gcse_file, "GCSE of %s, pass %d: %d bytes needed, %d substs,", 3645 current_function_name, pass, bytes_used, gcse_subst_count); 3646 fprintf (gcse_file, "%d insns created\n", gcse_create_count); 3647 } 3648 3649 return changed; 3650} 3651 3652/* Compute copy/constant propagation working variables. */ 3653 3654/* Local properties of assignments. */ 3655static sbitmap *cprop_pavloc; 3656static sbitmap *cprop_absaltered; 3657 3658/* Global properties of assignments (computed from the local properties). */ 3659static sbitmap *cprop_avin; 3660static sbitmap *cprop_avout; 3661 3662/* Allocate vars used for copy/const propagation. N_BLOCKS is the number of 3663 basic blocks. N_SETS is the number of sets. */ 3664 3665static void 3666alloc_cprop_mem (n_blocks, n_sets) 3667 int n_blocks, n_sets; 3668{ 3669 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets); 3670 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets); 3671 3672 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets); 3673 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets); 3674} 3675 3676/* Free vars used by copy/const propagation. */ 3677 3678static void 3679free_cprop_mem () 3680{ 3681 sbitmap_vector_free (cprop_pavloc); 3682 sbitmap_vector_free (cprop_absaltered); 3683 sbitmap_vector_free (cprop_avin); 3684 sbitmap_vector_free (cprop_avout); 3685} 3686 3687/* For each block, compute whether X is transparent. X is either an 3688 expression or an assignment [though we don't care which, for this context 3689 an assignment is treated as an expression]. For each block where an 3690 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX 3691 bit in BMAP. */ 3692 3693static void 3694compute_transp (x, indx, bmap, set_p) 3695 rtx x; 3696 int indx; 3697 sbitmap *bmap; 3698 int set_p; 3699{ 3700 int bb, i, j; 3701 enum rtx_code code; 3702 reg_set *r; 3703 const char *fmt; 3704 3705 /* repeat is used to turn tail-recursion into iteration since GCC 3706 can't do it when there's no return value. 
  */
 repeat:

  if (x == 0)
    return;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      if (set_p)
        {
          if (REGNO (x) < FIRST_PSEUDO_REGISTER)
            {
              for (bb = 0; bb < n_basic_blocks; bb++)
                if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
                  SET_BIT (bmap[bb], indx);
            }
          else
            {
              for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
                SET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
            }
        }
      else
        {
          if (REGNO (x) < FIRST_PSEUDO_REGISTER)
            {
              for (bb = 0; bb < n_basic_blocks; bb++)
                if (TEST_BIT (reg_set_in_block[bb], REGNO (x)))
                  RESET_BIT (bmap[bb], indx);
            }
          else
            {
              for (r = reg_set_table[REGNO (x)]; r != NULL; r = r->next)
                RESET_BIT (bmap[BLOCK_NUM (r->insn)], indx);
            }
        }

      return;

    case MEM:
      for (bb = 0; bb < n_basic_blocks; bb++)
        {
          rtx list_entry = canon_modify_mem_list[bb];

          while (list_entry)
            {
              rtx dest, dest_addr;

              if (GET_CODE (XEXP (list_entry, 0)) == CALL_INSN)
                {
                  if (set_p)
                    SET_BIT (bmap[bb], indx);
                  else
                    RESET_BIT (bmap[bb], indx);
                  break;
                }
              /* LIST_ENTRY must be an INSN of some kind that sets memory.
                 Examine each hunk of memory that is modified.  */

              dest = XEXP (list_entry, 0);
              list_entry = XEXP (list_entry, 1);
              dest_addr = XEXP (list_entry, 0);

              if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
                                         x, rtx_addr_varies_p))
                {
                  if (set_p)
                    SET_BIT (bmap[bb], indx);
                  else
                    RESET_BIT (bmap[bb], indx);
                  break;
                }
              list_entry = XEXP (list_entry, 1);
            }
        }

      x = XEXP (x, 0);
      goto repeat;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = XEXP (x, i);
              goto repeat;
            }

          compute_transp (XEXP (x, i), indx, bmap, set_p);
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
    }
}

/* Top level routine to do the dataflow analysis needed by copy/const
   propagation.  */

static void
compute_cprop_data ()
{
  compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, 1);
  compute_available (cprop_pavloc, cprop_absaltered,
                     cprop_avout, cprop_avin);
}

/* Copy/constant propagation.  */

/* Maximum number of register uses in an insn that we handle.  */
#define MAX_USES 8

/* Table of uses found in an insn.
   Allocated statically to avoid alloc/free complexity and overhead.  */
static struct reg_use reg_use_table[MAX_USES];

/* Index into `reg_use_table' while building it.  */
static int reg_use_count;

/* Set up a list of register numbers used in INSN.  The found uses are stored
   in `reg_use_table'.  `reg_use_count' is initialized to zero before entry,
   and contains the number of uses in the table upon exit.

   ???
   If a register appears multiple times we will record it multiple times.
   This doesn't hurt anything but it will slow things down.  */

static void
find_used_regs (xptr, data)
     rtx *xptr;
     void *data ATTRIBUTE_UNUSED;
{
  int i, j;
  enum rtx_code code;
  const char *fmt;
  rtx x = *xptr;

  /* repeat is used to turn tail-recursion into iteration since GCC
     can't do it when there's no return value.  */
 repeat:
  if (x == 0)
    return;

  code = GET_CODE (x);
  if (REG_P (x))
    {
      if (reg_use_count == MAX_USES)
        return;

      reg_use_table[reg_use_count].reg_rtx = x;
      reg_use_count++;
    }

  /* Recursively scan the operands of this expression.  */

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = XEXP (x, 0);
              goto repeat;
            }

          find_used_regs (&XEXP (x, i), data);
        }
      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          find_used_regs (&XVECEXP (x, i, j), data);
    }
}

/* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
   Returns non-zero if successful.  */

static int
try_replace_reg (from, to, insn)
     rtx from, to, insn;
{
  rtx note = find_reg_equal_equiv_note (insn);
  rtx src = 0;
  int success = 0;
  rtx set = single_set (insn);

  success = validate_replace_src (from, to, insn);

  /* If the above failed and this is a single set, try to simplify the source
     of the set given our substitution.  We could perhaps try this for
     multiple SETs, but it probably won't buy us anything.  */
  if (!success && set != 0)
    {
      src = simplify_replace_rtx (SET_SRC (set), from, to);

      if (!rtx_equal_p (src, SET_SRC (set))
          && validate_change (insn, &SET_SRC (set), src, 0))
        success = 1;
    }

  /* If we've failed to do replacement, have a single SET, and don't already
     have a note, add a REG_EQUAL note to not lose information.  */
  if (!success && note == 0 && set != 0)
    note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));

  /* If there is already a NOTE, update the expression in it with our
     replacement.  */
  else if (note != 0)
    XEXP (note, 0) = simplify_replace_rtx (XEXP (note, 0), from, to);

  /* The REG_EQUAL note may get simplified into a plain register.  We don't
     allow that; remove such a note.  This ought not to happen, because the
     previous code ought to have synthesized a reg-reg move, but be on the
     safe side.  */
  if (note && REG_P (XEXP (note, 0)))
    remove_note (insn, note);

  return success;
}

/* Find a set of REGNOs that are available on entry to INSN's block.  Returns
   NULL if no such set is found.  */

static struct expr *
find_avail_set (regno, insn)
     int regno;
     rtx insn;
{
  /* SET1 contains the last set found that can be returned to the caller for
     use in a substitution.  */
  struct expr *set1 = 0;

  /* Loops are not possible here.  To get a loop we would need two sets
     available at the start of the block containing INSN.
     I.e. we would need two sets like this available at the start of
     the block:

       (set (reg X) (reg Y))
       (set (reg Y) (reg X))

     This can not happen since the set of (reg Y) would have killed the
     set of (reg X) making it unavailable at the start of this block.  */
  while (1)
    {
      rtx src;
      struct expr *set = lookup_set (regno, NULL_RTX);

      /* Find a set that is available at the start of the block
         which contains INSN.  */
      while (set)
        {
          if (TEST_BIT (cprop_avin[BLOCK_NUM (insn)], set->bitmap_index))
            break;
          set = next_set (regno, set);
        }

      /* If no available set was found we've reached the end of the
         (possibly empty) copy chain.  */
      if (set == 0)
        break;

      if (GET_CODE (set->expr) != SET)
        abort ();

      src = SET_SRC (set->expr);

      /* We know the set is available.
         Now check that SRC is ANTLOC (i.e. none of the source operands
         have changed since the start of the block).

         If the source operand changed, we may still use it for the next
         iteration of this loop, but we may not use it for substitutions.  */

      if (CONSTANT_P (src) || oprs_not_set_p (src, insn))
        set1 = set;

      /* If the source of the set is anything except a register, then
         we have reached the end of the copy chain.  */
      if (GET_CODE (src) != REG)
        break;

      /* Follow the copy chain, i.e. start another iteration of the loop
         and see if we have an available copy into SRC.  */
      regno = REGNO (src);
    }

  /* SET1 holds the last set that was available and anticipatable at
     INSN.  */
  return set1;
}

/* Subroutine of cprop_insn that tries to propagate constants into
   JUMP_INSNS.  INSN must be a conditional jump.  FROM is what we will try to
   replace, SRC is the constant we will try to substitute for it.  Returns
   nonzero if a change was made.  We know INSN has just a SET.  */

static int
cprop_jump (bb, insn, from, src)
     rtx insn;
     rtx from;
     rtx src;
     basic_block bb;
{
  rtx set = PATTERN (insn);
  rtx new = simplify_replace_rtx (SET_SRC (set), from, src);

  /* If no simplification can be made, then try the next
     register.  */
  if (rtx_equal_p (new, SET_SRC (set)))
    return 0;

  /* If this is now a no-op, delete it; otherwise this must be a valid insn.  */
  if (new == pc_rtx)
    delete_insn (insn);
  else
    {
      if (! validate_change (insn, &SET_SRC (set), new, 0))
        return 0;

      /* If this has turned into an unconditional jump,
         then put a barrier after it so that the unreachable
         code will be deleted.  */
      if (GET_CODE (SET_SRC (set)) == LABEL_REF)
        emit_barrier_after (insn);
    }

  run_jump_opt_after_gcse = 1;

  const_prop_count++;
  if (gcse_file != NULL)
    {
      fprintf (gcse_file,
               "CONST-PROP: Replacing reg %d in insn %d with constant ",
               REGNO (from), INSN_UID (insn));
      print_rtl (gcse_file, src);
      fprintf (gcse_file, "\n");
    }
  purge_dead_edges (bb);

  return 1;
}

#ifdef HAVE_cc0

/* Subroutine of cprop_insn that tries to propagate constants into JUMP_INSNS
   for machines that have CC0.  INSN is a single set that stores into CC0;
   the insn following it is a conditional jump.  REG_USED is the use we will
   try to replace, SRC is the constant we will try to substitute for it.
   Returns nonzero if a change was made.
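
   As an illustrative sketch (schematic RTL, register and label numbers
   hypothetical), such a pair typically looks like:

     (insn 10 ... (set (cc0) (compare (reg:SI 60) (const_int 0))))
     (jump_insn 11 ... (set (pc) (if_then_else (eq (cc0) (const_int 0))
                                               (label_ref 23)
                                               (pc))))

   We substitute SRC into the cc0 setter's source, then hand the result to
   cprop_jump to try to fold the condition in the jump.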
*/

static int
cprop_cc0_jump (bb, insn, reg_used, src)
     basic_block bb;
     rtx insn;
     struct reg_use *reg_used;
     rtx src;
{
  /* First substitute in the SET_SRC of INSN, then substitute that for
     CC0 in JUMP.  */
  rtx jump = NEXT_INSN (insn);
  rtx new_src = simplify_replace_rtx (SET_SRC (PATTERN (insn)),
                                      reg_used->reg_rtx, src);

  if (! cprop_jump (bb, jump, cc0_rtx, new_src))
    return 0;

  /* If we succeeded, delete the cc0 setter.  */
  delete_insn (insn);

  return 1;
}
#endif

/* Perform constant and copy propagation on INSN.
   The result is non-zero if a change was made.  */

static int
cprop_insn (bb, insn, alter_jumps)
     basic_block bb;
     rtx insn;
     int alter_jumps;
{
  struct reg_use *reg_used;
  int changed = 0;
  rtx note;

  if (!INSN_P (insn))
    return 0;

  reg_use_count = 0;
  note_uses (&PATTERN (insn), find_used_regs, NULL);

  note = find_reg_equal_equiv_note (insn);

  /* We may win even when propagating constants into notes.  */
  if (note)
    find_used_regs (&XEXP (note, 0), NULL);

  for (reg_used = &reg_use_table[0]; reg_use_count > 0;
       reg_used++, reg_use_count--)
    {
      unsigned int regno = REGNO (reg_used->reg_rtx);
      rtx pat, src;
      struct expr *set;

      /* Ignore registers created by GCSE.
         We do this because ...  */
      if (regno >= max_gcse_regno)
        continue;

      /* If the register has already been set in this block, there's
         nothing we can do.  */
      if (! oprs_not_set_p (reg_used->reg_rtx, insn))
        continue;

      /* Find an assignment that sets reg_used and is available
         at the start of the block.  */
      set = find_avail_set (regno, insn);
      if (! set)
        continue;

      pat = set->expr;
      /* ??? We might be able to handle PARALLELs.  Later.  */
      if (GET_CODE (pat) != SET)
        abort ();

      src = SET_SRC (pat);

      /* Constant propagation.  */
      if (CONSTANT_P (src))
        {
          /* Handle normal insns first.  */
          if (GET_CODE (insn) == INSN
              && try_replace_reg (reg_used->reg_rtx, src, insn))
            {
              changed = 1;
              const_prop_count++;
              if (gcse_file != NULL)
                {
                  fprintf (gcse_file, "CONST-PROP: Replacing reg %d in ",
                           regno);
                  fprintf (gcse_file, "insn %d with constant ",
                           INSN_UID (insn));
                  print_rtl (gcse_file, src);
                  fprintf (gcse_file, "\n");
                }

              /* The original insn setting reg_used may or may not now be
                 deletable.  We leave the deletion to flow.  */
            }

          /* Try to propagate a CONST_INT into a conditional jump.
             We're pretty specific about what we will handle in this
             code, we can extend this as necessary over time.

             Right now the insn in question must look like
             (set (pc) (if_then_else ...))  */
          else if (alter_jumps
                   && GET_CODE (insn) == JUMP_INSN
                   && condjump_p (insn)
                   && ! simplejump_p (insn))
            changed |= cprop_jump (bb, insn, reg_used->reg_rtx, src);

#ifdef HAVE_cc0
          /* Similar code for machines that use a pair of CC0 setter and
             conditional jump insn.  */
          else if (alter_jumps
                   && GET_CODE (PATTERN (insn)) == SET
                   && SET_DEST (PATTERN (insn)) == cc0_rtx
                   && GET_CODE (NEXT_INSN (insn)) == JUMP_INSN
                   && condjump_p (NEXT_INSN (insn))
                   && !
                      simplejump_p (NEXT_INSN (insn))
                   && cprop_cc0_jump (bb, insn, reg_used, src))
            {
              changed = 1;
              break;
            }
#endif
        }
      else if (GET_CODE (src) == REG
               && REGNO (src) >= FIRST_PSEUDO_REGISTER
               && REGNO (src) != regno)
        {
          if (try_replace_reg (reg_used->reg_rtx, src, insn))
            {
              changed = 1;
              copy_prop_count++;
              if (gcse_file != NULL)
                {
                  fprintf (gcse_file, "COPY-PROP: Replacing reg %d in insn %d",
                           regno, INSN_UID (insn));
                  fprintf (gcse_file, " with reg %d\n", REGNO (src));
                }

              /* The original insn setting reg_used may or may not now be
                 deletable.  We leave the deletion to flow.  */
              /* FIXME: If it turns out that the insn isn't deletable,
                 then we may have unnecessarily extended register lifetimes
                 and made things worse.  */
            }
        }
    }

  return changed;
}

/* Forward propagate copies.  This includes copies and constants.  Return
   non-zero if a change was made.  */

static int
cprop (alter_jumps)
     int alter_jumps;
{
  int bb, changed;
  rtx insn;

  /* Note we start at block 1.  */

  changed = 0;
  for (bb = 1; bb < n_basic_blocks; bb++)
    {
      /* Reset tables used to keep track of what's still valid [since the
         start of the block].  */
      reset_opr_set_tables ();

      for (insn = BLOCK_HEAD (bb);
           insn != NULL && insn != NEXT_INSN (BLOCK_END (bb));
           insn = NEXT_INSN (insn))
        if (INSN_P (insn))
          {
            changed |= cprop_insn (BASIC_BLOCK (bb), insn, alter_jumps);

            /* Keep track of everything modified by this insn.  */
            /* ??? Need to be careful w.r.t. mods done to INSN.  Don't
               call mark_oprs_set if we turned the insn into a NOTE.  */
            if (GET_CODE (insn) != NOTE)
              mark_oprs_set (insn);
          }
    }

  if (gcse_file != NULL)
    fprintf (gcse_file, "\n");

  return changed;
}

/* Perform one copy/constant propagation pass.  PASS is the pass count.
   ALTER_JUMPS is non-zero if we should also try to propagate constants
   into conditional jumps.  */

static int
one_cprop_pass (pass, alter_jumps)
     int pass;
     int alter_jumps;
{
  int changed = 0;

  const_prop_count = 0;
  copy_prop_count = 0;

  alloc_set_hash_table (max_cuid);
  compute_set_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "SET", set_hash_table, set_hash_table_size,
                     n_sets);
  if (n_sets > 0)
    {
      alloc_cprop_mem (n_basic_blocks, n_sets);
      compute_cprop_data ();
      changed = cprop (alter_jumps);
      free_cprop_mem ();
    }

  free_set_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "CPROP of %s, pass %d: %d bytes needed, ",
               current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d const props, %d copy props\n\n",
               const_prop_count, copy_prop_count);
    }

  return changed;
}

/* Compute PRE+LCM working variables.  */

/* Local properties of expressions.  */
/* Nonzero for expressions that are transparent in the block.  */
static sbitmap *transp;

/* Nonzero for expressions that are transparent at the end of the block.
   This is only zero for expressions killed by an abnormal critical edge
   created by a call.  */
static sbitmap *transpout;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.
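   [In the lazy code motion literature this is the ANTLOC property: the
   expression is computed in the block and none of its operands are
   modified between the start of the block and that computation.]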
*/
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.  */
static sbitmap *pre_delete_map;

/* Contains the edge_list returned by pre_edge_lcm.  */
static struct edge_list *edge_list;

/* Redundant insns.  */
static sbitmap pre_redundant_insns;

/* Allocate vars used for PRE analysis.  */

static void
alloc_pre_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);

  pre_optimal = NULL;
  pre_redundant = NULL;
  pre_insert_map = NULL;
  pre_delete_map = NULL;
  ae_in = NULL;
  ae_out = NULL;
  ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);

  /* pre_insert and pre_delete are allocated later.  */
}

/* Free vars used for PRE analysis.  */

static void
free_pre_mem ()
{
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  /* ANTLOC and AE_KILL are freed just after pre_edge_lcm finishes.  */

  if (pre_optimal)
    sbitmap_vector_free (pre_optimal);
  if (pre_redundant)
    sbitmap_vector_free (pre_redundant);
  if (pre_insert_map)
    sbitmap_vector_free (pre_insert_map);
  if (pre_delete_map)
    sbitmap_vector_free (pre_delete_map);
  if (ae_in)
    sbitmap_vector_free (ae_in);
  if (ae_out)
    sbitmap_vector_free (ae_out);

  transp = comp = NULL;
  pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
  ae_in = ae_out = NULL;
}

/* Top level routine to do the dataflow analysis needed by PRE.  */

static void
compute_pre_data ()
{
  sbitmap trapping_expr;
  int i;
  unsigned int ui;

  compute_local_properties (transp, comp, antloc, 0);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  /* Collect expressions which might trap.  */
  trapping_expr = sbitmap_alloc (n_exprs);
  sbitmap_zero (trapping_expr);
  for (ui = 0; ui < expr_hash_table_size; ui++)
    {
      struct expr *e;
      for (e = expr_hash_table[ui]; e != NULL; e = e->next_same_hash)
        if (may_trap_p (e->expr))
          SET_BIT (trapping_expr, e->bitmap_index);
    }

  /* Compute ae_kill for each basic block using:

     ~(TRANSP | COMP)

     This is significantly faster than compute_ae_kill.  */

  for (i = 0; i < n_basic_blocks; i++)
    {
      edge e;

      /* If the current block is the destination of an abnormal edge, we
         kill all trapping expressions because we won't be able to properly
         place the instruction on the edge.  So make them neither
         anticipatable nor transparent.  This is fairly conservative.
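
         For example (illustrative only), (div:SI (reg 60) (reg 61)) may
         trap on division by zero and (mem:SI (reg 62)) may fault, so if
         this block is entered by an abnormal edge (e.g. from a non-local
         goto or an exception edge) there is no point on that edge where a
         compensating copy of such an expression could safely be placed.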
         */
      for (e = BASIC_BLOCK (i)->pred; e ; e = e->pred_next)
        if (e->flags & EDGE_ABNORMAL)
          {
            sbitmap_difference (antloc[i], antloc[i], trapping_expr);
            sbitmap_difference (transp[i], transp[i], trapping_expr);
            break;
          }

      sbitmap_a_or_b (ae_kill[i], transp[i], comp[i]);
      sbitmap_not (ae_kill[i], ae_kill[i]);
    }

  edge_list = pre_edge_lcm (gcse_file, n_exprs, transp, comp, antloc,
                            ae_kill, &pre_insert_map, &pre_delete_map);
  sbitmap_vector_free (antloc);
  antloc = NULL;
  sbitmap_vector_free (ae_kill);
  ae_kill = NULL;
  sbitmap_free (trapping_expr);
}

/* PRE utilities */

/* Return non-zero if an occurrence of expression EXPR in OCCR_BB would reach
   block BB.

   VISITED is a pointer to a working buffer for tracking which BB's have
   been visited.  It is NULL for the top-level call.

   We treat reaching expressions that go through blocks containing the same
   reaching expression as "not reaching".  E.g. if EXPR is generated in blocks
   2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
   2 as not reaching.  The intent is to improve the probability of finding
   only one reaching expression and to reduce register lifetimes by picking
   the closest such expression.  */

static int
pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited)
     basic_block occr_bb;
     struct expr *expr;
     basic_block bb;
     char *visited;
{
  edge pred;

  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR
          /* Has the predecessor already been visited?  */
          || visited[pred_bb->index])
        ; /* Nothing to do.  */

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
        {
          /* Is this the occurrence we're looking for?
             Note that there's only one generating occurrence per block
             so we just need to check the block number.  */
          if (occr_bb == pred_bb)
            return 1;

          visited[pred_bb->index] = 1;
        }
      /* Ignore this predecessor if it kills the expression.  */
      else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
        visited[pred_bb->index] = 1;

      /* Neither gen nor kill.  */
      else
        {
          visited[pred_bb->index] = 1;
          if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
            return 1;
        }
    }

  /* All paths have been checked.  */
  return 0;
}

/* The wrapper for pre_expr_reaches_here_p_work that ensures that any
   memory allocated for that function is freed.  */

static int
pre_expr_reaches_here_p (occr_bb, expr, bb)
     basic_block occr_bb;
     struct expr *expr;
     basic_block bb;
{
  int rval;
  char *visited = (char *) xcalloc (n_basic_blocks, 1);

  rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);

  free (visited);
  return rval;
}

/* Given an expr, generate RTL which we can insert at the end of a BB,
   or on an edge.  Set the block number of any insns generated to
   the value of BB.
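
   As an illustrative sketch (register numbers hypothetical): if EXPR->expr
   is (plus:SI (reg 60) (const_int 4)) and EXPR->reaching_reg is (reg 70),
   the returned pattern computes

     (set (reg:SI 70) (plus:SI (reg:SI 60) (const_int 4)))

   possibly wrapped in a multi-insn SEQUENCE if recognizing the insn
   required adding CLOBBERs of scratch registers.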
*/

static rtx
process_insert_insn (expr)
     struct expr *expr;
{
  rtx reg = expr->reaching_reg;
  rtx exp = copy_rtx (expr->expr);
  rtx pat;

  start_sequence ();

  /* If the expression is something that's an operand, like a constant,
     just copy it to a register.  */
  if (general_operand (exp, GET_MODE (reg)))
    emit_move_insn (reg, exp);

  /* Otherwise, make a new insn to compute this expression and make sure the
     insn will be recognized (this also adds any needed CLOBBERs).  Copy the
     expression to make sure we don't have any sharing issues.  */
  else if (insn_invalid_p (emit_insn (gen_rtx_SET (VOIDmode, reg, exp))))
    abort ();

  pat = gen_sequence ();
  end_sequence ();

  return pat;
}

/* Add EXPR to the end of basic block BB.

   This is used by both PRE and code hoisting.

   For PRE, we want to verify that the expr is either transparent
   or locally anticipatable in the target block.  This check makes
   no sense for code hoisting.  */

static void
insert_insn_end_bb (expr, bb, pre)
     struct expr *expr;
     basic_block bb;
     int pre;
{
  rtx insn = bb->end;
  rtx new_insn;
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  rtx pat;
  int i;

  pat = process_insert_insn (expr);

  /* If the last insn is a jump, insert EXPR in front [taking care to
     handle cc0, etc. properly].  */

  if (GET_CODE (insn) == JUMP_INSN)
    {
#ifdef HAVE_cc0
      rtx note;
#endif

      /* If this is a jump table, then we can't insert stuff here.  Since
         we know the previous real insn must be the tablejump, we insert
         the new instruction just before the tablejump.  */
      if (GET_CODE (PATTERN (insn)) == ADDR_VEC
          || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
        insn = prev_real_insn (insn);

#ifdef HAVE_cc0
      /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
         if cc0 isn't set.  */
      note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
      if (note)
        insn = XEXP (note, 0);
      else
        {
          rtx maybe_cc0_setter = prev_nonnote_insn (insn);
          if (maybe_cc0_setter
              && INSN_P (maybe_cc0_setter)
              && sets_cc0_p (PATTERN (maybe_cc0_setter)))
            insn = maybe_cc0_setter;
        }
#endif
      /* FIXME: What if something in cc0/jump uses value set in new insn?  */
      new_insn = emit_insn_before (pat, insn);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (GET_CODE (insn) == CALL_INSN)
    {
      /* Keeping in mind SMALL_REGISTER_CLASSES and parameters in registers,
         we search backward and place the instructions before the first
         parameter is loaded.  Do this for everyone for consistency and on
         the presumption that we'll get better code elsewhere as well.

         It should always be the case that we can put these instructions
         anywhere in the basic block when performing PRE optimizations.
         Check this.  */

      if (pre
          && !TEST_BIT (antloc[bb->index], expr->bitmap_index)
          && !TEST_BIT (transp[bb->index], expr->bitmap_index))
        abort ();

      /* Since different machines initialize their parameter registers
         in different orders, assume nothing.  Collect the set of all
         parameter registers.
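
         Schematically (illustrative only, register numbers hypothetical),
         the tail of such a block might be

           (insn ... (set (reg:SI 4) ...))    load of first parameter reg
           (insn ... (set (reg:SI 5) ...))    load of second parameter reg
           (call_insn ...)

         and the new insn must go in front of the first parameter load,
         not between the loads and the call.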
         */
      insn = find_first_parameter_load (insn, bb->head);

      /* If we found all the parameter loads, then we want to insert
         before the first parameter load.

         If we did not find all the parameter loads, then we might have
         stopped on the head of the block, which could be a CODE_LABEL.
         If we inserted before the CODE_LABEL, then we would be putting
         the insn in the wrong basic block.  In that case, put the insn
         after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (GET_CODE (insn) == CODE_LABEL
             || NOTE_INSN_BASIC_BLOCK_P (insn))
        insn = NEXT_INSN (insn);

      new_insn = emit_insn_before (pat, insn);
    }
  else
    new_insn = emit_insn_after (pat, insn);

  /* Keep block number table up to date.
     Note that PAT could be a multiple-insn sequence; we have to make
     sure that each insn in the sequence is handled.  */
  if (GET_CODE (pat) == SEQUENCE)
    {
      for (i = 0; i < XVECLEN (pat, 0); i++)
        {
          rtx insn = XVECEXP (pat, 0, i);
          if (INSN_P (insn))
            add_label_notes (PATTERN (insn), new_insn);

          note_stores (PATTERN (insn), record_set_info, insn);
        }
    }
  else
    {
      add_label_notes (pat, new_insn);

      /* Keep register set table up to date.  */
      record_one_set (regno, new_insn);
    }

  gcse_create_count++;

  if (gcse_file)
    {
      fprintf (gcse_file, "PRE/HOIST: end of bb %d, insn %d, ",
               bb->index, INSN_UID (new_insn));
      fprintf (gcse_file, "copying expression %d to reg %d\n",
               expr->bitmap_index, regno);
    }
}

/* Insert partially redundant expressions on edges in the CFG to make
   the expressions fully redundant.  */

static int
pre_edge_insert (edge_list, index_map)
     struct edge_list *edge_list;
     struct expr **index_map;
{
  int e, i, j, num_edges, set_size, did_insert = 0;
  sbitmap *inserted;

  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */

  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
  inserted = sbitmap_vector_alloc (num_edges, n_exprs);
  sbitmap_vector_zero (inserted, num_edges);

  for (e = 0; e < num_edges; e++)
    {
      int indx;
      basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);

      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
        {
          SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];

          for (j = indx; insert && j < n_exprs; j++, insert >>= 1)
            if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
              {
                struct expr *expr = index_map[j];
                struct occr *occr;

                /* Now look at each deleted occurrence of this expression.  */
                for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
                  {
                    if (! occr->deleted_p)
                      continue;

                    /* Insert this expression on this edge if it would
                       reach the deleted occurrence in BB.  */
                    if (!TEST_BIT (inserted[e], j))
                      {
                        rtx insn;
                        edge eg = INDEX_EDGE (edge_list, e);

                        /* We can't insert anything on an abnormal and
                           critical edge, so we insert the insn at the end of
                           the previous block.  There are several alternatives
                           detailed in Morgan's book, p. 277 (sec. 10.5), for
                           handling this situation.  This one is easiest for
                           now.
                           */

                        if ((eg->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
                          insert_insn_end_bb (index_map[j], bb, 0);
                        else
                          {
                            insn = process_insert_insn (index_map[j]);
                            insert_insn_on_edge (insn, eg);
                          }

                        if (gcse_file)
                          {
                            fprintf (gcse_file, "PRE/HOIST: edge (%d,%d), ",
                                     bb->index,
                                     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
                            fprintf (gcse_file, "copy expression %d\n",
                                     expr->bitmap_index);
                          }

                        update_ld_motion_stores (expr);
                        SET_BIT (inserted[e], j);
                        did_insert = 1;
                        gcse_create_count++;
                      }
                  }
              }
        }
    }

  sbitmap_vector_free (inserted);
  return did_insert;
}

/* Copy the result of INSN to REG.  INDX is the expression number.  */

static void
pre_insert_copy_insn (expr, insn)
     struct expr *expr;
     rtx insn;
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx set = single_set (insn);
  rtx new_insn;

  if (!set)
    abort ();

  new_insn = emit_insn_after (gen_move_insn (reg, SET_DEST (set)), insn);

  /* Keep register set table up to date.  */
  record_one_set (regno, new_insn);

  gcse_create_count++;

  if (gcse_file)
    fprintf (gcse_file,
             "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
             BLOCK_NUM (insn), INSN_UID (new_insn), indx,
             INSN_UID (insn), regno);
  update_ld_motion_stores (expr);
}

/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
pre_insert_copies ()
{
  unsigned int i;
  struct expr *expr;
  struct occr *occr;
  struct occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
        /* If the basic block isn't reachable, PPOUT will be TRUE.  However,
           we don't want to insert a copy here because the expression may not
           really be redundant.  So only insert an insn if the expression was
           deleted.  This test also avoids further processing if the
           expression wasn't deleted anywhere.  */
        if (expr->reaching_reg == NULL)
          continue;

        for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
          {
            if (! occr->deleted_p)
              continue;

            for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
              {
                rtx insn = avail->insn;

                /* No need to handle this one if handled already.  */
                if (avail->copied_p)
                  continue;

                /* Don't handle this one if it's a redundant one.  */
                if (TEST_BIT (pre_redundant_insns, INSN_CUID (insn)))
                  continue;

                /* Or if the expression doesn't reach the deleted one.  */
                if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
                                               expr,
                                               BLOCK_FOR_INSN (occr->insn)))
                  continue;

                /* Copy the result of avail to reaching_reg.  */
                pre_insert_copy_insn (expr, insn);
                avail->copied_p = 1;
              }
          }
      }
}

/* Delete redundant computations.
   Deletion is done by changing the insn to copy the `reaching_reg' of
   the expression into the result of the SET.
   It is left to later passes
   (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.

   Returns non-zero if a change is made.  */

static int
pre_delete ()
{
  unsigned int i;
  int changed;
  struct expr *expr;
  struct occr *occr;

  changed = 0;
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      {
        int indx = expr->bitmap_index;

        /* We only need to search antic_occr since we require
           ANTLOC != 0.  */

        for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
          {
            rtx insn = occr->insn;
            rtx set;
            basic_block bb = BLOCK_FOR_INSN (insn);

            if (TEST_BIT (pre_delete_map[bb->index], indx))
              {
                set = single_set (insn);
                if (! set)
                  abort ();

                /* Create a pseudo-reg to store the result of reaching
                   expressions into.  Get the mode for the new pseudo from
                   the mode of the original destination pseudo.  */
                if (expr->reaching_reg == NULL)
                  expr->reaching_reg
                    = gen_reg_rtx (GET_MODE (SET_DEST (set)));

                /* In theory this should never fail since we're creating
                   a reg->reg copy.

                   However, on the x86 some of the movXX patterns actually
                   contain clobbers of scratch regs.  This may cause the
                   insn created by validate_change to not match any pattern
                   and thus cause validate_change to fail.  */
                if (validate_change (insn, &SET_SRC (set),
                                     expr->reaching_reg, 0))
                  {
                    occr->deleted_p = 1;
                    SET_BIT (pre_redundant_insns, INSN_CUID (insn));
                    changed = 1;
                    gcse_subst_count++;
                  }

                if (gcse_file)
                  {
                    fprintf (gcse_file,
                             "PRE: redundant insn %d (expression %d) in ",
                             INSN_UID (insn), indx);
                    fprintf (gcse_file, "bb %d, reaching reg is %d\n",
                             bb->index, REGNO (expr->reaching_reg));
                  }
              }
          }
      }

  return changed;
}

/* Perform GCSE optimizations using PRE.
   This is called by one_pre_gcse_pass after all the dataflow analysis
   has been done.

   This is based on the original Morel-Renvoise paper, Fred Chow's thesis,
   and lazy code motion from Knoop, Ruthing and Steffen as described in
   Advanced Compiler Design and Implementation.

   ??? A new pseudo reg is created to hold the reaching expression.  The nice
   thing about the classical approach is that it would try to use an existing
   reg.  If the register can't be adequately optimized [i.e. we introduce
   reload problems], one could add a pass here to propagate the new register
   through the block.

   ??? We don't handle single sets in PARALLELs because we're [currently] not
   able to copy the rest of the parallel when we insert copies to create full
   redundancies from partial redundancies.  However, there's no reason why we
   can't handle PARALLELs in the cases where there are no partial
   redundancies.  */

static int
pre_gcse ()
{
  unsigned int i;
  int did_insert, changed;
  struct expr **index_map;
  struct expr *expr;

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.
     */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Reset bitmap used to track which insns are redundant.  */
  pre_redundant_insns = sbitmap_alloc (max_cuid);
  sbitmap_zero (pre_redundant_insns);

  /* Delete the redundant insns first so that
     - we know what register to use for the new insns and for the other
       ones with reaching expressions
     - we know which insns are redundant when we go to create copies  */

  changed = pre_delete ();

  did_insert = pre_edge_insert (edge_list, index_map);

  /* In other places with reaching expressions, copy the expression to the
     specially allocated pseudo-reg that reaches the redundant expr.  */
  pre_insert_copies ();
  if (did_insert)
    {
      commit_edge_insertions ();
      changed = 1;
    }

  free (index_map);
  sbitmap_free (pre_redundant_insns);
  return changed;
}

/* Top level routine to perform one PRE GCSE pass.

   Return non-zero if a change was made.  */

static int
one_pre_gcse_pass (pass)
     int pass;
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  alloc_expr_hash_table (max_cuid);
  add_noreturn_fake_exit_edges ();
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_expr_hash_table ();
  trim_ld_motion_mems ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Expression", expr_hash_table,
                     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_pre_mem (n_basic_blocks, n_exprs);
      compute_pre_data ();
      changed |= pre_gcse ();
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  free_ldst_mems ();
  remove_fake_edges ();
  free_expr_hash_table ();

  if (gcse_file)
    {
      fprintf (gcse_file, "\nPRE GCSE of %s, pass %d: %d bytes needed, ",
               current_function_name, pass, bytes_used);
      fprintf (gcse_file, "%d substs, %d insns created\n",
               gcse_subst_count, gcse_create_count);
    }

  return changed;
}

/* If X contains any LABEL_REF's, add REG_LABEL notes for them to INSN.
   If notes are added to an insn which references a CODE_LABEL, the
   LABEL_NUSES count is incremented.  We have to add REG_LABEL notes,
   because the following loop optimization pass requires them.  */

/* ??? This is very similar to the loop.c add_label_notes function.  We
   could probably share code here.  */

/* ??? If there was a jump optimization pass after gcse and before loop,
   then we would not need to do this here, because jump would add the
   necessary REG_LABEL notes.  */

static void
add_label_notes (x, insn)
     rtx x;
     rtx insn;
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
    {
      /* This code used to ignore labels that referred to dispatch tables to
         avoid flow generating (slightly) worse code.

         We no longer ignore such label references (see LABEL_REF handling in
         mark_jump_label for additional information).
         */

      REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
                                            REG_NOTES (insn));
      if (LABEL_P (XEXP (x, 0)))
        LABEL_NUSES (XEXP (x, 0))++;
      return;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
        add_label_notes (XEXP (x, i), insn);
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          add_label_notes (XVECEXP (x, i, j), insn);
    }
}

/* Compute transparent outgoing information for each block.

   An expression is transparent to an edge unless it is killed by
   the edge itself.  This can only happen with abnormal control flow,
   when the edge is traversed through a call.  This happens with
   non-local labels and exceptions.

   This would not be necessary if we split the edge.  While this is
   normally impossible for abnormal critical edges, with some effort
   it should be possible with exception handling, since we still have
   control over which handler should be invoked.  But due to increased
   EH table sizes, this may not be worthwhile.  */

static void
compute_transpout ()
{
  int bb;
  unsigned int i;
  struct expr *expr;

  sbitmap_vector_ones (transpout, n_basic_blocks);

  for (bb = 0; bb < n_basic_blocks; ++bb)
    {
      /* Note that flow inserted a nop at the end of basic blocks that
         end in call instructions for reasons other than abnormal
         control flow.  */
      if (GET_CODE (BLOCK_END (bb)) != CALL_INSN)
        continue;

      for (i = 0; i < expr_hash_table_size; i++)
        for (expr = expr_hash_table[i]; expr ; expr = expr->next_same_hash)
          if (GET_CODE (expr->expr) == MEM)
            {
              if (GET_CODE (XEXP (expr->expr, 0)) == SYMBOL_REF
                  && CONSTANT_POOL_ADDRESS_P (XEXP (expr->expr, 0)))
                continue;

              /* ??? Optimally, we would use interprocedural alias
                 analysis to determine if this mem is actually killed
                 by this call.  */
              RESET_BIT (transpout[bb], expr->bitmap_index);
            }
    }
}

/* Removal of useless null pointer checks */

/* Called via note_stores.  X is set by SETTER.  If X is a register we must
   invalidate nonnull_local and set nonnull_killed.  DATA is really a
   `null_pointer_info *'.

   We ignore hard registers.  */

static void
invalidate_nonnull_info (x, setter, data)
     rtx x;
     rtx setter ATTRIBUTE_UNUSED;
     void *data;
{
  unsigned int regno;
  struct null_pointer_info *npi = (struct null_pointer_info *) data;

  while (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  /* Ignore anything that is not a register or is a hard register.  */
  if (GET_CODE (x) != REG
      || REGNO (x) < npi->min_reg
      || REGNO (x) >= npi->max_reg)
    return;

  regno = REGNO (x) - npi->min_reg;

  RESET_BIT (npi->nonnull_local[npi->current_block], regno);
  SET_BIT (npi->nonnull_killed[npi->current_block], regno);
}

/* Do null-pointer check elimination for the registers indicated in
   NPI.  NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps;
   they are not our responsibility to free.
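
   As an illustrative example (register numbers hypothetical): after

     (insn ... (set (reg:SI 64) (mem:SI (reg:SI 65))))

   we know (reg 65) is nonnull, since a load through a null pointer would
   have faulted; any later set of (reg 65) kills that knowledge.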
   */

static void
delete_null_pointer_checks_1 (block_reg, nonnull_avin,
                              nonnull_avout, npi)
     unsigned int *block_reg;
     sbitmap *nonnull_avin;
     sbitmap *nonnull_avout;
     struct null_pointer_info *npi;
{
  int bb;
  int current_block;
  sbitmap *nonnull_local = npi->nonnull_local;
  sbitmap *nonnull_killed = npi->nonnull_killed;

  /* Compute local properties, nonnull and killed.  A register will have
     the nonnull property if at the end of the current block its value is
     known to be nonnull.  The killed property indicates that somewhere in
     the block any information we had about the register is killed.

     Note that a register can have both properties in a single block.  That
     indicates that it's killed, then later in the block a new value is
     computed.  */
  sbitmap_vector_zero (nonnull_local, n_basic_blocks);
  sbitmap_vector_zero (nonnull_killed, n_basic_blocks);

  for (current_block = 0; current_block < n_basic_blocks; current_block++)
    {
      rtx insn, stop_insn;

      /* Set the current block for invalidate_nonnull_info.  */
      npi->current_block = current_block;

      /* Scan each insn in the basic block looking for memory references and
         register sets.  */
      stop_insn = NEXT_INSN (BLOCK_END (current_block));
      for (insn = BLOCK_HEAD (current_block);
           insn != stop_insn;
           insn = NEXT_INSN (insn))
        {
          rtx set;
          rtx reg;

          /* Ignore anything that is not a normal insn.  */
          if (! INSN_P (insn))
            continue;

          /* Basically ignore anything that is not a simple SET.  We do have
             to make sure to invalidate nonnull_local and set nonnull_killed
             for such insns though.  */
          set = single_set (insn);
          if (!set)
            {
              note_stores (PATTERN (insn), invalidate_nonnull_info, npi);
              continue;
            }

          /* See if we've got a usable memory load.  We handle it first
             in case it uses its address register as a dest (which kills
             the nonnull property).  */
          if (GET_CODE (SET_SRC (set)) == MEM
              && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG
              && REGNO (reg) >= npi->min_reg
              && REGNO (reg) < npi->max_reg)
            SET_BIT (nonnull_local[current_block],
                     REGNO (reg) - npi->min_reg);

          /* Now invalidate stuff clobbered by this insn.  */
          note_stores (PATTERN (insn), invalidate_nonnull_info, npi);

          /* And handle stores, we do these last since any sets in INSN can
             not kill the nonnull property if it is derived from a MEM
             appearing in a SET_DEST.  */
          if (GET_CODE (SET_DEST (set)) == MEM
              && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG
              && REGNO (reg) >= npi->min_reg
              && REGNO (reg) < npi->max_reg)
            SET_BIT (nonnull_local[current_block],
                     REGNO (reg) - npi->min_reg);
        }
    }

  /* Now compute global properties based on the local properties.  This
     is a classic global availability algorithm.  */
  compute_available (nonnull_local, nonnull_killed,
                     nonnull_avout, nonnull_avin);

  /* Now look at each bb and see if it ends with a compare of a value
     against zero.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest;
      int compare_and_branch;

      /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and
         since BLOCK_REG[BB] is zero if this block did not end with a
         comparison against zero, this condition works.
         */
      if (block_reg[bb] < npi->min_reg
          || block_reg[bb] >= npi->max_reg)
        continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we can't determine the condition then skip.  */
      if (! condition)
        continue;

      /* Is the register known to have a nonzero value?  */
      if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg))
        continue;

      /* Try to compute whether the compare/branch at the loop end is one or
         two instructions.  */
      if (earliest == last_insn)
        compare_and_branch = 1;
      else if (earliest == prev_nonnote_insn (last_insn))
        compare_and_branch = 2;
      else
        continue;

      /* We know the register in this comparison is nonnull at exit from
         this block.  We can optimize this comparison.  */
      if (GET_CODE (condition) == NE)
        {
          rtx new_jump;

          new_jump = emit_jump_insn_before (gen_jump (JUMP_LABEL (last_insn)),
                                            last_insn);
          JUMP_LABEL (new_jump) = JUMP_LABEL (last_insn);
          LABEL_NUSES (JUMP_LABEL (new_jump))++;
          emit_barrier_after (new_jump);
        }

      delete_insn (last_insn);
      if (compare_and_branch == 2)
        delete_insn (earliest);
      purge_dead_edges (BASIC_BLOCK (bb));

      /* Don't check this block again.  (Note that BLOCK_END is
         invalid here; we deleted the last instruction in the
         block.)  */
      block_reg[bb] = 0;
    }
}

/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated
   at compile time.

   This is conceptually similar to global constant/copy propagation and
   classic global CSE (it even uses the same dataflow equations as cprop).

   If a register is used as a memory address with the form (mem (reg)), then
   we know that REG can not be zero at that point in the program.  Any
   instruction which sets REG "kills" this property.

   So, if every path leading to a conditional branch has an available memory
   reference of that form, then we know the register can not have the value
   zero at the conditional branch.

   So we merely need to compute the local properties and propagate that data
   around the cfg, then optimize where possible.

   We run this pass two times.  Once before CSE, then again after CSE.  This
   has proven to be the most profitable approach.  It is rare for new
   optimization opportunities of this nature to appear after the first CSE
   pass.

   This could probably be integrated with global cprop with a little work.  */

void
delete_null_pointer_checks (f)
     rtx f ATTRIBUTE_UNUSED;
{
  sbitmap *nonnull_avin, *nonnull_avout;
  unsigned int *block_reg;
  int bb;
  int reg;
  int regs_per_pass;
  int max_reg;
  struct null_pointer_info npi;

  /* If we have only a single block, then there's nothing to do.  */
  if (n_basic_blocks <= 1)
    return;

  /* Trying to perform global optimizations on flow graphs which have
     a high connectivity will take a long time and is unlikely to be
     particularly useful.

     In normal circumstances a cfg should have about twice as many edges
     as blocks.  But we do not want to punish small functions which have
     a couple switch statements.  So we require a relatively large number
     of basic blocks and the ratio of edges to blocks to be high.
     */
  if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20)
    return;

  /* We need four bitmaps, each with a bit for each register in each
     basic block.  */
  max_reg = max_reg_num ();
  regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg);

  /* Allocate bitmaps to hold local and global properties.  */
  npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);
  nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass);

  /* Go through the basic blocks, seeing whether or not each block
     ends with a conditional branch whose condition is a comparison
     against zero.  Record the register compared in BLOCK_REG.  */
  block_reg = (unsigned int *) xcalloc (n_basic_blocks, sizeof (int));
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      rtx last_insn = BLOCK_END (bb);
      rtx condition, earliest, reg;

      /* We only want conditional branches.  */
      if (GET_CODE (last_insn) != JUMP_INSN
          || !any_condjump_p (last_insn)
          || !onlyjump_p (last_insn))
        continue;

      /* LAST_INSN is a conditional jump.  Get its condition.  */
      condition = get_condition (last_insn, &earliest);

      /* If we were unable to get the condition, or it is not an equality
         comparison against zero then there's nothing we can do.  */
      if (!condition
          || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ)
          || GET_CODE (XEXP (condition, 1)) != CONST_INT
          || (XEXP (condition, 1)
              != CONST0_RTX (GET_MODE (XEXP (condition, 0)))))
        continue;

      /* We must be checking a register against zero.  */
      reg = XEXP (condition, 0);
      if (GET_CODE (reg) != REG)
        continue;

      block_reg[bb] = REGNO (reg);
    }

  /* Go through the algorithm for each block of registers.  */
  for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass)
    {
      npi.min_reg = reg;
      npi.max_reg = MIN (reg + regs_per_pass, max_reg);
      delete_null_pointer_checks_1 (block_reg, nonnull_avin,
                                    nonnull_avout, &npi);
    }

  /* Free the table of registers compared at the end of every block.  */
  free (block_reg);

  /* Free bitmaps.  */
  sbitmap_vector_free (npi.nonnull_local);
  sbitmap_vector_free (npi.nonnull_killed);
  sbitmap_vector_free (nonnull_avin);
  sbitmap_vector_free (nonnull_avout);
}

/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;

/* Hoistable expressions.  */
static sbitmap *hoist_exprs;

/* Dominator bitmaps.  */
static sbitmap *dominators;

/* ??? We could compute post dominators and run this algorithm in
   reverse to perform tail merging, doing so would probably be
   more effective than the tail merging code in jump.c.

   It's unclear if tail merging could be run in parallel with
   code hoisting.  It would be nice.  */

/* Allocate vars used for code hoisting analysis.
   */

static void
alloc_code_hoist_mem (n_blocks, n_exprs)
     int n_blocks, n_exprs;
{
  antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
  transp = sbitmap_vector_alloc (n_blocks, n_exprs);
  comp = sbitmap_vector_alloc (n_blocks, n_exprs);

  hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
  hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
  transpout = sbitmap_vector_alloc (n_blocks, n_exprs);

  dominators = sbitmap_vector_alloc (n_blocks, n_blocks);
}

/* Free vars used for code hoisting analysis.  */

static void
free_code_hoist_mem ()
{
  sbitmap_vector_free (antloc);
  sbitmap_vector_free (transp);
  sbitmap_vector_free (comp);

  sbitmap_vector_free (hoist_vbein);
  sbitmap_vector_free (hoist_vbeout);
  sbitmap_vector_free (hoist_exprs);
  sbitmap_vector_free (transpout);

  sbitmap_vector_free (dominators);
}

/* Compute the very busy expressions at entry/exit from each block.

   An expression is very busy if all paths from a given point
   compute the expression.  */

static void
compute_code_hoist_vbeinout ()
{
  int bb, changed, passes;

  sbitmap_vector_zero (hoist_vbeout, n_basic_blocks);
  sbitmap_vector_zero (hoist_vbein, n_basic_blocks);

  passes = 0;
  changed = 1;

  while (changed)
    {
      changed = 0;

      /* We scan the blocks in the reverse order to speed up
         the convergence.  */
      for (bb = n_basic_blocks - 1; bb >= 0; bb--)
        {
          changed |= sbitmap_a_or_b_and_c (hoist_vbein[bb], antloc[bb],
                                           hoist_vbeout[bb], transp[bb]);
          if (bb != n_basic_blocks - 1)
            sbitmap_intersection_of_succs (hoist_vbeout[bb], hoist_vbein, bb);
        }

      passes++;
    }

  if (gcse_file)
    fprintf (gcse_file, "hoisting vbeinout computation: %d passes\n", passes);
}

/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
compute_code_hoist_data ()
{
  compute_local_properties (transp, comp, antloc, 0);
  compute_transpout ();
  compute_code_hoist_vbeinout ();
  calculate_dominance_info (NULL, dominators, CDI_DOMINATORS);
  if (gcse_file)
    fprintf (gcse_file, "\n");
}

/* Determine if the expression identified by EXPR_INDEX would
   reach BB unimpaired if it was placed at the end of EXPR_BB.

   It's unclear exactly what Muchnick meant by "unimpaired".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.
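
   As a small illustrative example, take a diamond CFG A -> {B, C} -> D
   where the expression's operands are untouched along A -> B -> D but an
   operand is assigned in C.  An occurrence at the end of A reaches D for
   PRE's purposes, since the path through B reaches.  For hoisting it does
   not reach D unimpaired, since the value would be stale on the path
   through C.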
static int
hoist_expr_reaches_here_p (expr_bb, expr_index, bb, visited)
     basic_block expr_bb;
     int expr_index;
     basic_block bb;
     char *visited;
{
  edge pred;
  int visited_allocated_locally = 0;

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = (char *) xcalloc (n_basic_blocks, 1);
    }

  for (pred = bb->pred; pred != NULL; pred = pred->pred_next)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR)
        break;
      else if (visited[pred_bb->index])
        continue;

      /* Does this predecessor generate this expression?  */
      else if (TEST_BIT (comp[pred_bb->index], expr_index))
        break;
      else if (! TEST_BIT (transp[pred_bb->index], expr_index))
        break;

      /* Not killed.  */
      else
        {
          visited[pred_bb->index] = 1;
          if (! hoist_expr_reaches_here_p (expr_bb, expr_index,
                                           pred_bb, visited))
            break;
        }
    }
  if (visited_allocated_locally)
    free (visited);

  return (pred == NULL);
}

/* Actually perform code hoisting.  */

static void
hoist_code ()
{
  int bb, dominated;
  unsigned int i;
  struct expr **index_map;
  struct expr *expr;

  sbitmap_vector_zero (hoist_exprs, n_basic_blocks);

  /* Compute a mapping from expression number (`bitmap_index') to
     hash table entry.  */

  index_map = (struct expr **) xcalloc (n_exprs, sizeof (struct expr *));
  for (i = 0; i < expr_hash_table_size; i++)
    for (expr = expr_hash_table[i]; expr != NULL; expr = expr->next_same_hash)
      index_map[expr->bitmap_index] = expr;

  /* Walk over each basic block looking for potentially hoistable
     expressions; nothing gets hoisted from the entry block.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      int found = 0;
      int insn_inserted_p;

      /* Examine each expression that is very busy at the exit of this
         block.  These are the potentially hoistable expressions.  */
      for (i = 0; i < hoist_vbeout[bb]->n_bits; i++)
        {
          int hoistable = 0;

          if (TEST_BIT (hoist_vbeout[bb], i) && TEST_BIT (transpout[bb], i))
            {
              /* We've found a potentially hoistable expression, now
                 we look at every block BB dominates to see if it
                 computes the expression.  */
              for (dominated = 0; dominated < n_basic_blocks; dominated++)
                {
                  /* Ignore self dominance.  */
                  if (bb == dominated
                      || ! TEST_BIT (dominators[dominated], bb))
                    continue;

                  /* We've found a dominated block, now see if it computes
                     the busy expression and whether or not moving that
                     expression to the "beginning" of that block is safe.  */
                  if (!TEST_BIT (antloc[dominated], i))
                    continue;

                  /* Note if the expression would reach the dominated block
                     unimpaired if it were placed at the end of BB.

                     Keep track of how many times this expression is hoistable
                     from a dominated block into BB.  */
                  if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
                                                 BASIC_BLOCK (dominated), NULL))
                    hoistable++;
                }

              /* If we found more than one hoistable occurrence of this
                 expression, then note it in the bitmap of expressions to
                 hoist.  It makes no sense to hoist things which are computed
                 in only one BB, and doing so tends to pessimize register
                 allocation.
                 One could increase this value to try harder
                 to avoid any possible code expansion due to register
                 allocation issues; however, experiments have shown that
                 the vast majority of hoistable expressions are only movable
                 from two successors, so raising this threshold is likely
                 to nullify any benefit we get from code hoisting.  */
              if (hoistable > 1)
                {
                  SET_BIT (hoist_exprs[bb], i);
                  found = 1;
                }
            }
        }

      /* If we found nothing to hoist, then quit now.  */
      if (! found)
        continue;

      /* Loop over all the hoistable expressions.  */
      for (i = 0; i < hoist_exprs[bb]->n_bits; i++)
        {
          /* We want to insert the expression into BB only once, so
             note when we've inserted it.  */
          insn_inserted_p = 0;

          /* These tests should be the same as the tests above.  */
          if (TEST_BIT (hoist_vbeout[bb], i))
            {
              /* We've found a potentially hoistable expression, now
                 we look at every block BB dominates to see if it
                 computes the expression.  */
              for (dominated = 0; dominated < n_basic_blocks; dominated++)
                {
                  /* Ignore self dominance.  */
                  if (bb == dominated
                      || ! TEST_BIT (dominators[dominated], bb))
                    continue;

                  /* We've found a dominated block, now see if it computes
                     the busy expression and whether or not moving that
                     expression to the "beginning" of that block is safe.  */
                  if (!TEST_BIT (antloc[dominated], i))
                    continue;

                  /* The expression is computed in the dominated block and
                     it would be safe to compute it at the start of the
                     dominated block.  Now we have to determine if the
                     expression would reach the dominated block if it were
                     placed at the end of BB.  */
                  if (hoist_expr_reaches_here_p (BASIC_BLOCK (bb), i,
                                                 BASIC_BLOCK (dominated), NULL))
                    {
                      struct expr *expr = index_map[i];
                      struct occr *occr = expr->antic_occr;
                      rtx insn;
                      rtx set;

                      /* Find the right occurrence of this expression.  */
                      while (occr && BLOCK_NUM (occr->insn) != dominated)
                        occr = occr->next;

                      /* Should never happen.  */
                      if (!occr)
                        abort ();

                      insn = occr->insn;

                      set = single_set (insn);
                      if (! set)
                        abort ();

                      /* Create a pseudo-reg to store the result of reaching
                         expressions into.  Get the mode for the new pseudo
                         from the mode of the original destination pseudo.  */
                      if (expr->reaching_reg == NULL)
                        expr->reaching_reg
                          = gen_reg_rtx (GET_MODE (SET_DEST (set)));

                      /* In theory this should never fail since we're creating
                         a reg->reg copy.

                         However, on the x86 some of the movXX patterns
                         actually contain clobbers of scratch regs.  This may
                         cause the insn created by validate_change to not
                         match any pattern and thus cause validate_change to
                         fail.  */
                      if (validate_change (insn, &SET_SRC (set),
                                           expr->reaching_reg, 0))
                        {
                          occr->deleted_p = 1;
                          if (!insn_inserted_p)
                            {
                              insert_insn_end_bb (index_map[i],
                                                  BASIC_BLOCK (bb), 0);
                              insn_inserted_p = 1;
                            }
                        }
                    }
                }
            }
        }
    }

  free (index_map);
}
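/* To make the effect of hoist_code concrete, a hypothetical fragment
   (register numbers invented for illustration): an occurrence

        (set (reg 60) (plus:SI (reg 58) (reg 59)))

   in a dominated block is rewritten into a copy from the new pseudo,

        (set (reg 60) (reg 65))

   while (set (reg 65) (plus:SI (reg 58) (reg 59))) is inserted at the
   end of BB exactly once, however many dominated occurrences are
   rewritten.  */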
/* Top level routine to perform one code hoisting (aka unification) pass.

   Return non-zero if a change was made.  */

static int
one_code_hoisting_pass ()
{
  int changed = 0;

  alloc_expr_hash_table (max_cuid);
  compute_expr_hash_table ();
  if (gcse_file)
    dump_hash_table (gcse_file, "Code Hoisting Expressions", expr_hash_table,
                     expr_hash_table_size, n_exprs);

  if (n_exprs > 0)
    {
      alloc_code_hoist_mem (n_basic_blocks, n_exprs);
      compute_code_hoist_data ();
      hoist_code ();
      free_code_hoist_mem ();
    }

  free_expr_hash_table ();

  return changed;
}

/* Here we provide the things required to do store motion towards
   the exit.  In order for this to be effective, gcse also needed to
   be taught how to move a load when it is killed only by a store to
   itself.

            int i;
            float a[10];

            void foo(float scale)
            {
              for (i=0; i<10; i++)
                a[i] *= scale;
            }

   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.

   The 'Load Motion' referred to and implemented in this file is
   an enhancement to gcse which, when using edge based LCM, recognizes
   this situation and allows gcse to move the load out of the loop.

   Once gcse has hoisted the load, store motion can then push this
   load towards the exit, and we end up with no loads or stores of 'i'
   in the loop.  */

/* This will search the ldst list for a matching expression.  If it
   doesn't find one, we create one and initialize it.  */

static struct ls_expr *
ldst_entry (x)
     rtx x;
{
  struct ls_expr * ptr;

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    if (expr_equiv_p (ptr->pattern, x))
      break;

  if (!ptr)
    {
      ptr = (struct ls_expr *) xmalloc (sizeof (struct ls_expr));

      ptr->next = pre_ldst_mems;
      ptr->expr = NULL;
      ptr->pattern = x;
      ptr->loads = NULL_RTX;
      ptr->stores = NULL_RTX;
      ptr->reaching_reg = NULL_RTX;
      ptr->invalid = 0;
      ptr->index = 0;
      ptr->hash_index = 0;
      pre_ldst_mems = ptr;
    }

  return ptr;
}

/* Free up an individual ldst entry.  */

static void
free_ldst_entry (ptr)
     struct ls_expr * ptr;
{
  free_INSN_LIST_list (& ptr->loads);
  free_INSN_LIST_list (& ptr->stores);

  free (ptr);
}

/* Free up all memory associated with the ldst list.  */

static void
free_ldst_mems ()
{
  while (pre_ldst_mems)
    {
      struct ls_expr * tmp = pre_ldst_mems;

      pre_ldst_mems = pre_ldst_mems->next;

      free_ldst_entry (tmp);
    }

  pre_ldst_mems = NULL;
}
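/* A usage sketch (hypothetical callers, for illustration) of the
   lookup-or-create behavior of ldst_entry: two calls with equivalent MEM
   patterns return the same entry, so loads and stores of one location
   accumulate on a single ls_expr:

        ptr = ldst_entry (mem);            creates the entry
        ptr->loads = alloc_INSN_LIST (load_insn, ptr->loads);
        ...
        ptr = ldst_entry (equiv_mem);      finds the same entry
        ptr->stores = alloc_INSN_LIST (store_insn, ptr->stores);  */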
/* Dump debugging info about the ldst list.  */

static void
print_ldst_list (file)
     FILE * file;
{
  struct ls_expr * ptr;

  fprintf (file, "LDST list: \n");

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      fprintf (file, "  Pattern (%3d): ", ptr->index);

      print_rtl (file, ptr->pattern);

      fprintf (file, "\n         Loads : ");

      if (ptr->loads)
        print_rtl (file, ptr->loads);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n        Stores : ");

      if (ptr->stores)
        print_rtl (file, ptr->stores);
      else
        fprintf (file, "(nil)");

      fprintf (file, "\n\n");
    }

  fprintf (file, "\n");
}

/* Return the entry in the list of ldst only expressions whose pattern is
   equivalent to X and which is still valid, or NULL if there is none.  */

static struct ls_expr *
find_rtx_in_ldst (x)
     rtx x;
{
  struct ls_expr * ptr;

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    if (expr_equiv_p (ptr->pattern, x) && ! ptr->invalid)
      return ptr;

  return NULL;
}

/* Assign each element of the list of mems a monotonically increasing value.  */

static int
enumerate_ldsts ()
{
  struct ls_expr * ptr;
  int n = 0;

  for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
    ptr->index = n++;

  return n;
}

/* Return the first item in the list.  */

static inline struct ls_expr *
first_ls_expr ()
{
  return pre_ldst_mems;
}

/* Return the next item in the list after the specified one.  */

static inline struct ls_expr *
next_ls_expr (ptr)
     struct ls_expr * ptr;
{
  return ptr->next;
}

/* Load Motion for loads which only kill themselves.  */

/* Return true if X is a simple MEM operation, with no registers or
   side effects.  These are the types of loads we consider for the
   ld_motion list; otherwise we let the usual aliasing take care of it.  */

static int
simple_mem (x)
     rtx x;
{
  if (GET_CODE (x) != MEM)
    return 0;

  if (MEM_VOLATILE_P (x))
    return 0;

  if (GET_MODE (x) == BLKmode)
    return 0;

  if (!rtx_varies_p (XEXP (x, 0), 0))
    return 1;

  return 0;
}

/* Make sure there isn't a buried reference in this pattern anywhere.
   If there is, invalidate the entry for it since we're not capable
   of fixing it up just yet.  We have to be sure we know about ALL
   loads, since the aliasing code will allow all entries in the
   ld_motion list to not-alias one another.  If we miss a load, we
   will get the wrong value, since gcse might common it and we won't
   know to fix it up.  */

static void
invalidate_any_buried_refs (x)
     rtx x;
{
  const char * fmt;
  int i, j;
  struct ls_expr * ptr;

  /* Invalidate it in the list.  */
  if (GET_CODE (x) == MEM && simple_mem (x))
    {
      ptr = ldst_entry (x);
      ptr->invalid = 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        invalidate_any_buried_refs (XEXP (x, i));
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          invalidate_any_buried_refs (XVECEXP (x, i, j));
    }
}
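/* For example (illustrative RTL, not from a real dump): in

        (set (reg 60) (plus:SI (mem:SI (symbol_ref "i")) (const_int 1)))

   the MEM is buried inside the PLUS rather than being the entire
   SET_SRC, so compute_ld_motion_mems below cannot rewrite it; the walk
   above marks its ls_expr entry invalid so trim_ld_motion_mems will
   discard it.  */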
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no
   side effects and no registers in the expression.  If there are any
   uses/defs which don't match these criteria, they are invalidated and
   trimmed out later.  */

static void
compute_ld_motion_mems ()
{
  struct ls_expr * ptr;
  int bb;
  rtx insn;

  pre_ldst_mems = NULL;

  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      for (insn = BLOCK_HEAD (bb);
           insn && insn != NEXT_INSN (BLOCK_END (bb));
           insn = NEXT_INSN (insn))
        {
          if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
            {
              if (GET_CODE (PATTERN (insn)) == SET)
                {
                  rtx src = SET_SRC (PATTERN (insn));
                  rtx dest = SET_DEST (PATTERN (insn));

                  /* Check for a simple LOAD...  */
                  if (GET_CODE (src) == MEM && simple_mem (src))
                    {
                      ptr = ldst_entry (src);
                      if (GET_CODE (dest) == REG)
                        ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
                      else
                        ptr->invalid = 1;
                    }
                  else
                    {
                      /* Make sure there isn't a buried load somewhere.  */
                      invalidate_any_buried_refs (src);
                    }

                  /* Check for stores.  Don't worry about aliased ones; they
                     will block any movement we might do later.  We only care
                     about this exact pattern, since those are the only
                     circumstances in which we will ignore the aliasing
                     info.  */
                  if (GET_CODE (dest) == MEM && simple_mem (dest))
                    {
                      ptr = ldst_entry (dest);

                      if (GET_CODE (src) != MEM
                          && GET_CODE (src) != ASM_OPERANDS)
                        ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
                      else
                        ptr->invalid = 1;
                    }
                }
              else
                invalidate_any_buried_refs (PATTERN (insn));
            }
        }
    }
}

/* Remove any references that have been either invalidated or are not in the
   expression list for PRE gcse.  */

static void
trim_ld_motion_mems ()
{
  struct ls_expr * last = NULL;
  struct ls_expr * ptr = first_ls_expr ();

  while (ptr != NULL)
    {
      int del = ptr->invalid;
      struct expr * expr = NULL;

      /* Delete if entry has been made invalid.  */
      if (!del)
        {
          unsigned int i;

          del = 1;
          /* Delete if we cannot find this mem in the expression list.  */
          for (i = 0; i < expr_hash_table_size && del; i++)
            {
              for (expr = expr_hash_table[i];
                   expr != NULL;
                   expr = expr->next_same_hash)
                if (expr_equiv_p (expr->expr, ptr->pattern))
                  {
                    del = 0;
                    break;
                  }
            }
        }

      if (del)
        {
          if (last != NULL)
            {
              last->next = ptr->next;
              free_ldst_entry (ptr);
              ptr = last->next;
            }
          else
            {
              pre_ldst_mems = pre_ldst_mems->next;
              free_ldst_entry (ptr);
              ptr = pre_ldst_mems;
            }
        }
      else
        {
          /* Set the expression field if we are keeping it.  */
          last = ptr;
          ptr->expr = expr;
          ptr = ptr->next;
        }
    }

  /* Show the world what we've found.  */
  if (gcse_file && pre_ldst_mems != NULL)
    print_ldst_list (gcse_file);
}

/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */
static void
update_ld_motion_stores (expr)
     struct expr * expr;
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We could try to find just the REACHED stores, but it shouldn't
         matter to set the reaching reg everywhere...  some might be
         dead and should be eliminated later.  */

      /* We replace  SET mem = expr  with
           SET reg = expr
           SET mem = reg , where reg is the
         reaching reg used in the load.  */
      rtx list = mem_ptr->stores;

      for ( ; list != NULL_RTX; list = XEXP (list, 1))
        {
          rtx insn = XEXP (list, 0);
          rtx pat = PATTERN (insn);
          rtx src = SET_SRC (pat);
          rtx reg = expr->reaching_reg;
          rtx copy, new;

          /* If we've already copied it, continue.  */
          if (expr->reaching_reg == src)
            continue;

          if (gcse_file)
            {
              fprintf (gcse_file, "PRE: store updated with reaching reg ");
              print_rtl (gcse_file, expr->reaching_reg);
              fprintf (gcse_file, ":\n  ");
              print_inline_rtx (gcse_file, insn, 8);
              fprintf (gcse_file, "\n");
            }

          copy = gen_move_insn (reg, SET_SRC (pat));
          new = emit_insn_before (copy, insn);
          record_one_set (REGNO (reg), new);
          SET_SRC (pat) = reg;

          /* Un-recognize this pattern since it's probably different now.  */
          INSN_CODE (insn) = -1;
          gcse_create_count++;
        }
    }
}

/* Store motion code.  */

/* This is used to communicate the target bitvector we want to use in the
   reg_set_info routine when called via the note_stores mechanism.  */
static sbitmap * regvec;

/* Used in computing the reverse edge graph bit vectors.  */
static sbitmap * st_antloc;

/* Global holding the number of store expressions we are dealing with.  */
static int num_stores;

/* Check whether we need to record that DEST has been set; called from
   note_stores.  */

static void
reg_set_info (dest, setter, data)
     rtx dest, setter ATTRIBUTE_UNUSED;
     void * data ATTRIBUTE_UNUSED;
{
  if (GET_CODE (dest) == SUBREG)
    dest = SUBREG_REG (dest);

  if (GET_CODE (dest) == REG)
    SET_BIT (*regvec, REGNO (dest));
}

/* Return non-zero if the register operands of expression X are never
   killed anywhere in basic block BB; that is, if it is safe to evaluate
   X's address at either end of the block.  (The callers all test
   !store_ops_ok to detect a kill, so a zero return means "killed".)  */

static int
store_ops_ok (x, bb)
     rtx x;
     basic_block bb;
{
  int i;
  enum rtx_code code;
  const char * fmt;

  /* Repeat is used to turn tail-recursion into iteration.  */
 repeat:

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      /* If a reg has changed after us in this
         block, the operand has been killed.  */
      return ! TEST_BIT (reg_set_in_block[bb->index], REGNO (x));

    case MEM:
      x = XEXP (x, 0);
      goto repeat;

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  i = GET_RTX_LENGTH (code) - 1;
  fmt = GET_RTX_FORMAT (code);

  for (; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          rtx tem = XEXP (x, i);

          /* If we are about to do the last recursive call
             needed at this level, change it into iteration.
             This function is called enough to be worth it.  */
          if (i == 0)
            {
              x = tem;
              goto repeat;
            }

          if (! store_ops_ok (tem, bb))
            return 0;
        }
      else if (fmt[i] == 'E')
        {
          int j;

          for (j = 0; j < XVECLEN (x, i); j++)
            {
              if (! store_ops_ok (XVECEXP (x, i, j), bb))
                return 0;
            }
        }
    }

  return 1;
}

/* Determine whether INSN is a MEM store pattern that we will consider
   moving.  */

static void
find_moveable_store (insn)
     rtx insn;
{
  struct ls_expr * ptr;
  rtx dest = PATTERN (insn);

  if (GET_CODE (dest) != SET
      || GET_CODE (SET_SRC (dest)) == ASM_OPERANDS)
    return;

  dest = SET_DEST (dest);

  if (GET_CODE (dest) != MEM || MEM_VOLATILE_P (dest)
      || GET_MODE (dest) == BLKmode)
    return;

  if (GET_CODE (XEXP (dest, 0)) != SYMBOL_REF)
    return;

  if (rtx_varies_p (XEXP (dest, 0), 0))
    return;

  ptr = ldst_entry (dest);
  ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
}

/* Build the table of candidate store expressions and the per-block
   register-set bitmaps used by store motion.  Return the number of
   candidate store expressions found.  */

static int
compute_store_table ()
{
  int bb, ret;
  unsigned regno;
  rtx insn, pat;

  max_gcse_regno = max_reg_num ();

  reg_set_in_block = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks,
                                                       max_gcse_regno);
  sbitmap_vector_zero (reg_set_in_block, n_basic_blocks);
  pre_ldst_mems = 0;

  /* Find all the stores we care about.  */
  for (bb = 0; bb < n_basic_blocks; bb++)
    {
      regvec = & (reg_set_in_block[bb]);
      for (insn = BLOCK_END (bb);
           insn && insn != PREV_INSN (BLOCK_HEAD (bb));
           insn = PREV_INSN (insn))
        {
          /* Ignore anything that is not a normal insn.  */
          if (! INSN_P (insn))
            continue;

          if (GET_CODE (insn) == CALL_INSN)
            {
              bool clobbers_all = false;
#ifdef NON_SAVING_SETJMP
              if (NON_SAVING_SETJMP
                  && find_reg_note (insn, REG_SETJMP, NULL_RTX))
                clobbers_all = true;
#endif

              for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
                if (clobbers_all
                    || TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
                  SET_BIT (reg_set_in_block[bb], regno);
            }

          pat = PATTERN (insn);
          note_stores (pat, reg_set_info, NULL);

          /* Now that we've marked regs, look for stores.  */
          if (GET_CODE (pat) == SET)
            find_moveable_store (insn);
        }
    }

  ret = enumerate_ldsts ();

  if (gcse_file)
    {
      fprintf (gcse_file, "Store Motion Expressions.\n");
      print_ldst_list (gcse_file);
    }

  return ret;
}

/* Check to see if the load X is aliased with STORE_PATTERN.  */

static int
load_kills_store (x, store_pattern)
     rtx x, store_pattern;
{
  if (true_dependence (x, GET_MODE (x), store_pattern, rtx_addr_varies_p))
    return 1;
  return 0;
}
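/* For instance (illustrative): with a STORE_PATTERN of
   (mem:SI (symbol_ref "a")), a load such as (mem:SI (reg 60)) may refer
   to the same word, so true_dependence reports a dependence unless the
   alias analysis can prove that the two addresses never conflict.  */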
/* Go through the entire insn X, looking for any loads which might alias
   STORE_PATTERN.  Return 1 if found.  */

static int
find_loads (x, store_pattern)
     rtx x, store_pattern;
{
  const char * fmt;
  int i, j;
  int ret = 0;

  if (!x)
    return 0;

  if (GET_CODE (x) == SET)
    x = SET_SRC (x);

  if (GET_CODE (x) == MEM)
    {
      if (load_kills_store (x, store_pattern))
        return 1;
    }

  /* Recursively process the insn.  */
  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0 && !ret; i--)
    {
      if (fmt[i] == 'e')
        ret |= find_loads (XEXP (x, i), store_pattern);
      else if (fmt[i] == 'E')
        for (j = XVECLEN (x, i) - 1; j >= 0; j--)
          ret |= find_loads (XVECEXP (x, i, j), store_pattern);
    }
  return ret;
}

/* Check if INSN kills the store pattern X (is aliased with it).
   Return 1 if it does.  */

static int
store_killed_in_insn (x, insn)
     rtx x, insn;
{
  if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
    return 0;

  if (GET_CODE (insn) == CALL_INSN)
    {
      /* A normal or pure call might read from pattern,
         but a const call will not.  */
      if (CONST_OR_PURE_CALL_P (insn))
        {
          rtx link;

          for (link = CALL_INSN_FUNCTION_USAGE (insn);
               link;
               link = XEXP (link, 1))
            if (GET_CODE (XEXP (link, 0)) == USE
                && GET_CODE (XEXP (XEXP (link, 0), 0)) == MEM
                && GET_CODE (XEXP (XEXP (XEXP (link, 0), 0), 0)) == SCRATCH)
              return 1;
          return 0;
        }
      else
        return 1;
    }

  if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx pat = PATTERN (insn);
      /* Check for memory stores to aliased objects.  */
      if (GET_CODE (SET_DEST (pat)) == MEM && !expr_equiv_p (SET_DEST (pat), x))
        /* Pretend it's a load and check for aliasing.  */
        if (find_loads (SET_DEST (pat), x))
          return 1;
      return find_loads (SET_SRC (pat), x);
    }
  else
    return find_loads (PATTERN (insn), x);
}

/* Returns 1 if the expression X is loaded or clobbered on or after INSN
   within basic block BB.  */

static int
store_killed_after (x, insn, bb)
     rtx x, insn;
     basic_block bb;
{
  rtx last = bb->end;

  if (insn == last)
    return 0;

  /* Check if the register operands of the store are OK in this block.
     Note that if registers are changed ANYWHERE in the block, we'll
     decide we can't move it, regardless of whether it changed above
     or below the store.  This could be improved by checking the register
     operands while looking for aliasing in each insn.  */
  if (!store_ops_ok (XEXP (x, 0), bb))
    return 1;

  for ( ; insn && insn != NEXT_INSN (last); insn = NEXT_INSN (insn))
    if (store_killed_in_insn (x, insn))
      return 1;

  return 0;
}
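/* An illustrative kill (editorial sketch): in a block containing

        a = x;          the candidate store
        y = a;          a later load of the same location

   the store is killed after its insn; sinking it below the load would
   change the value the load sees, so it may not move toward the block
   exit past that point.  */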
/* Returns 1 if the expression X is loaded or clobbered on or before INSN
   within basic block BB.  */

static int
store_killed_before (x, insn, bb)
     rtx x, insn;
     basic_block bb;
{
  rtx first = bb->head;

  if (insn == first)
    return store_killed_in_insn (x, insn);

  /* Check if the register operands of the store are OK in this block.
     Note that if registers are changed ANYWHERE in the block, we'll
     decide we can't move it, regardless of whether it changed above
     or below the store.  This could be improved by checking the register
     operands while looking for aliasing in each insn.  */
  if (!store_ops_ok (XEXP (x, 0), bb))
    return 1;

  for ( ; insn && insn != PREV_INSN (first); insn = PREV_INSN (insn))
    if (store_killed_in_insn (x, insn))
      return 1;

  return 0;
}

#define ANTIC_STORE_LIST(x)     ((x)->loads)
#define AVAIL_STORE_LIST(x)     ((x)->stores)

/* Given the table of available store insns at the end of blocks,
   determine which ones are not killed by aliasing, and generate
   the appropriate vectors for gen and kill.  */
static void
build_store_vectors ()
{
  basic_block bb;
  int b;
  rtx insn, st;
  struct ls_expr * ptr;

  /* Build the gen_vector.  This is any store in the table which is not killed
     by aliasing later in its block.  */
  ae_gen = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (ae_gen, n_basic_blocks);

  st_antloc = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (st_antloc, n_basic_blocks);

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      /* Put all the stores into either the antic list, or the avail list,
         or both.  */
      rtx store_list = ptr->stores;
      ptr->stores = NULL_RTX;

      for (st = store_list; st != NULL; st = XEXP (st, 1))
        {
          insn = XEXP (st, 0);
          bb = BLOCK_FOR_INSN (insn);

          if (!store_killed_after (ptr->pattern, insn, bb))
            {
              /* If we've already seen an available expression in this block,
                 we can delete the one we saw already (it occurs earlier in
                 the block) and replace it with this one.  We'll copy the
                 old SRC expression to an unused register in case there
                 are any side effects.  */
              if (TEST_BIT (ae_gen[bb->index], ptr->index))
                {
                  /* Find the previous store.  */
                  rtx st;
                  for (st = AVAIL_STORE_LIST (ptr); st ; st = XEXP (st, 1))
                    if (BLOCK_FOR_INSN (XEXP (st, 0)) == bb)
                      break;
                  if (st)
                    {
                      rtx r = gen_reg_rtx (GET_MODE (ptr->pattern));
                      if (gcse_file)
                        fprintf (gcse_file, "Removing redundant store:\n");
                      replace_store_insn (r, XEXP (st, 0), bb);
                      XEXP (st, 0) = insn;
                      continue;
                    }
                }
              SET_BIT (ae_gen[bb->index], ptr->index);
              AVAIL_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
                                                        AVAIL_STORE_LIST (ptr));
            }

          if (!store_killed_before (ptr->pattern, insn, bb))
            {
              SET_BIT (st_antloc[BLOCK_NUM (insn)], ptr->index);
              ANTIC_STORE_LIST (ptr) = alloc_INSN_LIST (insn,
                                                        ANTIC_STORE_LIST (ptr));
            }
        }

      /* Free the original list of store insns.  */
      free_INSN_LIST_list (&store_list);
    }

  ae_kill = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (ae_kill, n_basic_blocks);

  transp = (sbitmap *) sbitmap_vector_alloc (n_basic_blocks, num_stores);
  sbitmap_vector_zero (transp, n_basic_blocks);

  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    for (b = 0; b < n_basic_blocks; b++)
      {
        if (store_killed_after (ptr->pattern, BLOCK_HEAD (b), BASIC_BLOCK (b)))
          {
            /* The anticipatable expression is not killed if it's gen'd.

               We leave this check out for now.  If we have a code sequence
               in a block which looks like:
                 ST MEMa = x
                 L  y = MEMa
                 ST MEMa = z
               we should flag this as having an ANTIC expression, NOT
               transparent, NOT killed, and AVAIL.
               Unfortunately, since we haven't re-written all loads to
               use the reaching reg, we'll end up doing an incorrect
               load in the middle here if we push the store down.  It happens
               in gcc.c-torture/execute/960311-1.c with -O3.
               If we always kill it in this case, we'll sometimes do
               unnecessary work, but it shouldn't actually hurt anything.
                 if (!TEST_BIT (ae_gen[b], ptr->index)).  */
            SET_BIT (ae_kill[b], ptr->index);
          }
        else
          SET_BIT (transp[b], ptr->index);
      }

  /* Any block with no exits calls some non-returning function, so
     we better mark the store killed here, or we might not store to
     it at all.  If we knew it was abort, we wouldn't have to store,
     but we don't know that for sure.  */
  if (gcse_file)
    {
      fprintf (gcse_file, "ST_avail and ST_antic (shown under loads..)\n");
      print_ldst_list (gcse_file);
      dump_sbitmap_vector (gcse_file, "st_antloc", "", st_antloc, n_basic_blocks);
      dump_sbitmap_vector (gcse_file, "st_kill", "", ae_kill, n_basic_blocks);
      dump_sbitmap_vector (gcse_file, "Transpt", "", transp, n_basic_blocks);
      dump_sbitmap_vector (gcse_file, "st_avloc", "", ae_gen, n_basic_blocks);
    }
}

/* Insert an instruction at the beginning of a basic block, and update
   the BLOCK_HEAD if needed.  */

static void
insert_insn_start_bb (insn, bb)
     rtx insn;
     basic_block bb;
{
  /* Insert at start of successor block.  */
  rtx prev = PREV_INSN (bb->head);
  rtx before = bb->head;
  while (before != 0)
    {
      if (GET_CODE (before) != CODE_LABEL
          && (GET_CODE (before) != NOTE
              || NOTE_LINE_NUMBER (before) != NOTE_INSN_BASIC_BLOCK))
        break;
      prev = before;
      if (prev == bb->end)
        break;
      before = NEXT_INSN (before);
    }

  insn = emit_insn_after (insn, prev);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert store at start of BB %d:\n",
               bb->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }
}

/* This routine will insert a store on an edge.  EXPR is the ldst entry for
   the memory reference, and E is the edge to insert it on.  Returns non-zero
   if an edge insertion was performed.  */

static int
insert_store (expr, e)
     struct ls_expr * expr;
     edge e;
{
  rtx reg, insn;
  basic_block bb;
  edge tmp;

  /* We did all the deletions before this insert, so if we didn't delete a
     store, then we haven't set the reaching reg yet either.  */
  if (expr->reaching_reg == NULL_RTX)
    return 0;

  reg = expr->reaching_reg;
  insn = gen_move_insn (expr->pattern, reg);

  /* If we are inserting this expression on ALL predecessor edges of a BB,
     insert it at the start of the BB, and reset the insert bits on the other
     edges so we don't try to insert it on the other edges.  */
  bb = e->dest;
  for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
    {
      int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
      if (index == EDGE_INDEX_NO_EDGE)
        abort ();
      if (! TEST_BIT (pre_insert_map[index], expr->index))
        break;
    }
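  /* For instance (illustrative): if BB has exactly two predecessor edges
     (P1, BB) and (P2, BB) and the LCM insertion maps request the store on
     both, inserting once at the start of BB is equivalent and avoids
     splitting either edge.  */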
  /* If TMP is NULL, we found an insertion on every edge; blank the
     insertion vector for these edges, and insert at the start of the BB.  */
  if (!tmp && bb != EXIT_BLOCK_PTR)
    {
      for (tmp = e->dest->pred; tmp ; tmp = tmp->pred_next)
        {
          int index = EDGE_INDEX (edge_list, tmp->src, tmp->dest);
          RESET_BIT (pre_insert_map[index], expr->index);
        }
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  /* We can't insert on this edge, so we'll insert at the head of the
     successor's block.  See Morgan, sec 10.5.  */
  if ((e->flags & EDGE_ABNORMAL) == EDGE_ABNORMAL)
    {
      insert_insn_start_bb (insn, bb);
      return 0;
    }

  insert_insn_on_edge (insn, e);

  if (gcse_file)
    {
      fprintf (gcse_file, "STORE_MOTION insert insn on edge (%d, %d):\n",
               e->src->index, e->dest->index);
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }

  return 1;
}

/* This routine will replace a store with a SET to a specified register.  */

static void
replace_store_insn (reg, del, bb)
     rtx reg, del;
     basic_block bb;
{
  rtx insn;

  insn = gen_move_insn (reg, SET_SRC (PATTERN (del)));
  insn = emit_insn_after (insn, del);

  if (gcse_file)
    {
      fprintf (gcse_file,
               "STORE_MOTION delete insn in BB %d:\n ", bb->index);
      print_inline_rtx (gcse_file, del, 6);
      fprintf (gcse_file, "\nSTORE MOTION replaced with insn:\n ");
      print_inline_rtx (gcse_file, insn, 6);
      fprintf (gcse_file, "\n");
    }

  delete_insn (del);
}

/* Delete a store, but copy the value that would have been stored into
   the reaching_reg for later storing.  */

static void
delete_store (expr, bb)
     struct ls_expr * expr;
     basic_block bb;
{
  rtx reg, i, del;

  if (expr->reaching_reg == NULL_RTX)
    expr->reaching_reg = gen_reg_rtx (GET_MODE (expr->pattern));

  /* If there is more than one store, the earlier ones will be dead,
     but it doesn't hurt to replace them here.  */
  reg = expr->reaching_reg;

  for (i = AVAIL_STORE_LIST (expr); i; i = XEXP (i, 1))
    {
      del = XEXP (i, 0);
      if (BLOCK_FOR_INSN (del) == bb)
        {
          /* We know there is only one, since we deleted redundant
             ones during the available computation.  */
          replace_store_insn (reg, del, bb);
          break;
        }
    }
}

/* Free memory used by store motion.  */

static void
free_store_memory ()
{
  free_ldst_mems ();

  if (ae_gen)
    sbitmap_vector_free (ae_gen);
  if (ae_kill)
    sbitmap_vector_free (ae_kill);
  if (transp)
    sbitmap_vector_free (transp);
  if (st_antloc)
    sbitmap_vector_free (st_antloc);
  if (pre_insert_map)
    sbitmap_vector_free (pre_insert_map);
  if (pre_delete_map)
    sbitmap_vector_free (pre_delete_map);
  if (reg_set_in_block)
    sbitmap_vector_free (reg_set_in_block);

  ae_gen = ae_kill = transp = st_antloc = NULL;
  pre_insert_map = pre_delete_map = reg_set_in_block = NULL;
}
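/* A source-level sketch (editorial, following the example earlier in
   this file) of the combined effect: once load motion has pulled the
   load of 'i' out of the loop, each iteration touches only the pseudo,
   and store motion sinks the single remaining store to the loop exit:

        reg = i;                load hoisted out of the loop by gcse
        for (...)
          reg = reg + 1;        loop body uses only the pseudo
        i = reg;                store pushed to the exit by store motion  */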
/* Perform store motion.  Much like gcse, except we move expressions the
   other way by looking at the flowgraph in reverse.  */

static void
store_motion ()
{
  int x;
  struct ls_expr * ptr;
  int update_flow = 0;

  if (gcse_file)
    {
      fprintf (gcse_file, "before store motion\n");
      print_rtl (gcse_file, get_insns ());
    }

  init_alias_analysis ();

  /* Find all the stores that are live to the end of their block.  */
  num_stores = compute_store_table ();
  if (num_stores == 0)
    {
      sbitmap_vector_free (reg_set_in_block);
      end_alias_analysis ();
      return;
    }

  /* Now compute what's actually available to move.  */
  add_noreturn_fake_exit_edges ();
  build_store_vectors ();

  edge_list = pre_edge_rev_lcm (gcse_file, num_stores, transp, ae_gen,
                                st_antloc, ae_kill, &pre_insert_map,
                                &pre_delete_map);

  /* Now we want to insert the new stores which are going to be needed.  */
  for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
    {
      for (x = 0; x < n_basic_blocks; x++)
        if (TEST_BIT (pre_delete_map[x], ptr->index))
          delete_store (ptr, BASIC_BLOCK (x));

      for (x = 0; x < NUM_EDGES (edge_list); x++)
        if (TEST_BIT (pre_insert_map[x], ptr->index))
          update_flow |= insert_store (ptr, INDEX_EDGE (edge_list, x));
    }

  if (update_flow)
    commit_edge_insertions ();

  free_store_memory ();
  free_edge_list (edge_list);
  remove_fake_edges ();
  end_alias_analysis ();
}