1132718Skan/* Loop unrolling and peeling. 2169689Skan Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 3132718Skan 4132718SkanThis file is part of GCC. 5132718Skan 6132718SkanGCC is free software; you can redistribute it and/or modify it under 7132718Skanthe terms of the GNU General Public License as published by the Free 8132718SkanSoftware Foundation; either version 2, or (at your option) any later 9132718Skanversion. 10132718Skan 11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 13132718SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14132718Skanfor more details. 15132718Skan 16132718SkanYou should have received a copy of the GNU General Public License 17132718Skanalong with GCC; see the file COPYING. If not, write to the Free 18169689SkanSoftware Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 19169689Skan02110-1301, USA. */ 20132718Skan 21132718Skan#include "config.h" 22132718Skan#include "system.h" 23132718Skan#include "coretypes.h" 24132718Skan#include "tm.h" 25132718Skan#include "rtl.h" 26132718Skan#include "hard-reg-set.h" 27169689Skan#include "obstack.h" 28132718Skan#include "basic-block.h" 29132718Skan#include "cfgloop.h" 30132718Skan#include "cfglayout.h" 31132718Skan#include "params.h" 32132718Skan#include "output.h" 33132718Skan#include "expr.h" 34169689Skan#include "hashtab.h" 35169689Skan#include "recog.h" 36132718Skan 37132718Skan/* This pass performs loop unrolling and peeling. We only perform these 38132718Skan optimizations on innermost loops (with single exception) because 39132718Skan the impact on performance is greatest here, and we want to avoid 40132718Skan unnecessary code size growth. The gain is caused by greater sequentiality 41132718Skan of code, better code to optimize for further passes and in some cases 42132718Skan by fewer testings of exit conditions. The main problem is code growth, 43132718Skan that impacts performance negatively due to effect of caches. 44132718Skan 45132718Skan What we do: 46132718Skan 47132718Skan -- complete peeling of once-rolling loops; this is the above mentioned 48132718Skan exception, as this causes loop to be cancelled completely and 49132718Skan does not cause code growth 50132718Skan -- complete peeling of loops that roll (small) constant times. 51132718Skan -- simple peeling of first iterations of loops that do not roll much 52132718Skan (according to profile feedback) 53132718Skan -- unrolling of loops that roll constant times; this is almost always 54132718Skan win, as we get rid of exit condition tests. 55132718Skan -- unrolling of loops that roll number of times that we can compute 56132718Skan in runtime; we also get rid of exit condition tests here, but there 57132718Skan is the extra expense for calculating the number of iterations 58132718Skan -- simple unrolling of remaining loops; this is performed only if we 59132718Skan are asked to, as the gain is questionable in this case and often 60132718Skan it may even slow down the code 61132718Skan For more detailed descriptions of each of those, see comments at 62132718Skan appropriate function below. 63132718Skan 64132718Skan There is a lot of parameters (defined and described in params.def) that 65132718Skan control how much we unroll/peel. 66132718Skan 67132718Skan ??? A great problem is that we don't have a good way how to determine 68132718Skan how many times we should unroll the loop; the experiments I have made 69132718Skan showed that this choice may affect performance in order of several %. 70132718Skan */ 71132718Skan 72169689Skan/* Information about induction variables to split. */ 73169689Skan 74169689Skanstruct iv_to_split 75169689Skan{ 76169689Skan rtx insn; /* The insn in that the induction variable occurs. */ 77169689Skan rtx base_var; /* The variable on that the values in the further 78169689Skan iterations are based. */ 79169689Skan rtx step; /* Step of the induction variable. */ 80169689Skan unsigned n_loc; 81169689Skan unsigned loc[3]; /* Location where the definition of the induction 82169689Skan variable occurs in the insn. For example if 83169689Skan N_LOC is 2, the expression is located at 84169689Skan XEXP (XEXP (single_set, loc[0]), loc[1]). */ 85169689Skan}; 86169689Skan 87169689Skan/* Information about accumulators to expand. */ 88169689Skan 89169689Skanstruct var_to_expand 90169689Skan{ 91169689Skan rtx insn; /* The insn in that the variable expansion occurs. */ 92169689Skan rtx reg; /* The accumulator which is expanded. */ 93169689Skan VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ 94169689Skan enum rtx_code op; /* The type of the accumulation - addition, subtraction 95169689Skan or multiplication. */ 96169689Skan int expansion_count; /* Count the number of expansions generated so far. */ 97169689Skan int reuse_expansion; /* The expansion we intend to reuse to expand 98169689Skan the accumulator. If REUSE_EXPANSION is 0 reuse 99169689Skan the original accumulator. Else use 100169689Skan var_expansions[REUSE_EXPANSION - 1]. */ 101169689Skan}; 102169689Skan 103169689Skan/* Information about optimization applied in 104169689Skan the unrolled loop. */ 105169689Skan 106169689Skanstruct opt_info 107169689Skan{ 108169689Skan htab_t insns_to_split; /* A hashtable of insns to split. */ 109169689Skan htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators 110169689Skan to expand. */ 111169689Skan unsigned first_new_block; /* The first basic block that was 112169689Skan duplicated. */ 113169689Skan basic_block loop_exit; /* The loop exit basic block. */ 114169689Skan basic_block loop_preheader; /* The loop preheader basic block. */ 115169689Skan}; 116169689Skan 117132718Skanstatic void decide_unrolling_and_peeling (struct loops *, int); 118132718Skanstatic void peel_loops_completely (struct loops *, int); 119132718Skanstatic void decide_peel_simple (struct loop *, int); 120132718Skanstatic void decide_peel_once_rolling (struct loop *, int); 121132718Skanstatic void decide_peel_completely (struct loop *, int); 122132718Skanstatic void decide_unroll_stupid (struct loop *, int); 123132718Skanstatic void decide_unroll_constant_iterations (struct loop *, int); 124132718Skanstatic void decide_unroll_runtime_iterations (struct loop *, int); 125132718Skanstatic void peel_loop_simple (struct loops *, struct loop *); 126132718Skanstatic void peel_loop_completely (struct loops *, struct loop *); 127132718Skanstatic void unroll_loop_stupid (struct loops *, struct loop *); 128132718Skanstatic void unroll_loop_constant_iterations (struct loops *, struct loop *); 129132718Skanstatic void unroll_loop_runtime_iterations (struct loops *, struct loop *); 130169689Skanstatic struct opt_info *analyze_insns_in_loop (struct loop *); 131169689Skanstatic void opt_info_start_duplication (struct opt_info *); 132169689Skanstatic void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); 133169689Skanstatic void free_opt_info (struct opt_info *); 134169689Skanstatic struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); 135169689Skanstatic bool referenced_in_one_insn_in_loop_p (struct loop *, rtx); 136169689Skanstatic struct iv_to_split *analyze_iv_to_split_insn (rtx); 137169689Skanstatic void expand_var_during_unrolling (struct var_to_expand *, rtx); 138169689Skanstatic int insert_var_expansion_initialization (void **, void *); 139169689Skanstatic int combine_var_copies_in_loop_exit (void **, void *); 140169689Skanstatic int release_var_copies (void **, void *); 141169689Skanstatic rtx get_expansion (struct var_to_expand *); 142132718Skan 143132718Skan/* Unroll and/or peel (depending on FLAGS) LOOPS. */ 144132718Skanvoid 145132718Skanunroll_and_peel_loops (struct loops *loops, int flags) 146132718Skan{ 147132718Skan struct loop *loop, *next; 148169689Skan bool check; 149132718Skan 150132718Skan /* First perform complete loop peeling (it is almost surely a win, 151132718Skan and affects parameters for further decision a lot). */ 152132718Skan peel_loops_completely (loops, flags); 153132718Skan 154132718Skan /* Now decide rest of unrolling and peeling. */ 155132718Skan decide_unrolling_and_peeling (loops, flags); 156132718Skan 157132718Skan loop = loops->tree_root; 158132718Skan while (loop->inner) 159132718Skan loop = loop->inner; 160132718Skan 161132718Skan /* Scan the loops, inner ones first. */ 162132718Skan while (loop != loops->tree_root) 163132718Skan { 164132718Skan if (loop->next) 165132718Skan { 166132718Skan next = loop->next; 167132718Skan while (next->inner) 168132718Skan next = next->inner; 169132718Skan } 170132718Skan else 171132718Skan next = loop->outer; 172132718Skan 173169689Skan check = true; 174132718Skan /* And perform the appropriate transformations. */ 175132718Skan switch (loop->lpt_decision.decision) 176132718Skan { 177132718Skan case LPT_PEEL_COMPLETELY: 178132718Skan /* Already done. */ 179169689Skan gcc_unreachable (); 180132718Skan case LPT_PEEL_SIMPLE: 181132718Skan peel_loop_simple (loops, loop); 182132718Skan break; 183132718Skan case LPT_UNROLL_CONSTANT: 184132718Skan unroll_loop_constant_iterations (loops, loop); 185132718Skan break; 186132718Skan case LPT_UNROLL_RUNTIME: 187132718Skan unroll_loop_runtime_iterations (loops, loop); 188132718Skan break; 189132718Skan case LPT_UNROLL_STUPID: 190132718Skan unroll_loop_stupid (loops, loop); 191132718Skan break; 192132718Skan case LPT_NONE: 193169689Skan check = false; 194132718Skan break; 195132718Skan default: 196169689Skan gcc_unreachable (); 197132718Skan } 198132718Skan if (check) 199132718Skan { 200132718Skan#ifdef ENABLE_CHECKING 201132718Skan verify_dominators (CDI_DOMINATORS); 202132718Skan verify_loop_structure (loops); 203132718Skan#endif 204132718Skan } 205132718Skan loop = next; 206132718Skan } 207169689Skan 208169689Skan iv_analysis_done (); 209132718Skan} 210132718Skan 211169689Skan/* Check whether exit of the LOOP is at the end of loop body. */ 212169689Skan 213169689Skanstatic bool 214169689Skanloop_exit_at_end_p (struct loop *loop) 215169689Skan{ 216169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 217169689Skan rtx insn; 218169689Skan 219169689Skan if (desc->in_edge->dest != loop->latch) 220169689Skan return false; 221169689Skan 222169689Skan /* Check that the latch is empty. */ 223169689Skan FOR_BB_INSNS (loop->latch, insn) 224169689Skan { 225169689Skan if (INSN_P (insn)) 226169689Skan return false; 227169689Skan } 228169689Skan 229169689Skan return true; 230169689Skan} 231169689Skan 232132718Skan/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */ 233132718Skanstatic void 234132718Skanpeel_loops_completely (struct loops *loops, int flags) 235132718Skan{ 236169689Skan struct loop *loop; 237169689Skan unsigned i; 238132718Skan 239169689Skan /* Scan the loops, the inner ones first. */ 240169689Skan for (i = loops->num - 1; i > 0; i--) 241132718Skan { 242169689Skan loop = loops->parray[i]; 243169689Skan if (!loop) 244169689Skan continue; 245132718Skan 246132718Skan loop->lpt_decision.decision = LPT_NONE; 247132718Skan 248169689Skan if (dump_file) 249169689Skan fprintf (dump_file, 250169689Skan "\n;; *** Considering loop %d for complete peeling ***\n", 251132718Skan loop->num); 252132718Skan 253132718Skan loop->ninsns = num_loop_insns (loop); 254132718Skan 255132718Skan decide_peel_once_rolling (loop, flags); 256132718Skan if (loop->lpt_decision.decision == LPT_NONE) 257132718Skan decide_peel_completely (loop, flags); 258132718Skan 259132718Skan if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) 260132718Skan { 261132718Skan peel_loop_completely (loops, loop); 262132718Skan#ifdef ENABLE_CHECKING 263132718Skan verify_dominators (CDI_DOMINATORS); 264132718Skan verify_loop_structure (loops); 265132718Skan#endif 266132718Skan } 267132718Skan } 268132718Skan} 269132718Skan 270132718Skan/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */ 271132718Skanstatic void 272132718Skandecide_unrolling_and_peeling (struct loops *loops, int flags) 273132718Skan{ 274132718Skan struct loop *loop = loops->tree_root, *next; 275132718Skan 276132718Skan while (loop->inner) 277132718Skan loop = loop->inner; 278132718Skan 279132718Skan /* Scan the loops, inner ones first. */ 280132718Skan while (loop != loops->tree_root) 281132718Skan { 282132718Skan if (loop->next) 283132718Skan { 284132718Skan next = loop->next; 285132718Skan while (next->inner) 286132718Skan next = next->inner; 287132718Skan } 288132718Skan else 289132718Skan next = loop->outer; 290132718Skan 291132718Skan loop->lpt_decision.decision = LPT_NONE; 292132718Skan 293169689Skan if (dump_file) 294169689Skan fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); 295132718Skan 296132718Skan /* Do not peel cold areas. */ 297132718Skan if (!maybe_hot_bb_p (loop->header)) 298132718Skan { 299169689Skan if (dump_file) 300169689Skan fprintf (dump_file, ";; Not considering loop, cold area\n"); 301132718Skan loop = next; 302132718Skan continue; 303132718Skan } 304132718Skan 305132718Skan /* Can the loop be manipulated? */ 306132718Skan if (!can_duplicate_loop_p (loop)) 307132718Skan { 308169689Skan if (dump_file) 309169689Skan fprintf (dump_file, 310132718Skan ";; Not considering loop, cannot duplicate\n"); 311132718Skan loop = next; 312132718Skan continue; 313132718Skan } 314132718Skan 315132718Skan /* Skip non-innermost loops. */ 316132718Skan if (loop->inner) 317132718Skan { 318169689Skan if (dump_file) 319169689Skan fprintf (dump_file, ";; Not considering loop, is not innermost\n"); 320132718Skan loop = next; 321132718Skan continue; 322132718Skan } 323132718Skan 324132718Skan loop->ninsns = num_loop_insns (loop); 325132718Skan loop->av_ninsns = average_num_loop_insns (loop); 326132718Skan 327132718Skan /* Try transformations one by one in decreasing order of 328132718Skan priority. */ 329132718Skan 330132718Skan decide_unroll_constant_iterations (loop, flags); 331132718Skan if (loop->lpt_decision.decision == LPT_NONE) 332132718Skan decide_unroll_runtime_iterations (loop, flags); 333132718Skan if (loop->lpt_decision.decision == LPT_NONE) 334132718Skan decide_unroll_stupid (loop, flags); 335132718Skan if (loop->lpt_decision.decision == LPT_NONE) 336132718Skan decide_peel_simple (loop, flags); 337132718Skan 338132718Skan loop = next; 339132718Skan } 340132718Skan} 341132718Skan 342132718Skan/* Decide whether the LOOP is once rolling and suitable for complete 343132718Skan peeling. */ 344132718Skanstatic void 345132718Skandecide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) 346132718Skan{ 347169689Skan struct niter_desc *desc; 348132718Skan 349169689Skan if (dump_file) 350169689Skan fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); 351169689Skan 352132718Skan /* Is the loop small enough? */ 353132718Skan if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) 354132718Skan { 355169689Skan if (dump_file) 356169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 357132718Skan return; 358132718Skan } 359132718Skan 360132718Skan /* Check for simple loops. */ 361169689Skan desc = get_simple_loop_desc (loop); 362132718Skan 363132718Skan /* Check number of iterations. */ 364169689Skan if (!desc->simple_p 365169689Skan || desc->assumptions 366169689Skan || desc->infinite 367169689Skan || !desc->const_iter 368169689Skan || desc->niter != 0) 369132718Skan { 370169689Skan if (dump_file) 371169689Skan fprintf (dump_file, 372169689Skan ";; Unable to prove that the loop rolls exactly once\n"); 373132718Skan return; 374132718Skan } 375132718Skan 376132718Skan /* Success. */ 377169689Skan if (dump_file) 378169689Skan fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); 379132718Skan loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; 380132718Skan} 381132718Skan 382132718Skan/* Decide whether the LOOP is suitable for complete peeling. */ 383132718Skanstatic void 384132718Skandecide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) 385132718Skan{ 386132718Skan unsigned npeel; 387169689Skan struct niter_desc *desc; 388132718Skan 389169689Skan if (dump_file) 390169689Skan fprintf (dump_file, "\n;; Considering peeling completely\n"); 391132718Skan 392132718Skan /* Skip non-innermost loops. */ 393132718Skan if (loop->inner) 394132718Skan { 395169689Skan if (dump_file) 396169689Skan fprintf (dump_file, ";; Not considering loop, is not innermost\n"); 397132718Skan return; 398132718Skan } 399132718Skan 400132718Skan /* Do not peel cold areas. */ 401132718Skan if (!maybe_hot_bb_p (loop->header)) 402132718Skan { 403169689Skan if (dump_file) 404169689Skan fprintf (dump_file, ";; Not considering loop, cold area\n"); 405132718Skan return; 406132718Skan } 407132718Skan 408132718Skan /* Can the loop be manipulated? */ 409132718Skan if (!can_duplicate_loop_p (loop)) 410132718Skan { 411169689Skan if (dump_file) 412169689Skan fprintf (dump_file, 413132718Skan ";; Not considering loop, cannot duplicate\n"); 414132718Skan return; 415132718Skan } 416132718Skan 417132718Skan /* npeel = number of iterations to peel. */ 418132718Skan npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; 419132718Skan if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) 420132718Skan npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); 421132718Skan 422132718Skan /* Is the loop small enough? */ 423132718Skan if (!npeel) 424132718Skan { 425169689Skan if (dump_file) 426169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 427132718Skan return; 428132718Skan } 429132718Skan 430132718Skan /* Check for simple loops. */ 431169689Skan desc = get_simple_loop_desc (loop); 432132718Skan 433132718Skan /* Check number of iterations. */ 434169689Skan if (!desc->simple_p 435169689Skan || desc->assumptions 436169689Skan || !desc->const_iter 437169689Skan || desc->infinite) 438132718Skan { 439169689Skan if (dump_file) 440169689Skan fprintf (dump_file, 441169689Skan ";; Unable to prove that the loop iterates constant times\n"); 442132718Skan return; 443132718Skan } 444132718Skan 445169689Skan if (desc->niter > npeel - 1) 446132718Skan { 447169689Skan if (dump_file) 448132718Skan { 449169689Skan fprintf (dump_file, 450169689Skan ";; Not peeling loop completely, rolls too much ("); 451169689Skan fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); 452169689Skan fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); 453132718Skan } 454132718Skan return; 455132718Skan } 456132718Skan 457132718Skan /* Success. */ 458169689Skan if (dump_file) 459169689Skan fprintf (dump_file, ";; Decided to peel loop completely\n"); 460132718Skan loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; 461132718Skan} 462132718Skan 463132718Skan/* Peel all iterations of LOOP, remove exit edges and cancel the loop 464132718Skan completely. The transformation done: 465132718Skan 466132718Skan for (i = 0; i < 4; i++) 467132718Skan body; 468132718Skan 469132718Skan ==> 470132718Skan 471132718Skan i = 0; 472132718Skan body; i++; 473132718Skan body; i++; 474132718Skan body; i++; 475132718Skan body; i++; 476132718Skan */ 477132718Skanstatic void 478132718Skanpeel_loop_completely (struct loops *loops, struct loop *loop) 479132718Skan{ 480132718Skan sbitmap wont_exit; 481132718Skan unsigned HOST_WIDE_INT npeel; 482132718Skan unsigned n_remove_edges, i; 483169689Skan edge *remove_edges, ein; 484169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 485169689Skan struct opt_info *opt_info = NULL; 486169689Skan 487132718Skan npeel = desc->niter; 488132718Skan 489132718Skan if (npeel) 490132718Skan { 491169689Skan bool ok; 492169689Skan 493132718Skan wont_exit = sbitmap_alloc (npeel + 1); 494132718Skan sbitmap_ones (wont_exit); 495132718Skan RESET_BIT (wont_exit, 0); 496169689Skan if (desc->noloop_assumptions) 497132718Skan RESET_BIT (wont_exit, 1); 498132718Skan 499169689Skan remove_edges = XCNEWVEC (edge, npeel); 500132718Skan n_remove_edges = 0; 501132718Skan 502169689Skan if (flag_split_ivs_in_unroller) 503169689Skan opt_info = analyze_insns_in_loop (loop); 504169689Skan 505169689Skan opt_info_start_duplication (opt_info); 506169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 507169689Skan loops, npeel, 508169689Skan wont_exit, desc->out_edge, 509169689Skan remove_edges, &n_remove_edges, 510169689Skan DLTHE_FLAG_UPDATE_FREQ 511169689Skan | DLTHE_FLAG_COMPLETTE_PEEL 512169689Skan | (opt_info 513169689Skan ? DLTHE_RECORD_COPY_NUMBER : 0)); 514169689Skan gcc_assert (ok); 515132718Skan 516132718Skan free (wont_exit); 517169689Skan 518169689Skan if (opt_info) 519169689Skan { 520169689Skan apply_opt_in_copies (opt_info, npeel, false, true); 521169689Skan free_opt_info (opt_info); 522169689Skan } 523132718Skan 524132718Skan /* Remove the exit edges. */ 525132718Skan for (i = 0; i < n_remove_edges; i++) 526132718Skan remove_path (loops, remove_edges[i]); 527132718Skan free (remove_edges); 528132718Skan } 529132718Skan 530169689Skan ein = desc->in_edge; 531169689Skan free_simple_loop_desc (loop); 532132718Skan 533132718Skan /* Now remove the unreachable part of the last iteration and cancel 534132718Skan the loop. */ 535169689Skan remove_path (loops, ein); 536132718Skan 537169689Skan if (dump_file) 538169689Skan fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); 539132718Skan} 540132718Skan 541169689Skan/* Decide whether to unroll LOOP iterating constant number of times 542169689Skan and how much. */ 543169689Skan 544132718Skanstatic void 545132718Skandecide_unroll_constant_iterations (struct loop *loop, int flags) 546132718Skan{ 547169689Skan unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; 548169689Skan struct niter_desc *desc; 549132718Skan 550132718Skan if (!(flags & UAP_UNROLL)) 551132718Skan { 552132718Skan /* We were not asked to, just return back silently. */ 553132718Skan return; 554132718Skan } 555132718Skan 556169689Skan if (dump_file) 557169689Skan fprintf (dump_file, 558169689Skan "\n;; Considering unrolling loop with constant " 559169689Skan "number of iterations\n"); 560132718Skan 561132718Skan /* nunroll = total number of copies of the original loop body in 562132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 563132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 564169689Skan nunroll_by_av 565169689Skan = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 566132718Skan if (nunroll > nunroll_by_av) 567132718Skan nunroll = nunroll_by_av; 568132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 569132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 570132718Skan 571132718Skan /* Skip big loops. */ 572132718Skan if (nunroll <= 1) 573132718Skan { 574169689Skan if (dump_file) 575169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 576132718Skan return; 577132718Skan } 578132718Skan 579132718Skan /* Check for simple loops. */ 580169689Skan desc = get_simple_loop_desc (loop); 581132718Skan 582132718Skan /* Check number of iterations. */ 583169689Skan if (!desc->simple_p || !desc->const_iter || desc->assumptions) 584132718Skan { 585169689Skan if (dump_file) 586169689Skan fprintf (dump_file, 587169689Skan ";; Unable to prove that the loop iterates constant times\n"); 588132718Skan return; 589132718Skan } 590132718Skan 591132718Skan /* Check whether the loop rolls enough to consider. */ 592169689Skan if (desc->niter < 2 * nunroll) 593132718Skan { 594169689Skan if (dump_file) 595169689Skan fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 596132718Skan return; 597132718Skan } 598132718Skan 599132718Skan /* Success; now compute number of iterations to unroll. We alter 600132718Skan nunroll so that as few as possible copies of loop body are 601132718Skan necessary, while still not decreasing the number of unrollings 602132718Skan too much (at most by 1). */ 603132718Skan best_copies = 2 * nunroll + 10; 604132718Skan 605132718Skan i = 2 * nunroll + 2; 606169689Skan if (i - 1 >= desc->niter) 607169689Skan i = desc->niter - 2; 608132718Skan 609132718Skan for (; i >= nunroll - 1; i--) 610132718Skan { 611169689Skan unsigned exit_mod = desc->niter % (i + 1); 612132718Skan 613169689Skan if (!loop_exit_at_end_p (loop)) 614132718Skan n_copies = exit_mod + i + 1; 615169689Skan else if (exit_mod != (unsigned) i 616169689Skan || desc->noloop_assumptions != NULL_RTX) 617132718Skan n_copies = exit_mod + i + 2; 618132718Skan else 619132718Skan n_copies = i + 1; 620132718Skan 621132718Skan if (n_copies < best_copies) 622132718Skan { 623132718Skan best_copies = n_copies; 624132718Skan best_unroll = i; 625132718Skan } 626132718Skan } 627132718Skan 628169689Skan if (dump_file) 629169689Skan fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", 630132718Skan best_unroll + 1, best_copies, nunroll); 631132718Skan 632132718Skan loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; 633132718Skan loop->lpt_decision.times = best_unroll; 634169689Skan 635169689Skan if (dump_file) 636169689Skan fprintf (dump_file, 637169689Skan ";; Decided to unroll the constant times rolling loop, %d times.\n", 638169689Skan loop->lpt_decision.times); 639132718Skan} 640132718Skan 641132718Skan/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 642132718Skan times. The transformation does this: 643132718Skan 644132718Skan for (i = 0; i < 102; i++) 645132718Skan body; 646132718Skan 647132718Skan ==> 648132718Skan 649132718Skan i = 0; 650132718Skan body; i++; 651132718Skan body; i++; 652132718Skan while (i < 102) 653132718Skan { 654132718Skan body; i++; 655132718Skan body; i++; 656132718Skan body; i++; 657132718Skan body; i++; 658132718Skan } 659132718Skan */ 660132718Skanstatic void 661132718Skanunroll_loop_constant_iterations (struct loops *loops, struct loop *loop) 662132718Skan{ 663132718Skan unsigned HOST_WIDE_INT niter; 664132718Skan unsigned exit_mod; 665132718Skan sbitmap wont_exit; 666132718Skan unsigned n_remove_edges, i; 667132718Skan edge *remove_edges; 668132718Skan unsigned max_unroll = loop->lpt_decision.times; 669169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 670169689Skan bool exit_at_end = loop_exit_at_end_p (loop); 671169689Skan struct opt_info *opt_info = NULL; 672169689Skan bool ok; 673169689Skan 674132718Skan niter = desc->niter; 675132718Skan 676169689Skan /* Should not get here (such loop should be peeled instead). */ 677169689Skan gcc_assert (niter > max_unroll + 1); 678132718Skan 679132718Skan exit_mod = niter % (max_unroll + 1); 680132718Skan 681132718Skan wont_exit = sbitmap_alloc (max_unroll + 1); 682132718Skan sbitmap_ones (wont_exit); 683132718Skan 684169689Skan remove_edges = XCNEWVEC (edge, max_unroll + exit_mod + 1); 685132718Skan n_remove_edges = 0; 686169689Skan if (flag_split_ivs_in_unroller 687169689Skan || flag_variable_expansion_in_unroller) 688169689Skan opt_info = analyze_insns_in_loop (loop); 689169689Skan 690169689Skan if (!exit_at_end) 691132718Skan { 692169689Skan /* The exit is not at the end of the loop; leave exit test 693132718Skan in the first copy, so that the loops that start with test 694132718Skan of exit condition have continuous body after unrolling. */ 695132718Skan 696169689Skan if (dump_file) 697169689Skan fprintf (dump_file, ";; Condition on beginning of loop.\n"); 698132718Skan 699132718Skan /* Peel exit_mod iterations. */ 700132718Skan RESET_BIT (wont_exit, 0); 701169689Skan if (desc->noloop_assumptions) 702132718Skan RESET_BIT (wont_exit, 1); 703132718Skan 704169689Skan if (exit_mod) 705169689Skan { 706169689Skan opt_info_start_duplication (opt_info); 707169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 708169689Skan loops, exit_mod, 709169689Skan wont_exit, desc->out_edge, 710169689Skan remove_edges, &n_remove_edges, 711169689Skan DLTHE_FLAG_UPDATE_FREQ 712169689Skan | (opt_info && exit_mod > 1 713169689Skan ? DLTHE_RECORD_COPY_NUMBER 714169689Skan : 0)); 715169689Skan gcc_assert (ok); 716132718Skan 717169689Skan if (opt_info && exit_mod > 1) 718169689Skan apply_opt_in_copies (opt_info, exit_mod, false, false); 719169689Skan 720169689Skan desc->noloop_assumptions = NULL_RTX; 721169689Skan desc->niter -= exit_mod; 722169689Skan desc->niter_max -= exit_mod; 723169689Skan } 724169689Skan 725132718Skan SET_BIT (wont_exit, 1); 726132718Skan } 727132718Skan else 728132718Skan { 729132718Skan /* Leave exit test in last copy, for the same reason as above if 730132718Skan the loop tests the condition at the end of loop body. */ 731132718Skan 732169689Skan if (dump_file) 733169689Skan fprintf (dump_file, ";; Condition on end of loop.\n"); 734132718Skan 735132718Skan /* We know that niter >= max_unroll + 2; so we do not need to care of 736132718Skan case when we would exit before reaching the loop. So just peel 737169689Skan exit_mod + 1 iterations. */ 738169689Skan if (exit_mod != max_unroll 739169689Skan || desc->noloop_assumptions) 740132718Skan { 741132718Skan RESET_BIT (wont_exit, 0); 742169689Skan if (desc->noloop_assumptions) 743132718Skan RESET_BIT (wont_exit, 1); 744169689Skan 745169689Skan opt_info_start_duplication (opt_info); 746169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 747169689Skan loops, exit_mod + 1, 748169689Skan wont_exit, desc->out_edge, 749169689Skan remove_edges, &n_remove_edges, 750169689Skan DLTHE_FLAG_UPDATE_FREQ 751169689Skan | (opt_info && exit_mod > 0 752169689Skan ? DLTHE_RECORD_COPY_NUMBER 753169689Skan : 0)); 754169689Skan gcc_assert (ok); 755169689Skan 756169689Skan if (opt_info && exit_mod > 0) 757169689Skan apply_opt_in_copies (opt_info, exit_mod + 1, false, false); 758132718Skan 759169689Skan desc->niter -= exit_mod + 1; 760169689Skan desc->niter_max -= exit_mod + 1; 761169689Skan desc->noloop_assumptions = NULL_RTX; 762132718Skan 763132718Skan SET_BIT (wont_exit, 0); 764132718Skan SET_BIT (wont_exit, 1); 765132718Skan } 766132718Skan 767132718Skan RESET_BIT (wont_exit, max_unroll); 768132718Skan } 769132718Skan 770132718Skan /* Now unroll the loop. */ 771169689Skan 772169689Skan opt_info_start_duplication (opt_info); 773169689Skan ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 774169689Skan loops, max_unroll, 775169689Skan wont_exit, desc->out_edge, 776169689Skan remove_edges, &n_remove_edges, 777169689Skan DLTHE_FLAG_UPDATE_FREQ 778169689Skan | (opt_info 779169689Skan ? DLTHE_RECORD_COPY_NUMBER 780169689Skan : 0)); 781169689Skan gcc_assert (ok); 782132718Skan 783169689Skan if (opt_info) 784169689Skan { 785169689Skan apply_opt_in_copies (opt_info, max_unroll, true, true); 786169689Skan free_opt_info (opt_info); 787169689Skan } 788169689Skan 789132718Skan free (wont_exit); 790132718Skan 791169689Skan if (exit_at_end) 792169689Skan { 793169689Skan basic_block exit_block = get_bb_copy (desc->in_edge->src); 794169689Skan /* Find a new in and out edge; they are in the last copy we have made. */ 795169689Skan 796169689Skan if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 797169689Skan { 798169689Skan desc->out_edge = EDGE_SUCC (exit_block, 0); 799169689Skan desc->in_edge = EDGE_SUCC (exit_block, 1); 800169689Skan } 801169689Skan else 802169689Skan { 803169689Skan desc->out_edge = EDGE_SUCC (exit_block, 1); 804169689Skan desc->in_edge = EDGE_SUCC (exit_block, 0); 805169689Skan } 806169689Skan } 807132718Skan 808169689Skan desc->niter /= max_unroll + 1; 809169689Skan desc->niter_max /= max_unroll + 1; 810169689Skan desc->niter_expr = GEN_INT (desc->niter); 811169689Skan 812132718Skan /* Remove the edges. */ 813132718Skan for (i = 0; i < n_remove_edges; i++) 814132718Skan remove_path (loops, remove_edges[i]); 815132718Skan free (remove_edges); 816132718Skan 817169689Skan if (dump_file) 818169689Skan fprintf (dump_file, 819169689Skan ";; Unrolled loop %d times, constant # of iterations %i insns\n", 820169689Skan max_unroll, num_loop_insns (loop)); 821132718Skan} 822132718Skan 823132718Skan/* Decide whether to unroll LOOP iterating runtime computable number of times 824132718Skan and how much. */ 825132718Skanstatic void 826132718Skandecide_unroll_runtime_iterations (struct loop *loop, int flags) 827132718Skan{ 828132718Skan unsigned nunroll, nunroll_by_av, i; 829169689Skan struct niter_desc *desc; 830132718Skan 831132718Skan if (!(flags & UAP_UNROLL)) 832132718Skan { 833132718Skan /* We were not asked to, just return back silently. */ 834132718Skan return; 835132718Skan } 836132718Skan 837169689Skan if (dump_file) 838169689Skan fprintf (dump_file, 839169689Skan "\n;; Considering unrolling loop with runtime " 840169689Skan "computable number of iterations\n"); 841132718Skan 842132718Skan /* nunroll = total number of copies of the original loop body in 843132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 844132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 845132718Skan nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 846132718Skan if (nunroll > nunroll_by_av) 847132718Skan nunroll = nunroll_by_av; 848132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 849132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 850132718Skan 851132718Skan /* Skip big loops. */ 852132718Skan if (nunroll <= 1) 853132718Skan { 854169689Skan if (dump_file) 855169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 856132718Skan return; 857132718Skan } 858132718Skan 859132718Skan /* Check for simple loops. */ 860169689Skan desc = get_simple_loop_desc (loop); 861132718Skan 862132718Skan /* Check simpleness. */ 863169689Skan if (!desc->simple_p || desc->assumptions) 864132718Skan { 865169689Skan if (dump_file) 866169689Skan fprintf (dump_file, 867169689Skan ";; Unable to prove that the number of iterations " 868169689Skan "can be counted in runtime\n"); 869132718Skan return; 870132718Skan } 871132718Skan 872169689Skan if (desc->const_iter) 873132718Skan { 874169689Skan if (dump_file) 875169689Skan fprintf (dump_file, ";; Loop iterates constant times\n"); 876132718Skan return; 877132718Skan } 878132718Skan 879132718Skan /* If we have profile feedback, check whether the loop rolls. */ 880132718Skan if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) 881132718Skan { 882169689Skan if (dump_file) 883169689Skan fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 884132718Skan return; 885132718Skan } 886132718Skan 887132718Skan /* Success; now force nunroll to be power of 2, as we are unable to 888132718Skan cope with overflows in computation of number of iterations. */ 889169689Skan for (i = 1; 2 * i <= nunroll; i *= 2) 890169689Skan continue; 891132718Skan 892132718Skan loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; 893132718Skan loop->lpt_decision.times = i - 1; 894169689Skan 895169689Skan if (dump_file) 896169689Skan fprintf (dump_file, 897169689Skan ";; Decided to unroll the runtime computable " 898169689Skan "times rolling loop, %d times.\n", 899169689Skan loop->lpt_decision.times); 900132718Skan} 901132718Skan 902132718Skan/* Unroll LOOP for that we are able to count number of iterations in runtime 903132718Skan LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some 904132718Skan extra care for case n < 0): 905132718Skan 906132718Skan for (i = 0; i < n; i++) 907132718Skan body; 908132718Skan 909132718Skan ==> 910132718Skan 911132718Skan i = 0; 912132718Skan mod = n % 4; 913132718Skan 914132718Skan switch (mod) 915132718Skan { 916132718Skan case 3: 917132718Skan body; i++; 918132718Skan case 2: 919132718Skan body; i++; 920132718Skan case 1: 921132718Skan body; i++; 922132718Skan case 0: ; 923132718Skan } 924132718Skan 925132718Skan while (i < n) 926132718Skan { 927132718Skan body; i++; 928132718Skan body; i++; 929132718Skan body; i++; 930132718Skan body; i++; 931132718Skan } 932132718Skan */ 933132718Skanstatic void 934132718Skanunroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) 935132718Skan{ 936169689Skan rtx old_niter, niter, init_code, branch_code, tmp; 937132718Skan unsigned i, j, p; 938132718Skan basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch; 939132718Skan unsigned n_dom_bbs; 940132718Skan sbitmap wont_exit; 941132718Skan int may_exit_copy; 942132718Skan unsigned n_peel, n_remove_edges; 943132718Skan edge *remove_edges, e; 944132718Skan bool extra_zero_check, last_may_exit; 945132718Skan unsigned max_unroll = loop->lpt_decision.times; 946169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 947169689Skan bool exit_at_end = loop_exit_at_end_p (loop); 948169689Skan struct opt_info *opt_info = NULL; 949169689Skan bool ok; 950169689Skan 951169689Skan if (flag_split_ivs_in_unroller 952169689Skan || flag_variable_expansion_in_unroller) 953169689Skan opt_info = analyze_insns_in_loop (loop); 954169689Skan 955132718Skan /* Remember blocks whose dominators will have to be updated. */ 956169689Skan dom_bbs = XCNEWVEC (basic_block, n_basic_blocks); 957132718Skan n_dom_bbs = 0; 958132718Skan 959132718Skan body = get_loop_body (loop); 960132718Skan for (i = 0; i < loop->num_nodes; i++) 961132718Skan { 962132718Skan unsigned nldom; 963132718Skan basic_block *ldom; 964132718Skan 965132718Skan nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom); 966132718Skan for (j = 0; j < nldom; j++) 967132718Skan if (!flow_bb_inside_loop_p (loop, ldom[j])) 968132718Skan dom_bbs[n_dom_bbs++] = ldom[j]; 969132718Skan 970132718Skan free (ldom); 971132718Skan } 972132718Skan free (body); 973132718Skan 974169689Skan if (!exit_at_end) 975132718Skan { 976132718Skan /* Leave exit in first copy (for explanation why see comment in 977132718Skan unroll_loop_constant_iterations). */ 978132718Skan may_exit_copy = 0; 979132718Skan n_peel = max_unroll - 1; 980132718Skan extra_zero_check = true; 981132718Skan last_may_exit = false; 982132718Skan } 983132718Skan else 984132718Skan { 985132718Skan /* Leave exit in last copy (for explanation why see comment in 986132718Skan unroll_loop_constant_iterations). */ 987132718Skan may_exit_copy = max_unroll; 988132718Skan n_peel = max_unroll; 989132718Skan extra_zero_check = false; 990132718Skan last_may_exit = true; 991132718Skan } 992132718Skan 993132718Skan /* Get expression for number of iterations. */ 994132718Skan start_sequence (); 995169689Skan old_niter = niter = gen_reg_rtx (desc->mode); 996169689Skan tmp = force_operand (copy_rtx (desc->niter_expr), niter); 997169689Skan if (tmp != niter) 998169689Skan emit_move_insn (niter, tmp); 999132718Skan 1000132718Skan /* Count modulo by ANDing it with max_unroll; we use the fact that 1001132718Skan the number of unrollings is a power of two, and thus this is correct 1002132718Skan even if there is overflow in the computation. */ 1003169689Skan niter = expand_simple_binop (desc->mode, AND, 1004132718Skan niter, 1005132718Skan GEN_INT (max_unroll), 1006132718Skan NULL_RTX, 0, OPTAB_LIB_WIDEN); 1007132718Skan 1008132718Skan init_code = get_insns (); 1009132718Skan end_sequence (); 1010132718Skan 1011132718Skan /* Precondition the loop. */ 1012132718Skan loop_split_edge_with (loop_preheader_edge (loop), init_code); 1013132718Skan 1014169689Skan remove_edges = XCNEWVEC (edge, max_unroll + n_peel + 1); 1015132718Skan n_remove_edges = 0; 1016132718Skan 1017132718Skan wont_exit = sbitmap_alloc (max_unroll + 2); 1018132718Skan 1019132718Skan /* Peel the first copy of loop body (almost always we must leave exit test 1020132718Skan here; the only exception is when we have extra zero check and the number 1021169689Skan of iterations is reliable. Also record the place of (possible) extra 1022169689Skan zero check. */ 1023132718Skan sbitmap_zero (wont_exit); 1024169689Skan if (extra_zero_check 1025169689Skan && !desc->noloop_assumptions) 1026132718Skan SET_BIT (wont_exit, 1); 1027132718Skan ezc_swtch = loop_preheader_edge (loop)->src; 1028169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1029169689Skan loops, 1, 1030169689Skan wont_exit, desc->out_edge, 1031169689Skan remove_edges, &n_remove_edges, 1032169689Skan DLTHE_FLAG_UPDATE_FREQ); 1033169689Skan gcc_assert (ok); 1034132718Skan 1035132718Skan /* Record the place where switch will be built for preconditioning. */ 1036132718Skan swtch = loop_split_edge_with (loop_preheader_edge (loop), 1037132718Skan NULL_RTX); 1038132718Skan 1039132718Skan for (i = 0; i < n_peel; i++) 1040132718Skan { 1041132718Skan /* Peel the copy. */ 1042132718Skan sbitmap_zero (wont_exit); 1043132718Skan if (i != n_peel - 1 || !last_may_exit) 1044132718Skan SET_BIT (wont_exit, 1); 1045169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1046169689Skan loops, 1, 1047169689Skan wont_exit, desc->out_edge, 1048169689Skan remove_edges, &n_remove_edges, 1049169689Skan DLTHE_FLAG_UPDATE_FREQ); 1050169689Skan gcc_assert (ok); 1051132718Skan 1052132718Skan /* Create item for switch. */ 1053132718Skan j = n_peel - i - (extra_zero_check ? 0 : 1); 1054132718Skan p = REG_BR_PROB_BASE / (i + 2); 1055132718Skan 1056169689Skan preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 1057169689Skan branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, 1058169689Skan block_label (preheader), p, 1059132718Skan NULL_RTX); 1060132718Skan 1061169689Skan swtch = loop_split_edge_with (single_pred_edge (swtch), branch_code); 1062132718Skan set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1063169689Skan single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1064132718Skan e = make_edge (swtch, preheader, 1065169689Skan single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1066132718Skan e->probability = p; 1067132718Skan } 1068132718Skan 1069132718Skan if (extra_zero_check) 1070132718Skan { 1071132718Skan /* Add branch for zero iterations. */ 1072132718Skan p = REG_BR_PROB_BASE / (max_unroll + 1); 1073132718Skan swtch = ezc_swtch; 1074132718Skan preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 1075169689Skan branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, 1076169689Skan block_label (preheader), p, 1077169689Skan NULL_RTX); 1078132718Skan 1079169689Skan swtch = loop_split_edge_with (single_succ_edge (swtch), branch_code); 1080132718Skan set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1081169689Skan single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1082132718Skan e = make_edge (swtch, preheader, 1083169689Skan single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1084132718Skan e->probability = p; 1085132718Skan } 1086132718Skan 1087132718Skan /* Recount dominators for outer blocks. */ 1088132718Skan iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs); 1089132718Skan 1090132718Skan /* And unroll loop. */ 1091132718Skan 1092132718Skan sbitmap_ones (wont_exit); 1093132718Skan RESET_BIT (wont_exit, may_exit_copy); 1094169689Skan opt_info_start_duplication (opt_info); 1095169689Skan 1096169689Skan ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1097169689Skan loops, max_unroll, 1098169689Skan wont_exit, desc->out_edge, 1099169689Skan remove_edges, &n_remove_edges, 1100169689Skan DLTHE_FLAG_UPDATE_FREQ 1101169689Skan | (opt_info 1102169689Skan ? DLTHE_RECORD_COPY_NUMBER 1103169689Skan : 0)); 1104169689Skan gcc_assert (ok); 1105169689Skan 1106169689Skan if (opt_info) 1107169689Skan { 1108169689Skan apply_opt_in_copies (opt_info, max_unroll, true, true); 1109169689Skan free_opt_info (opt_info); 1110169689Skan } 1111132718Skan 1112132718Skan free (wont_exit); 1113132718Skan 1114169689Skan if (exit_at_end) 1115169689Skan { 1116169689Skan basic_block exit_block = get_bb_copy (desc->in_edge->src); 1117169689Skan /* Find a new in and out edge; they are in the last copy we have 1118169689Skan made. */ 1119169689Skan 1120169689Skan if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 1121169689Skan { 1122169689Skan desc->out_edge = EDGE_SUCC (exit_block, 0); 1123169689Skan desc->in_edge = EDGE_SUCC (exit_block, 1); 1124169689Skan } 1125169689Skan else 1126169689Skan { 1127169689Skan desc->out_edge = EDGE_SUCC (exit_block, 1); 1128169689Skan desc->in_edge = EDGE_SUCC (exit_block, 0); 1129169689Skan } 1130169689Skan } 1131132718Skan 1132132718Skan /* Remove the edges. */ 1133132718Skan for (i = 0; i < n_remove_edges; i++) 1134132718Skan remove_path (loops, remove_edges[i]); 1135132718Skan free (remove_edges); 1136132718Skan 1137169689Skan /* We must be careful when updating the number of iterations due to 1138169689Skan preconditioning and the fact that the value must be valid at entry 1139169689Skan of the loop. After passing through the above code, we see that 1140169689Skan the correct new number of iterations is this: */ 1141169689Skan gcc_assert (!desc->const_iter); 1142169689Skan desc->niter_expr = 1143169689Skan simplify_gen_binary (UDIV, desc->mode, old_niter, 1144169689Skan GEN_INT (max_unroll + 1)); 1145169689Skan desc->niter_max /= max_unroll + 1; 1146169689Skan if (exit_at_end) 1147169689Skan { 1148169689Skan desc->niter_expr = 1149169689Skan simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); 1150169689Skan desc->noloop_assumptions = NULL_RTX; 1151169689Skan desc->niter_max--; 1152169689Skan } 1153169689Skan 1154169689Skan if (dump_file) 1155169689Skan fprintf (dump_file, 1156169689Skan ";; Unrolled loop %d times, counting # of iterations " 1157169689Skan "in runtime, %i insns\n", 1158132718Skan max_unroll, num_loop_insns (loop)); 1159169689Skan 1160169689Skan if (dom_bbs) 1161169689Skan free (dom_bbs); 1162132718Skan} 1163132718Skan 1164132718Skan/* Decide whether to simply peel LOOP and how much. */ 1165132718Skanstatic void 1166132718Skandecide_peel_simple (struct loop *loop, int flags) 1167132718Skan{ 1168132718Skan unsigned npeel; 1169169689Skan struct niter_desc *desc; 1170132718Skan 1171132718Skan if (!(flags & UAP_PEEL)) 1172132718Skan { 1173132718Skan /* We were not asked to, just return back silently. */ 1174132718Skan return; 1175132718Skan } 1176132718Skan 1177169689Skan if (dump_file) 1178169689Skan fprintf (dump_file, "\n;; Considering simply peeling loop\n"); 1179132718Skan 1180132718Skan /* npeel = number of iterations to peel. */ 1181132718Skan npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; 1182132718Skan if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) 1183132718Skan npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); 1184132718Skan 1185132718Skan /* Skip big loops. */ 1186132718Skan if (!npeel) 1187132718Skan { 1188169689Skan if (dump_file) 1189169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 1190132718Skan return; 1191132718Skan } 1192132718Skan 1193132718Skan /* Check for simple loops. */ 1194169689Skan desc = get_simple_loop_desc (loop); 1195132718Skan 1196132718Skan /* Check number of iterations. */ 1197169689Skan if (desc->simple_p && !desc->assumptions && desc->const_iter) 1198132718Skan { 1199169689Skan if (dump_file) 1200169689Skan fprintf (dump_file, ";; Loop iterates constant times\n"); 1201132718Skan return; 1202132718Skan } 1203132718Skan 1204132718Skan /* Do not simply peel loops with branches inside -- it increases number 1205132718Skan of mispredicts. */ 1206169689Skan if (num_loop_branches (loop) > 1) 1207132718Skan { 1208169689Skan if (dump_file) 1209169689Skan fprintf (dump_file, ";; Not peeling, contains branches\n"); 1210132718Skan return; 1211132718Skan } 1212132718Skan 1213132718Skan if (loop->header->count) 1214132718Skan { 1215132718Skan unsigned niter = expected_loop_iterations (loop); 1216132718Skan if (niter + 1 > npeel) 1217132718Skan { 1218169689Skan if (dump_file) 1219132718Skan { 1220169689Skan fprintf (dump_file, ";; Not peeling loop, rolls too much ("); 1221169689Skan fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, 1222169689Skan (HOST_WIDEST_INT) (niter + 1)); 1223169689Skan fprintf (dump_file, " iterations > %d [maximum peelings])\n", 1224169689Skan npeel); 1225132718Skan } 1226132718Skan return; 1227132718Skan } 1228132718Skan npeel = niter + 1; 1229132718Skan } 1230132718Skan else 1231132718Skan { 1232132718Skan /* For now we have no good heuristics to decide whether loop peeling 1233132718Skan will be effective, so disable it. */ 1234169689Skan if (dump_file) 1235169689Skan fprintf (dump_file, 1236132718Skan ";; Not peeling loop, no evidence it will be profitable\n"); 1237132718Skan return; 1238132718Skan } 1239132718Skan 1240132718Skan /* Success. */ 1241132718Skan loop->lpt_decision.decision = LPT_PEEL_SIMPLE; 1242132718Skan loop->lpt_decision.times = npeel; 1243169689Skan 1244169689Skan if (dump_file) 1245169689Skan fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", 1246169689Skan loop->lpt_decision.times); 1247132718Skan} 1248132718Skan 1249132718Skan/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: 1250132718Skan while (cond) 1251132718Skan body; 1252132718Skan 1253132718Skan ==> 1254132718Skan 1255132718Skan if (!cond) goto end; 1256132718Skan body; 1257132718Skan if (!cond) goto end; 1258132718Skan body; 1259132718Skan while (cond) 1260132718Skan body; 1261132718Skan end: ; 1262132718Skan */ 1263132718Skanstatic void 1264132718Skanpeel_loop_simple (struct loops *loops, struct loop *loop) 1265132718Skan{ 1266132718Skan sbitmap wont_exit; 1267132718Skan unsigned npeel = loop->lpt_decision.times; 1268169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 1269169689Skan struct opt_info *opt_info = NULL; 1270169689Skan bool ok; 1271169689Skan 1272169689Skan if (flag_split_ivs_in_unroller && npeel > 1) 1273169689Skan opt_info = analyze_insns_in_loop (loop); 1274169689Skan 1275132718Skan wont_exit = sbitmap_alloc (npeel + 1); 1276132718Skan sbitmap_zero (wont_exit); 1277169689Skan 1278169689Skan opt_info_start_duplication (opt_info); 1279169689Skan 1280169689Skan ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1281169689Skan loops, npeel, wont_exit, 1282169689Skan NULL, NULL, 1283169689Skan NULL, DLTHE_FLAG_UPDATE_FREQ 1284169689Skan | (opt_info 1285169689Skan ? DLTHE_RECORD_COPY_NUMBER 1286169689Skan : 0)); 1287169689Skan gcc_assert (ok); 1288132718Skan 1289132718Skan free (wont_exit); 1290169689Skan 1291169689Skan if (opt_info) 1292169689Skan { 1293169689Skan apply_opt_in_copies (opt_info, npeel, false, false); 1294169689Skan free_opt_info (opt_info); 1295169689Skan } 1296132718Skan 1297169689Skan if (desc->simple_p) 1298169689Skan { 1299169689Skan if (desc->const_iter) 1300169689Skan { 1301169689Skan desc->niter -= npeel; 1302169689Skan desc->niter_expr = GEN_INT (desc->niter); 1303169689Skan desc->noloop_assumptions = NULL_RTX; 1304169689Skan } 1305169689Skan else 1306169689Skan { 1307169689Skan /* We cannot just update niter_expr, as its value might be clobbered 1308169689Skan inside loop. We could handle this by counting the number into 1309169689Skan temporary just like we do in runtime unrolling, but it does not 1310169689Skan seem worthwhile. */ 1311169689Skan free_simple_loop_desc (loop); 1312169689Skan } 1313169689Skan } 1314169689Skan if (dump_file) 1315169689Skan fprintf (dump_file, ";; Peeling loop %d times\n", npeel); 1316132718Skan} 1317132718Skan 1318132718Skan/* Decide whether to unroll LOOP stupidly and how much. */ 1319132718Skanstatic void 1320132718Skandecide_unroll_stupid (struct loop *loop, int flags) 1321132718Skan{ 1322132718Skan unsigned nunroll, nunroll_by_av, i; 1323169689Skan struct niter_desc *desc; 1324132718Skan 1325132718Skan if (!(flags & UAP_UNROLL_ALL)) 1326132718Skan { 1327132718Skan /* We were not asked to, just return back silently. */ 1328132718Skan return; 1329132718Skan } 1330132718Skan 1331169689Skan if (dump_file) 1332169689Skan fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); 1333132718Skan 1334132718Skan /* nunroll = total number of copies of the original loop body in 1335132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 1336132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 1337169689Skan nunroll_by_av 1338169689Skan = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 1339132718Skan if (nunroll > nunroll_by_av) 1340132718Skan nunroll = nunroll_by_av; 1341132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 1342132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 1343132718Skan 1344132718Skan /* Skip big loops. */ 1345132718Skan if (nunroll <= 1) 1346132718Skan { 1347169689Skan if (dump_file) 1348169689Skan fprintf (dump_file, ";; Not considering loop, is too big\n"); 1349132718Skan return; 1350132718Skan } 1351132718Skan 1352132718Skan /* Check for simple loops. */ 1353169689Skan desc = get_simple_loop_desc (loop); 1354132718Skan 1355132718Skan /* Check simpleness. */ 1356169689Skan if (desc->simple_p && !desc->assumptions) 1357132718Skan { 1358169689Skan if (dump_file) 1359169689Skan fprintf (dump_file, ";; The loop is simple\n"); 1360132718Skan return; 1361132718Skan } 1362132718Skan 1363132718Skan /* Do not unroll loops with branches inside -- it increases number 1364132718Skan of mispredicts. */ 1365169689Skan if (num_loop_branches (loop) > 1) 1366132718Skan { 1367169689Skan if (dump_file) 1368169689Skan fprintf (dump_file, ";; Not unrolling, contains branches\n"); 1369132718Skan return; 1370132718Skan } 1371132718Skan 1372132718Skan /* If we have profile feedback, check whether the loop rolls. */ 1373169689Skan if (loop->header->count 1374169689Skan && expected_loop_iterations (loop) < 2 * nunroll) 1375132718Skan { 1376169689Skan if (dump_file) 1377169689Skan fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 1378132718Skan return; 1379132718Skan } 1380132718Skan 1381132718Skan /* Success. Now force nunroll to be power of 2, as it seems that this 1382132718Skan improves results (partially because of better alignments, partially 1383132718Skan because of some dark magic). */ 1384169689Skan for (i = 1; 2 * i <= nunroll; i *= 2) 1385169689Skan continue; 1386132718Skan 1387132718Skan loop->lpt_decision.decision = LPT_UNROLL_STUPID; 1388132718Skan loop->lpt_decision.times = i - 1; 1389169689Skan 1390169689Skan if (dump_file) 1391169689Skan fprintf (dump_file, 1392169689Skan ";; Decided to unroll the loop stupidly, %d times.\n", 1393169689Skan loop->lpt_decision.times); 1394132718Skan} 1395132718Skan 1396132718Skan/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: 1397132718Skan while (cond) 1398132718Skan body; 1399132718Skan 1400132718Skan ==> 1401132718Skan 1402132718Skan while (cond) 1403132718Skan { 1404132718Skan body; 1405132718Skan if (!cond) break; 1406132718Skan body; 1407132718Skan if (!cond) break; 1408132718Skan body; 1409132718Skan if (!cond) break; 1410132718Skan body; 1411132718Skan } 1412132718Skan */ 1413132718Skanstatic void 1414132718Skanunroll_loop_stupid (struct loops *loops, struct loop *loop) 1415132718Skan{ 1416132718Skan sbitmap wont_exit; 1417132718Skan unsigned nunroll = loop->lpt_decision.times; 1418169689Skan struct niter_desc *desc = get_simple_loop_desc (loop); 1419169689Skan struct opt_info *opt_info = NULL; 1420169689Skan bool ok; 1421169689Skan 1422169689Skan if (flag_split_ivs_in_unroller 1423169689Skan || flag_variable_expansion_in_unroller) 1424169689Skan opt_info = analyze_insns_in_loop (loop); 1425169689Skan 1426169689Skan 1427132718Skan wont_exit = sbitmap_alloc (nunroll + 1); 1428132718Skan sbitmap_zero (wont_exit); 1429169689Skan opt_info_start_duplication (opt_info); 1430169689Skan 1431169689Skan ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1432169689Skan loops, nunroll, wont_exit, 1433169689Skan NULL, NULL, NULL, 1434169689Skan DLTHE_FLAG_UPDATE_FREQ 1435169689Skan | (opt_info 1436169689Skan ? DLTHE_RECORD_COPY_NUMBER 1437169689Skan : 0)); 1438169689Skan gcc_assert (ok); 1439169689Skan 1440169689Skan if (opt_info) 1441169689Skan { 1442169689Skan apply_opt_in_copies (opt_info, nunroll, true, true); 1443169689Skan free_opt_info (opt_info); 1444169689Skan } 1445132718Skan 1446132718Skan free (wont_exit); 1447132718Skan 1448169689Skan if (desc->simple_p) 1449169689Skan { 1450169689Skan /* We indeed may get here provided that there are nontrivial assumptions 1451169689Skan for a loop to be really simple. We could update the counts, but the 1452169689Skan problem is that we are unable to decide which exit will be taken 1453169689Skan (not really true in case the number of iterations is constant, 1454169689Skan but noone will do anything with this information, so we do not 1455169689Skan worry about it). */ 1456169689Skan desc->simple_p = false; 1457169689Skan } 1458169689Skan 1459169689Skan if (dump_file) 1460169689Skan fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", 1461132718Skan nunroll, num_loop_insns (loop)); 1462132718Skan} 1463132718Skan 1464169689Skan/* A hash function for information about insns to split. */ 1465146895Skan 1466169689Skanstatic hashval_t 1467169689Skansi_info_hash (const void *ivts) 1468169689Skan{ 1469169689Skan return (hashval_t) INSN_UID (((struct iv_to_split *) ivts)->insn); 1470169689Skan} 1471169689Skan 1472169689Skan/* An equality functions for information about insns to split. */ 1473169689Skan 1474169689Skanstatic int 1475169689Skansi_info_eq (const void *ivts1, const void *ivts2) 1476169689Skan{ 1477169689Skan const struct iv_to_split *i1 = ivts1; 1478169689Skan const struct iv_to_split *i2 = ivts2; 1479169689Skan 1480169689Skan return i1->insn == i2->insn; 1481169689Skan} 1482169689Skan 1483169689Skan/* Return a hash for VES, which is really a "var_to_expand *". */ 1484169689Skan 1485169689Skanstatic hashval_t 1486169689Skanve_info_hash (const void *ves) 1487169689Skan{ 1488169689Skan return (hashval_t) INSN_UID (((struct var_to_expand *) ves)->insn); 1489169689Skan} 1490169689Skan 1491169689Skan/* Return true if IVTS1 and IVTS2 (which are really both of type 1492169689Skan "var_to_expand *") refer to the same instruction. */ 1493169689Skan 1494169689Skanstatic int 1495169689Skanve_info_eq (const void *ivts1, const void *ivts2) 1496169689Skan{ 1497169689Skan const struct var_to_expand *i1 = ivts1; 1498169689Skan const struct var_to_expand *i2 = ivts2; 1499169689Skan 1500169689Skan return i1->insn == i2->insn; 1501169689Skan} 1502169689Skan 1503169689Skan/* Returns true if REG is referenced in one insn in LOOP. */ 1504169689Skan 1505169689Skanbool 1506169689Skanreferenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg) 1507169689Skan{ 1508169689Skan basic_block *body, bb; 1509169689Skan unsigned i; 1510169689Skan int count_ref = 0; 1511169689Skan rtx insn; 1512169689Skan 1513169689Skan body = get_loop_body (loop); 1514169689Skan for (i = 0; i < loop->num_nodes; i++) 1515169689Skan { 1516169689Skan bb = body[i]; 1517169689Skan 1518169689Skan FOR_BB_INSNS (bb, insn) 1519169689Skan { 1520169689Skan if (rtx_referenced_p (reg, insn)) 1521169689Skan count_ref++; 1522169689Skan } 1523169689Skan } 1524169689Skan return (count_ref == 1); 1525169689Skan} 1526169689Skan 1527169689Skan/* Determine whether INSN contains an accumulator 1528169689Skan which can be expanded into separate copies, 1529169689Skan one for each copy of the LOOP body. 1530169689Skan 1531169689Skan for (i = 0 ; i < n; i++) 1532169689Skan sum += a[i]; 1533169689Skan 1534169689Skan ==> 1535169689Skan 1536169689Skan sum += a[i] 1537169689Skan .... 1538169689Skan i = i+1; 1539169689Skan sum1 += a[i] 1540169689Skan .... 1541169689Skan i = i+1 1542169689Skan sum2 += a[i]; 1543169689Skan .... 1544169689Skan 1545169689Skan Return NULL if INSN contains no opportunity for expansion of accumulator. 1546169689Skan Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant 1547169689Skan information and return a pointer to it. 1548169689Skan*/ 1549169689Skan 1550169689Skanstatic struct var_to_expand * 1551169689Skananalyze_insn_to_expand_var (struct loop *loop, rtx insn) 1552169689Skan{ 1553169689Skan rtx set, dest, src, op1; 1554169689Skan struct var_to_expand *ves; 1555169689Skan enum machine_mode mode1, mode2; 1556169689Skan 1557169689Skan set = single_set (insn); 1558169689Skan if (!set) 1559169689Skan return NULL; 1560169689Skan 1561169689Skan dest = SET_DEST (set); 1562169689Skan src = SET_SRC (set); 1563169689Skan 1564169689Skan if (GET_CODE (src) != PLUS 1565169689Skan && GET_CODE (src) != MINUS 1566169689Skan && GET_CODE (src) != MULT) 1567169689Skan return NULL; 1568169689Skan 1569169689Skan /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn 1570169689Skan in MD. But if there is no optab to generate the insn, we can not 1571169689Skan perform the variable expansion. This can happen if an MD provides 1572169689Skan an insn but not a named pattern to generate it, for example to avoid 1573169689Skan producing code that needs additional mode switches like for x87/mmx. 1574169689Skan 1575169689Skan So we check have_insn_for which looks for an optab for the operation 1576169689Skan in SRC. If it doesn't exist, we can't perform the expansion even 1577169689Skan though INSN is valid. */ 1578169689Skan if (!have_insn_for (GET_CODE (src), GET_MODE (src))) 1579169689Skan return NULL; 1580169689Skan 1581169689Skan if (!XEXP (src, 0)) 1582169689Skan return NULL; 1583169689Skan 1584169689Skan op1 = XEXP (src, 0); 1585169689Skan 1586169689Skan if (!REG_P (dest) 1587169689Skan && !(GET_CODE (dest) == SUBREG 1588169689Skan && REG_P (SUBREG_REG (dest)))) 1589169689Skan return NULL; 1590169689Skan 1591169689Skan if (!rtx_equal_p (dest, op1)) 1592169689Skan return NULL; 1593169689Skan 1594169689Skan if (!referenced_in_one_insn_in_loop_p (loop, dest)) 1595169689Skan return NULL; 1596169689Skan 1597169689Skan if (rtx_referenced_p (dest, XEXP (src, 1))) 1598169689Skan return NULL; 1599169689Skan 1600169689Skan mode1 = GET_MODE (dest); 1601169689Skan mode2 = GET_MODE (XEXP (src, 1)); 1602169689Skan if ((FLOAT_MODE_P (mode1) 1603169689Skan || FLOAT_MODE_P (mode2)) 1604169689Skan && !flag_unsafe_math_optimizations) 1605169689Skan return NULL; 1606169689Skan 1607169689Skan /* Record the accumulator to expand. */ 1608169689Skan ves = XNEW (struct var_to_expand); 1609169689Skan ves->insn = insn; 1610169689Skan ves->var_expansions = VEC_alloc (rtx, heap, 1); 1611169689Skan ves->reg = copy_rtx (dest); 1612169689Skan ves->op = GET_CODE (src); 1613169689Skan ves->expansion_count = 0; 1614169689Skan ves->reuse_expansion = 0; 1615169689Skan return ves; 1616169689Skan} 1617169689Skan 1618169689Skan/* Determine whether there is an induction variable in INSN that 1619169689Skan we would like to split during unrolling. 1620169689Skan 1621169689Skan I.e. replace 1622169689Skan 1623169689Skan i = i + 1; 1624169689Skan ... 1625169689Skan i = i + 1; 1626169689Skan ... 1627169689Skan i = i + 1; 1628169689Skan ... 1629169689Skan 1630169689Skan type chains by 1631169689Skan 1632169689Skan i0 = i + 1 1633169689Skan ... 1634169689Skan i = i0 + 1 1635169689Skan ... 1636169689Skan i = i0 + 2 1637169689Skan ... 1638169689Skan 1639169689Skan Return NULL if INSN contains no interesting IVs. Otherwise, allocate 1640169689Skan an IV_TO_SPLIT structure, fill it with the relevant information and return a 1641169689Skan pointer to it. */ 1642169689Skan 1643169689Skanstatic struct iv_to_split * 1644169689Skananalyze_iv_to_split_insn (rtx insn) 1645169689Skan{ 1646169689Skan rtx set, dest; 1647169689Skan struct rtx_iv iv; 1648169689Skan struct iv_to_split *ivts; 1649169689Skan bool ok; 1650169689Skan 1651169689Skan /* For now we just split the basic induction variables. Later this may be 1652169689Skan extended for example by selecting also addresses of memory references. */ 1653169689Skan set = single_set (insn); 1654169689Skan if (!set) 1655169689Skan return NULL; 1656169689Skan 1657169689Skan dest = SET_DEST (set); 1658169689Skan if (!REG_P (dest)) 1659169689Skan return NULL; 1660169689Skan 1661169689Skan if (!biv_p (insn, dest)) 1662169689Skan return NULL; 1663169689Skan 1664169689Skan ok = iv_analyze_result (insn, dest, &iv); 1665169689Skan 1666169689Skan /* This used to be an assert under the assumption that if biv_p returns 1667169689Skan true that iv_analyze_result must also return true. However, that 1668169689Skan assumption is not strictly correct as evidenced by pr25569. 1669169689Skan 1670169689Skan Returning NULL when iv_analyze_result returns false is safe and 1671169689Skan avoids the problems in pr25569 until the iv_analyze_* routines 1672169689Skan can be fixed, which is apparently hard and time consuming 1673169689Skan according to their author. */ 1674169689Skan if (! ok) 1675169689Skan return NULL; 1676169689Skan 1677169689Skan if (iv.step == const0_rtx 1678169689Skan || iv.mode != iv.extend_mode) 1679169689Skan return NULL; 1680169689Skan 1681169689Skan /* Record the insn to split. */ 1682169689Skan ivts = XNEW (struct iv_to_split); 1683169689Skan ivts->insn = insn; 1684169689Skan ivts->base_var = NULL_RTX; 1685169689Skan ivts->step = iv.step; 1686169689Skan ivts->n_loc = 1; 1687169689Skan ivts->loc[0] = 1; 1688169689Skan 1689169689Skan return ivts; 1690169689Skan} 1691169689Skan 1692169689Skan/* Determines which of insns in LOOP can be optimized. 1693169689Skan Return a OPT_INFO struct with the relevant hash tables filled 1694169689Skan with all insns to be optimized. The FIRST_NEW_BLOCK field 1695169689Skan is undefined for the return value. */ 1696169689Skan 1697169689Skanstatic struct opt_info * 1698169689Skananalyze_insns_in_loop (struct loop *loop) 1699169689Skan{ 1700169689Skan basic_block *body, bb; 1701169689Skan unsigned i, num_edges = 0; 1702169689Skan struct opt_info *opt_info = XCNEW (struct opt_info); 1703169689Skan rtx insn; 1704169689Skan struct iv_to_split *ivts = NULL; 1705169689Skan struct var_to_expand *ves = NULL; 1706169689Skan PTR *slot1; 1707169689Skan PTR *slot2; 1708169689Skan edge *edges = get_loop_exit_edges (loop, &num_edges); 1709169689Skan bool can_apply = false; 1710169689Skan 1711169689Skan iv_analysis_loop_init (loop); 1712169689Skan 1713169689Skan body = get_loop_body (loop); 1714169689Skan 1715169689Skan if (flag_split_ivs_in_unroller) 1716169689Skan opt_info->insns_to_split = htab_create (5 * loop->num_nodes, 1717169689Skan si_info_hash, si_info_eq, free); 1718169689Skan 1719169689Skan /* Record the loop exit bb and loop preheader before the unrolling. */ 1720169689Skan if (!loop_preheader_edge (loop)->src) 1721169689Skan { 1722169689Skan loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 1723169689Skan opt_info->loop_preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 1724169689Skan } 1725169689Skan else 1726169689Skan opt_info->loop_preheader = loop_preheader_edge (loop)->src; 1727169689Skan 1728169689Skan if (num_edges == 1 1729169689Skan && !(edges[0]->flags & EDGE_COMPLEX)) 1730169689Skan { 1731169689Skan opt_info->loop_exit = loop_split_edge_with (edges[0], NULL_RTX); 1732169689Skan can_apply = true; 1733169689Skan } 1734169689Skan 1735169689Skan if (flag_variable_expansion_in_unroller 1736169689Skan && can_apply) 1737169689Skan opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, 1738169689Skan ve_info_hash, ve_info_eq, free); 1739169689Skan 1740169689Skan for (i = 0; i < loop->num_nodes; i++) 1741169689Skan { 1742169689Skan bb = body[i]; 1743169689Skan if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) 1744169689Skan continue; 1745169689Skan 1746169689Skan FOR_BB_INSNS (bb, insn) 1747169689Skan { 1748169689Skan if (!INSN_P (insn)) 1749169689Skan continue; 1750169689Skan 1751169689Skan if (opt_info->insns_to_split) 1752169689Skan ivts = analyze_iv_to_split_insn (insn); 1753169689Skan 1754169689Skan if (ivts) 1755169689Skan { 1756169689Skan slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); 1757169689Skan *slot1 = ivts; 1758169689Skan continue; 1759169689Skan } 1760169689Skan 1761169689Skan if (opt_info->insns_with_var_to_expand) 1762169689Skan ves = analyze_insn_to_expand_var (loop, insn); 1763169689Skan 1764169689Skan if (ves) 1765169689Skan { 1766169689Skan slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); 1767169689Skan *slot2 = ves; 1768169689Skan } 1769169689Skan } 1770169689Skan } 1771169689Skan 1772169689Skan free (edges); 1773169689Skan free (body); 1774169689Skan return opt_info; 1775169689Skan} 1776169689Skan 1777169689Skan/* Called just before loop duplication. Records start of duplicated area 1778169689Skan to OPT_INFO. */ 1779169689Skan 1780169689Skanstatic void 1781169689Skanopt_info_start_duplication (struct opt_info *opt_info) 1782169689Skan{ 1783169689Skan if (opt_info) 1784169689Skan opt_info->first_new_block = last_basic_block; 1785169689Skan} 1786169689Skan 1787169689Skan/* Determine the number of iterations between initialization of the base 1788169689Skan variable and the current copy (N_COPY). N_COPIES is the total number 1789169689Skan of newly created copies. UNROLLING is true if we are unrolling 1790169689Skan (not peeling) the loop. */ 1791169689Skan 1792169689Skanstatic unsigned 1793169689Skandetermine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling) 1794169689Skan{ 1795169689Skan if (unrolling) 1796169689Skan { 1797169689Skan /* If we are unrolling, initialization is done in the original loop 1798169689Skan body (number 0). */ 1799169689Skan return n_copy; 1800169689Skan } 1801169689Skan else 1802169689Skan { 1803169689Skan /* If we are peeling, the copy in that the initialization occurs has 1804169689Skan number 1. The original loop (number 0) is the last. */ 1805169689Skan if (n_copy) 1806169689Skan return n_copy - 1; 1807169689Skan else 1808169689Skan return n_copies; 1809169689Skan } 1810169689Skan} 1811169689Skan 1812169689Skan/* Locate in EXPR the expression corresponding to the location recorded 1813169689Skan in IVTS, and return a pointer to the RTX for this location. */ 1814169689Skan 1815169689Skanstatic rtx * 1816169689Skanget_ivts_expr (rtx expr, struct iv_to_split *ivts) 1817169689Skan{ 1818169689Skan unsigned i; 1819169689Skan rtx *ret = &expr; 1820169689Skan 1821169689Skan for (i = 0; i < ivts->n_loc; i++) 1822169689Skan ret = &XEXP (*ret, ivts->loc[i]); 1823169689Skan 1824169689Skan return ret; 1825169689Skan} 1826169689Skan 1827169689Skan/* Allocate basic variable for the induction variable chain. Callback for 1828169689Skan htab_traverse. */ 1829169689Skan 1830169689Skanstatic int 1831169689Skanallocate_basic_variable (void **slot, void *data ATTRIBUTE_UNUSED) 1832169689Skan{ 1833169689Skan struct iv_to_split *ivts = *slot; 1834169689Skan rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); 1835169689Skan 1836169689Skan ivts->base_var = gen_reg_rtx (GET_MODE (expr)); 1837169689Skan 1838169689Skan return 1; 1839169689Skan} 1840169689Skan 1841169689Skan/* Insert initialization of basic variable of IVTS before INSN, taking 1842169689Skan the initial value from INSN. */ 1843169689Skan 1844146895Skanstatic void 1845169689Skaninsert_base_initialization (struct iv_to_split *ivts, rtx insn) 1846132718Skan{ 1847169689Skan rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); 1848132718Skan rtx seq; 1849132718Skan 1850169689Skan start_sequence (); 1851169689Skan expr = force_operand (expr, ivts->base_var); 1852169689Skan if (expr != ivts->base_var) 1853169689Skan emit_move_insn (ivts->base_var, expr); 1854169689Skan seq = get_insns (); 1855169689Skan end_sequence (); 1856132718Skan 1857169689Skan emit_insn_before (seq, insn); 1858169689Skan} 1859132718Skan 1860169689Skan/* Replace the use of induction variable described in IVTS in INSN 1861169689Skan by base variable + DELTA * step. */ 1862169689Skan 1863169689Skanstatic void 1864169689Skansplit_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) 1865169689Skan{ 1866169689Skan rtx expr, *loc, seq, incr, var; 1867169689Skan enum machine_mode mode = GET_MODE (ivts->base_var); 1868169689Skan rtx src, dest, set; 1869169689Skan 1870169689Skan /* Construct base + DELTA * step. */ 1871169689Skan if (!delta) 1872169689Skan expr = ivts->base_var; 1873169689Skan else 1874132718Skan { 1875169689Skan incr = simplify_gen_binary (MULT, mode, 1876169689Skan ivts->step, gen_int_mode (delta, mode)); 1877169689Skan expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), 1878169689Skan ivts->base_var, incr); 1879132718Skan } 1880132718Skan 1881169689Skan /* Figure out where to do the replacement. */ 1882169689Skan loc = get_ivts_expr (single_set (insn), ivts); 1883132718Skan 1884169689Skan /* If we can make the replacement right away, we're done. */ 1885169689Skan if (validate_change (insn, loc, expr, 0)) 1886169689Skan return; 1887169689Skan 1888169689Skan /* Otherwise, force EXPR into a register and try again. */ 1889169689Skan start_sequence (); 1890169689Skan var = gen_reg_rtx (mode); 1891169689Skan expr = force_operand (expr, var); 1892169689Skan if (expr != var) 1893169689Skan emit_move_insn (var, expr); 1894132718Skan seq = get_insns (); 1895132718Skan end_sequence (); 1896169689Skan emit_insn_before (seq, insn); 1897169689Skan 1898169689Skan if (validate_change (insn, loc, var, 0)) 1899169689Skan return; 1900132718Skan 1901169689Skan /* The last chance. Try recreating the assignment in insn 1902169689Skan completely from scratch. */ 1903169689Skan set = single_set (insn); 1904169689Skan gcc_assert (set); 1905132718Skan 1906169689Skan start_sequence (); 1907169689Skan *loc = var; 1908169689Skan src = copy_rtx (SET_SRC (set)); 1909169689Skan dest = copy_rtx (SET_DEST (set)); 1910169689Skan src = force_operand (src, dest); 1911169689Skan if (src != dest) 1912169689Skan emit_move_insn (dest, src); 1913169689Skan seq = get_insns (); 1914169689Skan end_sequence (); 1915169689Skan 1916169689Skan emit_insn_before (seq, insn); 1917169689Skan delete_insn (insn); 1918132718Skan} 1919132718Skan 1920169689Skan 1921169689Skan/* Return one expansion of the accumulator recorded in struct VE. */ 1922169689Skan 1923169689Skanstatic rtx 1924169689Skanget_expansion (struct var_to_expand *ve) 1925132718Skan{ 1926169689Skan rtx reg; 1927132718Skan 1928169689Skan if (ve->reuse_expansion == 0) 1929169689Skan reg = ve->reg; 1930169689Skan else 1931169689Skan reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); 1932169689Skan 1933169689Skan if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) 1934169689Skan ve->reuse_expansion = 0; 1935169689Skan else 1936169689Skan ve->reuse_expansion++; 1937169689Skan 1938169689Skan return reg; 1939169689Skan} 1940132718Skan 1941132718Skan 1942169689Skan/* Given INSN replace the uses of the accumulator recorded in VE 1943169689Skan with a new register. */ 1944132718Skan 1945169689Skanstatic void 1946169689Skanexpand_var_during_unrolling (struct var_to_expand *ve, rtx insn) 1947169689Skan{ 1948169689Skan rtx new_reg, set; 1949169689Skan bool really_new_expansion = false; 1950169689Skan 1951169689Skan set = single_set (insn); 1952169689Skan gcc_assert (set); 1953169689Skan 1954169689Skan /* Generate a new register only if the expansion limit has not been 1955169689Skan reached. Else reuse an already existing expansion. */ 1956169689Skan if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) 1957169689Skan { 1958169689Skan really_new_expansion = true; 1959169689Skan new_reg = gen_reg_rtx (GET_MODE (ve->reg)); 1960169689Skan } 1961169689Skan else 1962169689Skan new_reg = get_expansion (ve); 1963132718Skan 1964169689Skan validate_change (insn, &SET_DEST (set), new_reg, 1); 1965169689Skan validate_change (insn, &XEXP (SET_SRC (set), 0), new_reg, 1); 1966132718Skan 1967169689Skan if (apply_change_group ()) 1968169689Skan if (really_new_expansion) 1969169689Skan { 1970169689Skan VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); 1971169689Skan ve->expansion_count++; 1972169689Skan } 1973169689Skan} 1974132718Skan 1975169689Skan/* Initialize the variable expansions in loop preheader. 1976169689Skan Callbacks for htab_traverse. PLACE_P is the loop-preheader 1977169689Skan basic block where the initialization of the expansions 1978169689Skan should take place. */ 1979169689Skan 1980169689Skanstatic int 1981169689Skaninsert_var_expansion_initialization (void **slot, void *place_p) 1982169689Skan{ 1983169689Skan struct var_to_expand *ve = *slot; 1984169689Skan basic_block place = (basic_block)place_p; 1985169689Skan rtx seq, var, zero_init, insn; 1986169689Skan unsigned i; 1987132718Skan 1988169689Skan if (VEC_length (rtx, ve->var_expansions) == 0) 1989169689Skan return 1; 1990169689Skan 1991169689Skan start_sequence (); 1992169689Skan if (ve->op == PLUS || ve->op == MINUS) 1993169689Skan for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 1994169689Skan { 1995169689Skan zero_init = CONST0_RTX (GET_MODE (var)); 1996169689Skan emit_move_insn (var, zero_init); 1997169689Skan } 1998169689Skan else if (ve->op == MULT) 1999169689Skan for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2000169689Skan { 2001169689Skan zero_init = CONST1_RTX (GET_MODE (var)); 2002169689Skan emit_move_insn (var, zero_init); 2003169689Skan } 2004169689Skan 2005169689Skan seq = get_insns (); 2006169689Skan end_sequence (); 2007169689Skan 2008169689Skan insn = BB_HEAD (place); 2009169689Skan while (!NOTE_INSN_BASIC_BLOCK_P (insn)) 2010169689Skan insn = NEXT_INSN (insn); 2011169689Skan 2012169689Skan emit_insn_after (seq, insn); 2013169689Skan /* Continue traversing the hash table. */ 2014169689Skan return 1; 2015169689Skan} 2016132718Skan 2017169689Skan/* Combine the variable expansions at the loop exit. 2018169689Skan Callbacks for htab_traverse. PLACE_P is the loop exit 2019169689Skan basic block where the summation of the expansions should 2020169689Skan take place. */ 2021132718Skan 2022169689Skanstatic int 2023169689Skancombine_var_copies_in_loop_exit (void **slot, void *place_p) 2024169689Skan{ 2025169689Skan struct var_to_expand *ve = *slot; 2026169689Skan basic_block place = (basic_block)place_p; 2027169689Skan rtx sum = ve->reg; 2028169689Skan rtx expr, seq, var, insn; 2029169689Skan unsigned i; 2030169689Skan 2031169689Skan if (VEC_length (rtx, ve->var_expansions) == 0) 2032169689Skan return 1; 2033169689Skan 2034169689Skan start_sequence (); 2035169689Skan if (ve->op == PLUS || ve->op == MINUS) 2036169689Skan for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2037169689Skan { 2038169689Skan sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), 2039169689Skan var, sum); 2040169689Skan } 2041169689Skan else if (ve->op == MULT) 2042169689Skan for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) 2043169689Skan { 2044169689Skan sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), 2045169689Skan var, sum); 2046169689Skan } 2047169689Skan 2048169689Skan expr = force_operand (sum, ve->reg); 2049169689Skan if (expr != ve->reg) 2050169689Skan emit_move_insn (ve->reg, expr); 2051169689Skan seq = get_insns (); 2052169689Skan end_sequence (); 2053169689Skan 2054169689Skan insn = BB_HEAD (place); 2055169689Skan while (!NOTE_INSN_BASIC_BLOCK_P (insn)) 2056169689Skan insn = NEXT_INSN (insn); 2057169689Skan 2058169689Skan emit_insn_after (seq, insn); 2059169689Skan 2060169689Skan /* Continue traversing the hash table. */ 2061169689Skan return 1; 2062169689Skan} 2063169689Skan 2064169689Skan/* Apply loop optimizations in loop copies using the 2065169689Skan data which gathered during the unrolling. Structure 2066169689Skan OPT_INFO record that data. 2067169689Skan 2068169689Skan UNROLLING is true if we unrolled (not peeled) the loop. 2069169689Skan REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of 2070169689Skan the loop (as it should happen in complete unrolling, but not in ordinary 2071169689Skan peeling of the loop). */ 2072169689Skan 2073169689Skanstatic void 2074169689Skanapply_opt_in_copies (struct opt_info *opt_info, 2075169689Skan unsigned n_copies, bool unrolling, 2076169689Skan bool rewrite_original_loop) 2077169689Skan{ 2078169689Skan unsigned i, delta; 2079169689Skan basic_block bb, orig_bb; 2080169689Skan rtx insn, orig_insn, next; 2081169689Skan struct iv_to_split ivts_templ, *ivts; 2082169689Skan struct var_to_expand ve_templ, *ves; 2083169689Skan 2084169689Skan /* Sanity check -- we need to put initialization in the original loop 2085169689Skan body. */ 2086169689Skan gcc_assert (!unrolling || rewrite_original_loop); 2087169689Skan 2088169689Skan /* Allocate the basic variables (i0). */ 2089169689Skan if (opt_info->insns_to_split) 2090169689Skan htab_traverse (opt_info->insns_to_split, allocate_basic_variable, NULL); 2091169689Skan 2092169689Skan for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2093169689Skan { 2094169689Skan bb = BASIC_BLOCK (i); 2095169689Skan orig_bb = get_bb_original (bb); 2096169689Skan 2097169689Skan /* bb->aux holds position in copy sequence initialized by 2098169689Skan duplicate_loop_to_header_edge. */ 2099169689Skan delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, 2100169689Skan unrolling); 2101169689Skan bb->aux = 0; 2102169689Skan orig_insn = BB_HEAD (orig_bb); 2103169689Skan for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) 2104169689Skan { 2105169689Skan next = NEXT_INSN (insn); 2106169689Skan if (!INSN_P (insn)) 2107169689Skan continue; 2108169689Skan 2109169689Skan while (!INSN_P (orig_insn)) 2110169689Skan orig_insn = NEXT_INSN (orig_insn); 2111169689Skan 2112169689Skan ivts_templ.insn = orig_insn; 2113169689Skan ve_templ.insn = orig_insn; 2114169689Skan 2115169689Skan /* Apply splitting iv optimization. */ 2116169689Skan if (opt_info->insns_to_split) 2117169689Skan { 2118169689Skan ivts = htab_find (opt_info->insns_to_split, &ivts_templ); 2119169689Skan 2120169689Skan if (ivts) 2121169689Skan { 2122169689Skan gcc_assert (GET_CODE (PATTERN (insn)) 2123169689Skan == GET_CODE (PATTERN (orig_insn))); 2124169689Skan 2125169689Skan if (!delta) 2126169689Skan insert_base_initialization (ivts, insn); 2127169689Skan split_iv (ivts, insn, delta); 2128169689Skan } 2129169689Skan } 2130169689Skan /* Apply variable expansion optimization. */ 2131169689Skan if (unrolling && opt_info->insns_with_var_to_expand) 2132169689Skan { 2133169689Skan ves = htab_find (opt_info->insns_with_var_to_expand, &ve_templ); 2134169689Skan if (ves) 2135169689Skan { 2136169689Skan gcc_assert (GET_CODE (PATTERN (insn)) 2137169689Skan == GET_CODE (PATTERN (orig_insn))); 2138169689Skan expand_var_during_unrolling (ves, insn); 2139169689Skan } 2140169689Skan } 2141169689Skan orig_insn = NEXT_INSN (orig_insn); 2142169689Skan } 2143132718Skan } 2144132718Skan 2145169689Skan if (!rewrite_original_loop) 2146169689Skan return; 2147169689Skan 2148169689Skan /* Initialize the variable expansions in the loop preheader 2149169689Skan and take care of combining them at the loop exit. */ 2150169689Skan if (opt_info->insns_with_var_to_expand) 2151132718Skan { 2152169689Skan htab_traverse (opt_info->insns_with_var_to_expand, 2153169689Skan insert_var_expansion_initialization, 2154169689Skan opt_info->loop_preheader); 2155169689Skan htab_traverse (opt_info->insns_with_var_to_expand, 2156169689Skan combine_var_copies_in_loop_exit, 2157169689Skan opt_info->loop_exit); 2158169689Skan } 2159169689Skan 2160169689Skan /* Rewrite also the original loop body. Find them as originals of the blocks 2161169689Skan in the last copied iteration, i.e. those that have 2162169689Skan get_bb_copy (get_bb_original (bb)) == bb. */ 2163169689Skan for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) 2164169689Skan { 2165169689Skan bb = BASIC_BLOCK (i); 2166169689Skan orig_bb = get_bb_original (bb); 2167169689Skan if (get_bb_copy (orig_bb) != bb) 2168169689Skan continue; 2169169689Skan 2170169689Skan delta = determine_split_iv_delta (0, n_copies, unrolling); 2171169689Skan for (orig_insn = BB_HEAD (orig_bb); 2172169689Skan orig_insn != NEXT_INSN (BB_END (bb)); 2173169689Skan orig_insn = next) 2174169689Skan { 2175169689Skan next = NEXT_INSN (orig_insn); 2176169689Skan 2177169689Skan if (!INSN_P (orig_insn)) 2178169689Skan continue; 2179169689Skan 2180169689Skan ivts_templ.insn = orig_insn; 2181169689Skan if (opt_info->insns_to_split) 2182169689Skan { 2183169689Skan ivts = htab_find (opt_info->insns_to_split, &ivts_templ); 2184169689Skan if (ivts) 2185169689Skan { 2186169689Skan if (!delta) 2187169689Skan insert_base_initialization (ivts, orig_insn); 2188169689Skan split_iv (ivts, orig_insn, delta); 2189169689Skan continue; 2190169689Skan } 2191169689Skan } 2192169689Skan 2193169689Skan } 2194169689Skan } 2195169689Skan} 2196132718Skan 2197169689Skan/* Release the data structures used for the variable expansion 2198169689Skan optimization. Callbacks for htab_traverse. */ 2199132718Skan 2200169689Skanstatic int 2201169689Skanrelease_var_copies (void **slot, void *data ATTRIBUTE_UNUSED) 2202169689Skan{ 2203169689Skan struct var_to_expand *ve = *slot; 2204169689Skan 2205169689Skan VEC_free (rtx, heap, ve->var_expansions); 2206169689Skan 2207169689Skan /* Continue traversing the hash table. */ 2208169689Skan return 1; 2209169689Skan} 2210169689Skan 2211169689Skan/* Release OPT_INFO. */ 2212169689Skan 2213169689Skanstatic void 2214169689Skanfree_opt_info (struct opt_info *opt_info) 2215169689Skan{ 2216169689Skan if (opt_info->insns_to_split) 2217169689Skan htab_delete (opt_info->insns_to_split); 2218169689Skan if (opt_info->insns_with_var_to_expand) 2219169689Skan { 2220169689Skan htab_traverse (opt_info->insns_with_var_to_expand, 2221169689Skan release_var_copies, NULL); 2222169689Skan htab_delete (opt_info->insns_with_var_to_expand); 2223132718Skan } 2224169689Skan free (opt_info); 2225132718Skan} 2226