/* Loop unrolling.
   Copyright (C) 2002-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "hard-reg-set.h"
#include "obstack.h"
#include "profile.h"
#include "predict.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "basic-block.h"
#include "cfgloop.h"
#include "params.h"
#include "insn-codes.h"
#include "optabs.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "hash-table.h"
#include "recog.h"
#include "target.h"
#include "dumpfile.h"

/* This pass performs loop unrolling.  We only perform this
   optimization on innermost loops (with a single exception) because
   the impact on performance is greatest there, and because we want to
   avoid unnecessary code size growth.  The gain comes from greater
   sequentiality of the code, from producing better code for further
   optimization passes, and in some cases from executing fewer tests of
   the exit condition.  The main problem is code growth, which hurts
   performance through its effect on the caches.

   What we do:

   -- unrolling of loops that roll a constant number of times; this is
      almost always a win, as we get rid of the exit condition tests
   -- unrolling of loops whose number of iterations can be computed at
      run time; we also get rid of the exit condition tests here, but
      there is the extra expense of computing the number of iterations
   -- simple unrolling of the remaining loops; this is performed only
      if we are asked to, as the gain is questionable in this case and
      it may often even slow the code down

   For a more detailed description of each of these, see the comments
   at the appropriate functions below.

   There are a lot of parameters (defined and described in params.def)
   that control how much we unroll.

   ??? A big problem is that we do not have a good way to determine how
   many times a loop should be unrolled; the experiments I have made
   showed that this choice may affect performance by several percent.  */
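
/* As an illustration of how the parameters interact (the numbers below
   are only an example, not the built-in defaults): for a loop with
   NINSNS == 10 and AV_NINSNS == 10, compiled with
   --param max-unrolled-insns=200, --param max-average-unrolled-insns=80
   and --param max-unroll-times=8, the decision functions below start
   from min (200 / 10, 80 / 10) == 8 copies, cap that at the
   max-unroll-times limit of 8, and may then reduce it further through
   targetm.loop_unroll_adjust and the iteration count checks.  */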

/* Information about induction variables to split.  */

struct iv_to_split
{
  rtx_insn *insn;	       /* The insn in which the induction variable
				  occurs.  */
  rtx orig_var;		       /* The variable (register) for the IV before
				  the split.  */
  rtx base_var;		       /* The variable on which the values in the
				  further iterations are based.  */
  rtx step;		       /* Step of the induction variable.  */
  struct iv_to_split *next;    /* Next entry in walking order.  */
};

/* Information about accumulators to expand.  */

struct var_to_expand
{
  rtx_insn *insn;	       /* The insn in which the variable expansion
				  occurs.  */
  rtx reg;		       /* The accumulator which is expanded.  */
  vec<rtx> var_expansions;     /* The copies of the accumulator which is
				  expanded.  */
  struct var_to_expand *next;  /* Next entry in walking order.  */
  enum rtx_code op;	       /* The type of the accumulation - addition,
				  subtraction or multiplication.  */
  int expansion_count;	       /* Count the number of expansions generated
				  so far.  */
  int reuse_expansion;	       /* The expansion we intend to reuse to expand
				  the accumulator.  If REUSE_EXPANSION is 0
				  reuse the original accumulator.  Else use
				  var_expansions[REUSE_EXPANSION - 1].  */
};

/* Hashtable helper for iv_to_split.  */

struct iv_split_hasher : typed_free_remove <iv_to_split>
{
  typedef iv_to_split value_type;
  typedef iv_to_split compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};


/* A hash function for information about insns to split.  */

inline hashval_t
iv_split_hasher::hash (const value_type *ivts)
{
  return (hashval_t) INSN_UID (ivts->insn);
}

/* An equality function for information about insns to split.  */

inline bool
iv_split_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}

/* Hashtable helper for var_to_expand.  */

struct var_expand_hasher : typed_free_remove <var_to_expand>
{
  typedef var_to_expand value_type;
  typedef var_to_expand compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Return a hash for VES.  */

inline hashval_t
var_expand_hasher::hash (const value_type *ves)
{
  return (hashval_t) INSN_UID (ves->insn);
}

/* Return true if I1 and I2 refer to the same instruction.  */

inline bool
var_expand_hasher::equal (const value_type *i1, const compare_type *i2)
{
  return i1->insn == i2->insn;
}

/* Information about optimizations applied in
   the unrolled loop.  */

struct opt_info
{
  hash_table<iv_split_hasher> *insns_to_split; /* A hashtable of insns to
						   split.  */
  struct iv_to_split *iv_to_split_head;	/* The first iv to split.  */
  struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list.  */
  hash_table<var_expand_hasher> *insns_with_var_to_expand; /* A hashtable of
					   insns with accumulators to expand.  */
  struct var_to_expand *var_to_expand_head; /* The first var to expand.  */
  struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list.  */
  unsigned first_new_block;	/* The first basic block that was
				   duplicated.  */
  basic_block loop_exit;	/* The loop exit basic block.  */
  basic_block loop_preheader;	/* The loop preheader basic block.
*/ 195}; 196 197static void decide_unroll_stupid (struct loop *, int); 198static void decide_unroll_constant_iterations (struct loop *, int); 199static void decide_unroll_runtime_iterations (struct loop *, int); 200static void unroll_loop_stupid (struct loop *); 201static void decide_unrolling (int); 202static void unroll_loop_constant_iterations (struct loop *); 203static void unroll_loop_runtime_iterations (struct loop *); 204static struct opt_info *analyze_insns_in_loop (struct loop *); 205static void opt_info_start_duplication (struct opt_info *); 206static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); 207static void free_opt_info (struct opt_info *); 208static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx_insn *); 209static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); 210static struct iv_to_split *analyze_iv_to_split_insn (rtx_insn *); 211static void expand_var_during_unrolling (struct var_to_expand *, rtx_insn *); 212static void insert_var_expansion_initialization (struct var_to_expand *, 213 basic_block); 214static void combine_var_copies_in_loop_exit (struct var_to_expand *, 215 basic_block); 216static rtx get_expansion (struct var_to_expand *); 217 218/* Emit a message summarizing the unroll that will be 219 performed for LOOP, along with the loop's location LOCUS, if 220 appropriate given the dump or -fopt-info settings. */ 221 222static void 223report_unroll (struct loop *loop, location_t locus) 224{ 225 int report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_RTL | TDF_DETAILS; 226 227 if (loop->lpt_decision.decision == LPT_NONE) 228 return; 229 230 if (!dump_enabled_p ()) 231 return; 232 233 dump_printf_loc (report_flags, locus, 234 "loop unrolled %d times", 235 loop->lpt_decision.times); 236 if (profile_info) 237 dump_printf (report_flags, 238 " (header execution count %d)", 239 (int)loop->header->count); 240 241 dump_printf (report_flags, "\n"); 242} 243 244/* Decide whether unroll loops and how much. */ 245static void 246decide_unrolling (int flags) 247{ 248 struct loop *loop; 249 250 /* Scan the loops, inner ones first. */ 251 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 252 { 253 loop->lpt_decision.decision = LPT_NONE; 254 location_t locus = get_loop_location (loop); 255 256 if (dump_enabled_p ()) 257 dump_printf_loc (TDF_RTL, locus, 258 ";; *** Considering loop %d at BB %d for " 259 "unrolling ***\n", 260 loop->num, loop->header->index); 261 262 /* Do not peel cold areas. */ 263 if (optimize_loop_for_size_p (loop)) 264 { 265 if (dump_file) 266 fprintf (dump_file, ";; Not considering loop, cold area\n"); 267 continue; 268 } 269 270 /* Can the loop be manipulated? */ 271 if (!can_duplicate_loop_p (loop)) 272 { 273 if (dump_file) 274 fprintf (dump_file, 275 ";; Not considering loop, cannot duplicate\n"); 276 continue; 277 } 278 279 /* Skip non-innermost loops. */ 280 if (loop->inner) 281 { 282 if (dump_file) 283 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); 284 continue; 285 } 286 287 loop->ninsns = num_loop_insns (loop); 288 loop->av_ninsns = average_num_loop_insns (loop); 289 290 /* Try transformations one by one in decreasing order of 291 priority. */ 292 293 decide_unroll_constant_iterations (loop, flags); 294 if (loop->lpt_decision.decision == LPT_NONE) 295 decide_unroll_runtime_iterations (loop, flags); 296 if (loop->lpt_decision.decision == LPT_NONE) 297 decide_unroll_stupid (loop, flags); 298 299 report_unroll (loop, locus); 300 } 301} 302 303/* Unroll LOOPS. 
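   FLAGS is a combination of the UAP_* constants: UAP_UNROLL allows
   unrolling of loops with a constant or runtime-computable number of
   iterations, and UAP_UNROLL_ALL additionally allows the "stupid"
   unrolling performed by unroll_loop_stupid.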
*/ 304void 305unroll_loops (int flags) 306{ 307 struct loop *loop; 308 bool changed = false; 309 310 /* Now decide rest of unrolling. */ 311 decide_unrolling (flags); 312 313 /* Scan the loops, inner ones first. */ 314 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) 315 { 316 /* And perform the appropriate transformations. */ 317 switch (loop->lpt_decision.decision) 318 { 319 case LPT_UNROLL_CONSTANT: 320 unroll_loop_constant_iterations (loop); 321 changed = true; 322 break; 323 case LPT_UNROLL_RUNTIME: 324 unroll_loop_runtime_iterations (loop); 325 changed = true; 326 break; 327 case LPT_UNROLL_STUPID: 328 unroll_loop_stupid (loop); 329 changed = true; 330 break; 331 case LPT_NONE: 332 break; 333 default: 334 gcc_unreachable (); 335 } 336 } 337 338 if (changed) 339 { 340 calculate_dominance_info (CDI_DOMINATORS); 341 fix_loop_structure (NULL); 342 } 343 344 iv_analysis_done (); 345} 346 347/* Check whether exit of the LOOP is at the end of loop body. */ 348 349static bool 350loop_exit_at_end_p (struct loop *loop) 351{ 352 struct niter_desc *desc = get_simple_loop_desc (loop); 353 rtx_insn *insn; 354 355 /* We should never have conditional in latch block. */ 356 gcc_assert (desc->in_edge->dest != loop->header); 357 358 if (desc->in_edge->dest != loop->latch) 359 return false; 360 361 /* Check that the latch is empty. */ 362 FOR_BB_INSNS (loop->latch, insn) 363 { 364 if (INSN_P (insn) && active_insn_p (insn)) 365 return false; 366 } 367 368 return true; 369} 370 371/* Decide whether to unroll LOOP iterating constant number of times 372 and how much. */ 373 374static void 375decide_unroll_constant_iterations (struct loop *loop, int flags) 376{ 377 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; 378 struct niter_desc *desc; 379 widest_int iterations; 380 381 if (!(flags & UAP_UNROLL)) 382 { 383 /* We were not asked to, just return back silently. */ 384 return; 385 } 386 387 if (dump_file) 388 fprintf (dump_file, 389 "\n;; Considering unrolling loop with constant " 390 "number of iterations\n"); 391 392 /* nunroll = total number of copies of the original loop body in 393 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 394 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 395 nunroll_by_av 396 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 397 if (nunroll > nunroll_by_av) 398 nunroll = nunroll_by_av; 399 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 400 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 401 402 if (targetm.loop_unroll_adjust) 403 nunroll = targetm.loop_unroll_adjust (nunroll, loop); 404 405 /* Skip big loops. */ 406 if (nunroll <= 1) 407 { 408 if (dump_file) 409 fprintf (dump_file, ";; Not considering loop, is too big\n"); 410 return; 411 } 412 413 /* Check for simple loops. */ 414 desc = get_simple_loop_desc (loop); 415 416 /* Check number of iterations. */ 417 if (!desc->simple_p || !desc->const_iter || desc->assumptions) 418 { 419 if (dump_file) 420 fprintf (dump_file, 421 ";; Unable to prove that the loop iterates constant times\n"); 422 return; 423 } 424 425 /* Check whether the loop rolls enough to consider. 426 Consult also loop bounds and profile; in the case the loop has more 427 than one exit it may well loop less than determined maximal number 428 of iterations. 
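     For example, with NUNROLL == 8 the loop must iterate at least
     2 * NUNROLL == 16 times, and the recorded estimate (or, failing
     that, the maximum), when available, must also be at least 16;
     otherwise the unrolled body would not even execute twice and the
     transformation is not worthwhile.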
*/ 429 if (desc->niter < 2 * nunroll 430 || ((get_estimated_loop_iterations (loop, &iterations) 431 || get_max_loop_iterations (loop, &iterations)) 432 && wi::ltu_p (iterations, 2 * nunroll))) 433 { 434 if (dump_file) 435 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 436 return; 437 } 438 439 /* Success; now compute number of iterations to unroll. We alter 440 nunroll so that as few as possible copies of loop body are 441 necessary, while still not decreasing the number of unrollings 442 too much (at most by 1). */ 443 best_copies = 2 * nunroll + 10; 444 445 i = 2 * nunroll + 2; 446 if (i - 1 >= desc->niter) 447 i = desc->niter - 2; 448 449 for (; i >= nunroll - 1; i--) 450 { 451 unsigned exit_mod = desc->niter % (i + 1); 452 453 if (!loop_exit_at_end_p (loop)) 454 n_copies = exit_mod + i + 1; 455 else if (exit_mod != (unsigned) i 456 || desc->noloop_assumptions != NULL_RTX) 457 n_copies = exit_mod + i + 2; 458 else 459 n_copies = i + 1; 460 461 if (n_copies < best_copies) 462 { 463 best_copies = n_copies; 464 best_unroll = i; 465 } 466 } 467 468 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; 469 loop->lpt_decision.times = best_unroll; 470} 471 472/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES times. 473 The transformation does this: 474 475 for (i = 0; i < 102; i++) 476 body; 477 478 ==> (LOOP->LPT_DECISION.TIMES == 3) 479 480 i = 0; 481 body; i++; 482 body; i++; 483 while (i < 102) 484 { 485 body; i++; 486 body; i++; 487 body; i++; 488 body; i++; 489 } 490 */ 491static void 492unroll_loop_constant_iterations (struct loop *loop) 493{ 494 unsigned HOST_WIDE_INT niter; 495 unsigned exit_mod; 496 sbitmap wont_exit; 497 unsigned i; 498 edge e; 499 unsigned max_unroll = loop->lpt_decision.times; 500 struct niter_desc *desc = get_simple_loop_desc (loop); 501 bool exit_at_end = loop_exit_at_end_p (loop); 502 struct opt_info *opt_info = NULL; 503 bool ok; 504 505 niter = desc->niter; 506 507 /* Should not get here (such loop should be peeled instead). */ 508 gcc_assert (niter > max_unroll + 1); 509 510 exit_mod = niter % (max_unroll + 1); 511 512 wont_exit = sbitmap_alloc (max_unroll + 1); 513 bitmap_ones (wont_exit); 514 515 auto_vec<edge> remove_edges; 516 if (flag_split_ivs_in_unroller 517 || flag_variable_expansion_in_unroller) 518 opt_info = analyze_insns_in_loop (loop); 519 520 if (!exit_at_end) 521 { 522 /* The exit is not at the end of the loop; leave exit test 523 in the first copy, so that the loops that start with test 524 of exit condition have continuous body after unrolling. */ 525 526 if (dump_file) 527 fprintf (dump_file, ";; Condition at beginning of loop.\n"); 528 529 /* Peel exit_mod iterations. */ 530 bitmap_clear_bit (wont_exit, 0); 531 if (desc->noloop_assumptions) 532 bitmap_clear_bit (wont_exit, 1); 533 534 if (exit_mod) 535 { 536 opt_info_start_duplication (opt_info); 537 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 538 exit_mod, 539 wont_exit, desc->out_edge, 540 &remove_edges, 541 DLTHE_FLAG_UPDATE_FREQ 542 | (opt_info && exit_mod > 1 543 ? 
DLTHE_RECORD_COPY_NUMBER 544 : 0)); 545 gcc_assert (ok); 546 547 if (opt_info && exit_mod > 1) 548 apply_opt_in_copies (opt_info, exit_mod, false, false); 549 550 desc->noloop_assumptions = NULL_RTX; 551 desc->niter -= exit_mod; 552 loop->nb_iterations_upper_bound -= exit_mod; 553 if (loop->any_estimate 554 && wi::leu_p (exit_mod, loop->nb_iterations_estimate)) 555 loop->nb_iterations_estimate -= exit_mod; 556 else 557 loop->any_estimate = false; 558 } 559 560 bitmap_set_bit (wont_exit, 1); 561 } 562 else 563 { 564 /* Leave exit test in last copy, for the same reason as above if 565 the loop tests the condition at the end of loop body. */ 566 567 if (dump_file) 568 fprintf (dump_file, ";; Condition at end of loop.\n"); 569 570 /* We know that niter >= max_unroll + 2; so we do not need to care of 571 case when we would exit before reaching the loop. So just peel 572 exit_mod + 1 iterations. */ 573 if (exit_mod != max_unroll 574 || desc->noloop_assumptions) 575 { 576 bitmap_clear_bit (wont_exit, 0); 577 if (desc->noloop_assumptions) 578 bitmap_clear_bit (wont_exit, 1); 579 580 opt_info_start_duplication (opt_info); 581 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 582 exit_mod + 1, 583 wont_exit, desc->out_edge, 584 &remove_edges, 585 DLTHE_FLAG_UPDATE_FREQ 586 | (opt_info && exit_mod > 0 587 ? DLTHE_RECORD_COPY_NUMBER 588 : 0)); 589 gcc_assert (ok); 590 591 if (opt_info && exit_mod > 0) 592 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); 593 594 desc->niter -= exit_mod + 1; 595 loop->nb_iterations_upper_bound -= exit_mod + 1; 596 if (loop->any_estimate 597 && wi::leu_p (exit_mod + 1, loop->nb_iterations_estimate)) 598 loop->nb_iterations_estimate -= exit_mod + 1; 599 else 600 loop->any_estimate = false; 601 desc->noloop_assumptions = NULL_RTX; 602 603 bitmap_set_bit (wont_exit, 0); 604 bitmap_set_bit (wont_exit, 1); 605 } 606 607 bitmap_clear_bit (wont_exit, max_unroll); 608 } 609 610 /* Now unroll the loop. */ 611 612 opt_info_start_duplication (opt_info); 613 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 614 max_unroll, 615 wont_exit, desc->out_edge, 616 &remove_edges, 617 DLTHE_FLAG_UPDATE_FREQ 618 | (opt_info 619 ? DLTHE_RECORD_COPY_NUMBER 620 : 0)); 621 gcc_assert (ok); 622 623 if (opt_info) 624 { 625 apply_opt_in_copies (opt_info, max_unroll, true, true); 626 free_opt_info (opt_info); 627 } 628 629 free (wont_exit); 630 631 if (exit_at_end) 632 { 633 basic_block exit_block = get_bb_copy (desc->in_edge->src); 634 /* Find a new in and out edge; they are in the last copy we have made. */ 635 636 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 637 { 638 desc->out_edge = EDGE_SUCC (exit_block, 0); 639 desc->in_edge = EDGE_SUCC (exit_block, 1); 640 } 641 else 642 { 643 desc->out_edge = EDGE_SUCC (exit_block, 1); 644 desc->in_edge = EDGE_SUCC (exit_block, 0); 645 } 646 } 647 648 desc->niter /= max_unroll + 1; 649 loop->nb_iterations_upper_bound 650 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); 651 if (loop->any_estimate) 652 loop->nb_iterations_estimate 653 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); 654 desc->niter_expr = GEN_INT (desc->niter); 655 656 /* Remove the edges. 
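     These are the exit edges that became impossible to take in the
     peeled and unrolled copies; duplicate_loop_to_header_edge collected
     them, and remove_path deletes them together with any blocks that
     become unreachable.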
*/ 657 FOR_EACH_VEC_ELT (remove_edges, i, e) 658 remove_path (e); 659 660 if (dump_file) 661 fprintf (dump_file, 662 ";; Unrolled loop %d times, constant # of iterations %i insns\n", 663 max_unroll, num_loop_insns (loop)); 664} 665 666/* Decide whether to unroll LOOP iterating runtime computable number of times 667 and how much. */ 668static void 669decide_unroll_runtime_iterations (struct loop *loop, int flags) 670{ 671 unsigned nunroll, nunroll_by_av, i; 672 struct niter_desc *desc; 673 widest_int iterations; 674 675 if (!(flags & UAP_UNROLL)) 676 { 677 /* We were not asked to, just return back silently. */ 678 return; 679 } 680 681 if (dump_file) 682 fprintf (dump_file, 683 "\n;; Considering unrolling loop with runtime " 684 "computable number of iterations\n"); 685 686 /* nunroll = total number of copies of the original loop body in 687 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 688 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 689 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 690 if (nunroll > nunroll_by_av) 691 nunroll = nunroll_by_av; 692 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 693 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 694 695 if (targetm.loop_unroll_adjust) 696 nunroll = targetm.loop_unroll_adjust (nunroll, loop); 697 698 /* Skip big loops. */ 699 if (nunroll <= 1) 700 { 701 if (dump_file) 702 fprintf (dump_file, ";; Not considering loop, is too big\n"); 703 return; 704 } 705 706 /* Check for simple loops. */ 707 desc = get_simple_loop_desc (loop); 708 709 /* Check simpleness. */ 710 if (!desc->simple_p || desc->assumptions) 711 { 712 if (dump_file) 713 fprintf (dump_file, 714 ";; Unable to prove that the number of iterations " 715 "can be counted in runtime\n"); 716 return; 717 } 718 719 if (desc->const_iter) 720 { 721 if (dump_file) 722 fprintf (dump_file, ";; Loop iterates constant times\n"); 723 return; 724 } 725 726 /* Check whether the loop rolls. */ 727 if ((get_estimated_loop_iterations (loop, &iterations) 728 || get_max_loop_iterations (loop, &iterations)) 729 && wi::ltu_p (iterations, 2 * nunroll)) 730 { 731 if (dump_file) 732 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 733 return; 734 } 735 736 /* Success; now force nunroll to be power of 2, as we are unable to 737 cope with overflows in computation of number of iterations. */ 738 for (i = 1; 2 * i <= nunroll; i *= 2) 739 continue; 740 741 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; 742 loop->lpt_decision.times = i - 1; 743} 744 745/* Splits edge E and inserts the sequence of instructions INSNS on it, and 746 returns the newly created block. If INSNS is NULL_RTX, nothing is changed 747 and NULL is returned instead. */ 748 749basic_block 750split_edge_and_insert (edge e, rtx_insn *insns) 751{ 752 basic_block bb; 753 754 if (!insns) 755 return NULL; 756 bb = split_edge (e); 757 emit_insn_after (insns, BB_END (bb)); 758 759 /* ??? We used to assume that INSNS can contain control flow insns, and 760 that we had to try to find sub basic blocks in BB to maintain a valid 761 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB 762 and call break_superblocks when going out of cfglayout mode. But it 763 turns out that this never happens; and that if it does ever happen, 764 the verify_flow_info at the end of the RTL loop passes would fail. 765 766 There are two reasons why we expected we could have control flow insns 767 in INSNS. 
The first is when a comparison has to be done in parts, and 768 the second is when the number of iterations is computed for loops with 769 the number of iterations known at runtime. In both cases, test cases 770 to get control flow in INSNS appear to be impossible to construct: 771 772 * If do_compare_rtx_and_jump needs several branches to do comparison 773 in a mode that needs comparison by parts, we cannot analyze the 774 number of iterations of the loop, and we never get to unrolling it. 775 776 * The code in expand_divmod that was suspected to cause creation of 777 branching code seems to be only accessed for signed division. The 778 divisions used by # of iterations analysis are always unsigned. 779 Problems might arise on architectures that emits branching code 780 for some operations that may appear in the unroller (especially 781 for division), but we have no such architectures. 782 783 Considering all this, it was decided that we should for now assume 784 that INSNS can in theory contain control flow insns, but in practice 785 it never does. So we don't handle the theoretical case, and should 786 a real failure ever show up, we have a pretty good clue for how to 787 fix it. */ 788 789 return bb; 790} 791 792/* Prepare a sequence comparing OP0 with OP1 using COMP and jumping to LABEL if 793 true, with probability PROB. If CINSN is not NULL, it is the insn to copy 794 in order to create a jump. */ 795 796static rtx_insn * 797compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, rtx label, int prob, 798 rtx_insn *cinsn) 799{ 800 rtx_insn *seq, *jump; 801 rtx cond; 802 machine_mode mode; 803 804 mode = GET_MODE (op0); 805 if (mode == VOIDmode) 806 mode = GET_MODE (op1); 807 808 start_sequence (); 809 if (GET_MODE_CLASS (mode) == MODE_CC) 810 { 811 /* A hack -- there seems to be no easy generic way how to make a 812 conditional jump from a ccmode comparison. */ 813 gcc_assert (cinsn); 814 cond = XEXP (SET_SRC (pc_set (cinsn)), 0); 815 gcc_assert (GET_CODE (cond) == comp); 816 gcc_assert (rtx_equal_p (op0, XEXP (cond, 0))); 817 gcc_assert (rtx_equal_p (op1, XEXP (cond, 1))); 818 emit_jump_insn (copy_insn (PATTERN (cinsn))); 819 jump = get_last_insn (); 820 gcc_assert (JUMP_P (jump)); 821 JUMP_LABEL (jump) = JUMP_LABEL (cinsn); 822 LABEL_NUSES (JUMP_LABEL (jump))++; 823 redirect_jump (jump, label, 0); 824 } 825 else 826 { 827 gcc_assert (!cinsn); 828 829 op0 = force_operand (op0, NULL_RTX); 830 op1 = force_operand (op1, NULL_RTX); 831 do_compare_rtx_and_jump (op0, op1, comp, 0, 832 mode, NULL_RTX, NULL_RTX, label, -1); 833 jump = get_last_insn (); 834 gcc_assert (JUMP_P (jump)); 835 JUMP_LABEL (jump) = label; 836 LABEL_NUSES (label)++; 837 } 838 add_int_reg_note (jump, REG_BR_PROB, prob); 839 840 seq = get_insns (); 841 end_sequence (); 842 843 return seq; 844} 845 846/* Unroll LOOP for which we are able to count number of iterations in runtime 847 LOOP->LPT_DECISION.TIMES times. 
The transformation does this (with some 848 extra care for case n < 0): 849 850 for (i = 0; i < n; i++) 851 body; 852 853 ==> (LOOP->LPT_DECISION.TIMES == 3) 854 855 i = 0; 856 mod = n % 4; 857 858 switch (mod) 859 { 860 case 3: 861 body; i++; 862 case 2: 863 body; i++; 864 case 1: 865 body; i++; 866 case 0: ; 867 } 868 869 while (i < n) 870 { 871 body; i++; 872 body; i++; 873 body; i++; 874 body; i++; 875 } 876 */ 877static void 878unroll_loop_runtime_iterations (struct loop *loop) 879{ 880 rtx old_niter, niter, tmp; 881 rtx_insn *init_code, *branch_code; 882 unsigned i, j, p; 883 basic_block preheader, *body, swtch, ezc_swtch; 884 sbitmap wont_exit; 885 int may_exit_copy; 886 unsigned n_peel; 887 edge e; 888 bool extra_zero_check, last_may_exit; 889 unsigned max_unroll = loop->lpt_decision.times; 890 struct niter_desc *desc = get_simple_loop_desc (loop); 891 bool exit_at_end = loop_exit_at_end_p (loop); 892 struct opt_info *opt_info = NULL; 893 bool ok; 894 895 if (flag_split_ivs_in_unroller 896 || flag_variable_expansion_in_unroller) 897 opt_info = analyze_insns_in_loop (loop); 898 899 /* Remember blocks whose dominators will have to be updated. */ 900 auto_vec<basic_block> dom_bbs; 901 902 body = get_loop_body (loop); 903 for (i = 0; i < loop->num_nodes; i++) 904 { 905 vec<basic_block> ldom; 906 basic_block bb; 907 908 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); 909 FOR_EACH_VEC_ELT (ldom, j, bb) 910 if (!flow_bb_inside_loop_p (loop, bb)) 911 dom_bbs.safe_push (bb); 912 913 ldom.release (); 914 } 915 free (body); 916 917 if (!exit_at_end) 918 { 919 /* Leave exit in first copy (for explanation why see comment in 920 unroll_loop_constant_iterations). */ 921 may_exit_copy = 0; 922 n_peel = max_unroll - 1; 923 extra_zero_check = true; 924 last_may_exit = false; 925 } 926 else 927 { 928 /* Leave exit in last copy (for explanation why see comment in 929 unroll_loop_constant_iterations). */ 930 may_exit_copy = max_unroll; 931 n_peel = max_unroll; 932 extra_zero_check = false; 933 last_may_exit = true; 934 } 935 936 /* Get expression for number of iterations. */ 937 start_sequence (); 938 old_niter = niter = gen_reg_rtx (desc->mode); 939 tmp = force_operand (copy_rtx (desc->niter_expr), niter); 940 if (tmp != niter) 941 emit_move_insn (niter, tmp); 942 943 /* Count modulo by ANDing it with max_unroll; we use the fact that 944 the number of unrollings is a power of two, and thus this is correct 945 even if there is overflow in the computation. */ 946 niter = expand_simple_binop (desc->mode, AND, 947 niter, gen_int_mode (max_unroll, desc->mode), 948 NULL_RTX, 0, OPTAB_LIB_WIDEN); 949 950 init_code = get_insns (); 951 end_sequence (); 952 unshare_all_rtl_in_chain (init_code); 953 954 /* Precondition the loop. */ 955 split_edge_and_insert (loop_preheader_edge (loop), init_code); 956 957 auto_vec<edge> remove_edges; 958 959 wont_exit = sbitmap_alloc (max_unroll + 2); 960 961 /* Peel the first copy of loop body (almost always we must leave exit test 962 here; the only exception is when we have extra zero check and the number 963 of iterations is reliable. Also record the place of (possible) extra 964 zero check. 
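     For instance, with MAX_UNROLL == 3 and the exit test at the top of
     the body, this copy plus N_PEEL == 2 more copies are peeled, and
     the comparisons built in the loop below (together with the extra
     zero check) dispatch on the masked NITER so that exactly
     NITER % 4 peeled copies execute before the unrolled loop is
     entered.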
*/ 965 bitmap_clear (wont_exit); 966 if (extra_zero_check 967 && !desc->noloop_assumptions) 968 bitmap_set_bit (wont_exit, 1); 969 ezc_swtch = loop_preheader_edge (loop)->src; 970 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 971 1, wont_exit, desc->out_edge, 972 &remove_edges, 973 DLTHE_FLAG_UPDATE_FREQ); 974 gcc_assert (ok); 975 976 /* Record the place where switch will be built for preconditioning. */ 977 swtch = split_edge (loop_preheader_edge (loop)); 978 979 for (i = 0; i < n_peel; i++) 980 { 981 /* Peel the copy. */ 982 bitmap_clear (wont_exit); 983 if (i != n_peel - 1 || !last_may_exit) 984 bitmap_set_bit (wont_exit, 1); 985 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 986 1, wont_exit, desc->out_edge, 987 &remove_edges, 988 DLTHE_FLAG_UPDATE_FREQ); 989 gcc_assert (ok); 990 991 /* Create item for switch. */ 992 j = n_peel - i - (extra_zero_check ? 0 : 1); 993 p = REG_BR_PROB_BASE / (i + 2); 994 995 preheader = split_edge (loop_preheader_edge (loop)); 996 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, 997 block_label (preheader), p, 998 NULL); 999 1000 /* We rely on the fact that the compare and jump cannot be optimized out, 1001 and hence the cfg we create is correct. */ 1002 gcc_assert (branch_code != NULL_RTX); 1003 1004 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); 1005 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1006 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1007 e = make_edge (swtch, preheader, 1008 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1009 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p); 1010 e->probability = p; 1011 } 1012 1013 if (extra_zero_check) 1014 { 1015 /* Add branch for zero iterations. */ 1016 p = REG_BR_PROB_BASE / (max_unroll + 1); 1017 swtch = ezc_swtch; 1018 preheader = split_edge (loop_preheader_edge (loop)); 1019 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, 1020 block_label (preheader), p, 1021 NULL); 1022 gcc_assert (branch_code != NULL_RTX); 1023 1024 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); 1025 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1026 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; 1027 e = make_edge (swtch, preheader, 1028 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); 1029 e->count = RDIV (preheader->count * REG_BR_PROB_BASE, p); 1030 e->probability = p; 1031 } 1032 1033 /* Recount dominators for outer blocks. */ 1034 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); 1035 1036 /* And unroll loop. */ 1037 1038 bitmap_ones (wont_exit); 1039 bitmap_clear_bit (wont_exit, may_exit_copy); 1040 opt_info_start_duplication (opt_info); 1041 1042 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1043 max_unroll, 1044 wont_exit, desc->out_edge, 1045 &remove_edges, 1046 DLTHE_FLAG_UPDATE_FREQ 1047 | (opt_info 1048 ? DLTHE_RECORD_COPY_NUMBER 1049 : 0)); 1050 gcc_assert (ok); 1051 1052 if (opt_info) 1053 { 1054 apply_opt_in_copies (opt_info, max_unroll, true, true); 1055 free_opt_info (opt_info); 1056 } 1057 1058 free (wont_exit); 1059 1060 if (exit_at_end) 1061 { 1062 basic_block exit_block = get_bb_copy (desc->in_edge->src); 1063 /* Find a new in and out edge; they are in the last copy we have 1064 made. 
*/ 1065 1066 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) 1067 { 1068 desc->out_edge = EDGE_SUCC (exit_block, 0); 1069 desc->in_edge = EDGE_SUCC (exit_block, 1); 1070 } 1071 else 1072 { 1073 desc->out_edge = EDGE_SUCC (exit_block, 1); 1074 desc->in_edge = EDGE_SUCC (exit_block, 0); 1075 } 1076 } 1077 1078 /* Remove the edges. */ 1079 FOR_EACH_VEC_ELT (remove_edges, i, e) 1080 remove_path (e); 1081 1082 /* We must be careful when updating the number of iterations due to 1083 preconditioning and the fact that the value must be valid at entry 1084 of the loop. After passing through the above code, we see that 1085 the correct new number of iterations is this: */ 1086 gcc_assert (!desc->const_iter); 1087 desc->niter_expr = 1088 simplify_gen_binary (UDIV, desc->mode, old_niter, 1089 gen_int_mode (max_unroll + 1, desc->mode)); 1090 loop->nb_iterations_upper_bound 1091 = wi::udiv_trunc (loop->nb_iterations_upper_bound, max_unroll + 1); 1092 if (loop->any_estimate) 1093 loop->nb_iterations_estimate 1094 = wi::udiv_trunc (loop->nb_iterations_estimate, max_unroll + 1); 1095 if (exit_at_end) 1096 { 1097 desc->niter_expr = 1098 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); 1099 desc->noloop_assumptions = NULL_RTX; 1100 --loop->nb_iterations_upper_bound; 1101 if (loop->any_estimate 1102 && loop->nb_iterations_estimate != 0) 1103 --loop->nb_iterations_estimate; 1104 else 1105 loop->any_estimate = false; 1106 } 1107 1108 if (dump_file) 1109 fprintf (dump_file, 1110 ";; Unrolled loop %d times, counting # of iterations " 1111 "in runtime, %i insns\n", 1112 max_unroll, num_loop_insns (loop)); 1113} 1114 1115/* Decide whether to unroll LOOP stupidly and how much. */ 1116static void 1117decide_unroll_stupid (struct loop *loop, int flags) 1118{ 1119 unsigned nunroll, nunroll_by_av, i; 1120 struct niter_desc *desc; 1121 widest_int iterations; 1122 1123 if (!(flags & UAP_UNROLL_ALL)) 1124 { 1125 /* We were not asked to, just return back silently. */ 1126 return; 1127 } 1128 1129 if (dump_file) 1130 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); 1131 1132 /* nunroll = total number of copies of the original loop body in 1133 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 1134 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 1135 nunroll_by_av 1136 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 1137 if (nunroll > nunroll_by_av) 1138 nunroll = nunroll_by_av; 1139 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 1140 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 1141 1142 if (targetm.loop_unroll_adjust) 1143 nunroll = targetm.loop_unroll_adjust (nunroll, loop); 1144 1145 /* Skip big loops. */ 1146 if (nunroll <= 1) 1147 { 1148 if (dump_file) 1149 fprintf (dump_file, ";; Not considering loop, is too big\n"); 1150 return; 1151 } 1152 1153 /* Check for simple loops. */ 1154 desc = get_simple_loop_desc (loop); 1155 1156 /* Check simpleness. */ 1157 if (desc->simple_p && !desc->assumptions) 1158 { 1159 if (dump_file) 1160 fprintf (dump_file, ";; The loop is simple\n"); 1161 return; 1162 } 1163 1164 /* Do not unroll loops with branches inside -- it increases number 1165 of mispredicts. 1166 TODO: this heuristic needs tunning; call inside the loop body 1167 is also relatively good reason to not unroll. 
*/ 1168 if (num_loop_branches (loop) > 1) 1169 { 1170 if (dump_file) 1171 fprintf (dump_file, ";; Not unrolling, contains branches\n"); 1172 return; 1173 } 1174 1175 /* Check whether the loop rolls. */ 1176 if ((get_estimated_loop_iterations (loop, &iterations) 1177 || get_max_loop_iterations (loop, &iterations)) 1178 && wi::ltu_p (iterations, 2 * nunroll)) 1179 { 1180 if (dump_file) 1181 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); 1182 return; 1183 } 1184 1185 /* Success. Now force nunroll to be power of 2, as it seems that this 1186 improves results (partially because of better alignments, partially 1187 because of some dark magic). */ 1188 for (i = 1; 2 * i <= nunroll; i *= 2) 1189 continue; 1190 1191 loop->lpt_decision.decision = LPT_UNROLL_STUPID; 1192 loop->lpt_decision.times = i - 1; 1193} 1194 1195/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation does this: 1196 1197 while (cond) 1198 body; 1199 1200 ==> (LOOP->LPT_DECISION.TIMES == 3) 1201 1202 while (cond) 1203 { 1204 body; 1205 if (!cond) break; 1206 body; 1207 if (!cond) break; 1208 body; 1209 if (!cond) break; 1210 body; 1211 } 1212 */ 1213static void 1214unroll_loop_stupid (struct loop *loop) 1215{ 1216 sbitmap wont_exit; 1217 unsigned nunroll = loop->lpt_decision.times; 1218 struct niter_desc *desc = get_simple_loop_desc (loop); 1219 struct opt_info *opt_info = NULL; 1220 bool ok; 1221 1222 if (flag_split_ivs_in_unroller 1223 || flag_variable_expansion_in_unroller) 1224 opt_info = analyze_insns_in_loop (loop); 1225 1226 1227 wont_exit = sbitmap_alloc (nunroll + 1); 1228 bitmap_clear (wont_exit); 1229 opt_info_start_duplication (opt_info); 1230 1231 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1232 nunroll, wont_exit, 1233 NULL, NULL, 1234 DLTHE_FLAG_UPDATE_FREQ 1235 | (opt_info 1236 ? DLTHE_RECORD_COPY_NUMBER 1237 : 0)); 1238 gcc_assert (ok); 1239 1240 if (opt_info) 1241 { 1242 apply_opt_in_copies (opt_info, nunroll, true, true); 1243 free_opt_info (opt_info); 1244 } 1245 1246 free (wont_exit); 1247 1248 if (desc->simple_p) 1249 { 1250 /* We indeed may get here provided that there are nontrivial assumptions 1251 for a loop to be really simple. We could update the counts, but the 1252 problem is that we are unable to decide which exit will be taken 1253 (not really true in case the number of iterations is constant, 1254 but no one will do anything with this information, so we do not 1255 worry about it). */ 1256 desc->simple_p = false; 1257 } 1258 1259 if (dump_file) 1260 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", 1261 nunroll, num_loop_insns (loop)); 1262} 1263 1264/* Returns true if REG is referenced in one nondebug insn in LOOP. 1265 Set *DEBUG_USES to the number of debug insns that reference the 1266 variable. */ 1267 1268static bool 1269referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, 1270 int *debug_uses) 1271{ 1272 basic_block *body, bb; 1273 unsigned i; 1274 int count_ref = 0; 1275 rtx_insn *insn; 1276 1277 body = get_loop_body (loop); 1278 for (i = 0; i < loop->num_nodes; i++) 1279 { 1280 bb = body[i]; 1281 1282 FOR_BB_INSNS (bb, insn) 1283 if (!rtx_referenced_p (reg, insn)) 1284 continue; 1285 else if (DEBUG_INSN_P (insn)) 1286 ++*debug_uses; 1287 else if (++count_ref > 1) 1288 break; 1289 } 1290 free (body); 1291 return (count_ref == 1); 1292} 1293 1294/* Reset the DEBUG_USES debug insns in LOOP that reference REG. 
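   Their locations are replaced by gen_rtx_UNKNOWN_VAR_LOC, so after the
   accumulator is expanded the debug info simply reports the value as
   unavailable instead of becoming wrong.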
*/ 1295 1296static void 1297reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) 1298{ 1299 basic_block *body, bb; 1300 unsigned i; 1301 rtx_insn *insn; 1302 1303 body = get_loop_body (loop); 1304 for (i = 0; debug_uses && i < loop->num_nodes; i++) 1305 { 1306 bb = body[i]; 1307 1308 FOR_BB_INSNS (bb, insn) 1309 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn)) 1310 continue; 1311 else 1312 { 1313 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), 1314 gen_rtx_UNKNOWN_VAR_LOC (), 0); 1315 if (!--debug_uses) 1316 break; 1317 } 1318 } 1319 free (body); 1320} 1321 1322/* Determine whether INSN contains an accumulator 1323 which can be expanded into separate copies, 1324 one for each copy of the LOOP body. 1325 1326 for (i = 0 ; i < n; i++) 1327 sum += a[i]; 1328 1329 ==> 1330 1331 sum += a[i] 1332 .... 1333 i = i+1; 1334 sum1 += a[i] 1335 .... 1336 i = i+1 1337 sum2 += a[i]; 1338 .... 1339 1340 Return NULL if INSN contains no opportunity for expansion of accumulator. 1341 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant 1342 information and return a pointer to it. 1343*/ 1344 1345static struct var_to_expand * 1346analyze_insn_to_expand_var (struct loop *loop, rtx_insn *insn) 1347{ 1348 rtx set, dest, src; 1349 struct var_to_expand *ves; 1350 unsigned accum_pos; 1351 enum rtx_code code; 1352 int debug_uses = 0; 1353 1354 set = single_set (insn); 1355 if (!set) 1356 return NULL; 1357 1358 dest = SET_DEST (set); 1359 src = SET_SRC (set); 1360 code = GET_CODE (src); 1361 1362 if (code != PLUS && code != MINUS && code != MULT && code != FMA) 1363 return NULL; 1364 1365 if (FLOAT_MODE_P (GET_MODE (dest))) 1366 { 1367 if (!flag_associative_math) 1368 return NULL; 1369 /* In the case of FMA, we're also changing the rounding. */ 1370 if (code == FMA && !flag_unsafe_math_optimizations) 1371 return NULL; 1372 } 1373 1374 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn 1375 in MD. But if there is no optab to generate the insn, we can not 1376 perform the variable expansion. This can happen if an MD provides 1377 an insn but not a named pattern to generate it, for example to avoid 1378 producing code that needs additional mode switches like for x87/mmx. 1379 1380 So we check have_insn_for which looks for an optab for the operation 1381 in SRC. If it doesn't exist, we can't perform the expansion even 1382 though INSN is valid. */ 1383 if (!have_insn_for (code, GET_MODE (src))) 1384 return NULL; 1385 1386 if (!REG_P (dest) 1387 && !(GET_CODE (dest) == SUBREG 1388 && REG_P (SUBREG_REG (dest)))) 1389 return NULL; 1390 1391 /* Find the accumulator use within the operation. */ 1392 if (code == FMA) 1393 { 1394 /* We only support accumulation via FMA in the ADD position. */ 1395 if (!rtx_equal_p (dest, XEXP (src, 2))) 1396 return NULL; 1397 accum_pos = 2; 1398 } 1399 else if (rtx_equal_p (dest, XEXP (src, 0))) 1400 accum_pos = 0; 1401 else if (rtx_equal_p (dest, XEXP (src, 1))) 1402 { 1403 /* The method of expansion that we are using; which includes the 1404 initialization of the expansions with zero and the summation of 1405 the expansions at the end of the computation will yield wrong 1406 results for (x = something - x) thus avoid using it in that case. */ 1407 if (code == MINUS) 1408 return NULL; 1409 accum_pos = 1; 1410 } 1411 else 1412 return NULL; 1413 1414 /* It must not otherwise be used. 
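     For example, an insn like x = x + x is rejected here, because the
     second operand is not independent of the accumulator that is being
     renamed in each copy.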
*/ 1415 if (code == FMA) 1416 { 1417 if (rtx_referenced_p (dest, XEXP (src, 0)) 1418 || rtx_referenced_p (dest, XEXP (src, 1))) 1419 return NULL; 1420 } 1421 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos))) 1422 return NULL; 1423 1424 /* It must be used in exactly one insn. */ 1425 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) 1426 return NULL; 1427 1428 if (dump_file) 1429 { 1430 fprintf (dump_file, "\n;; Expanding Accumulator "); 1431 print_rtl (dump_file, dest); 1432 fprintf (dump_file, "\n"); 1433 } 1434 1435 if (debug_uses) 1436 /* Instead of resetting the debug insns, we could replace each 1437 debug use in the loop with the sum or product of all expanded 1438 accummulators. Since we'll only know of all expansions at the 1439 end, we'd have to keep track of which vars_to_expand a debug 1440 insn in the loop references, take note of each copy of the 1441 debug insn during unrolling, and when it's all done, compute 1442 the sum or product of each variable and adjust the original 1443 debug insn and each copy thereof. What a pain! */ 1444 reset_debug_uses_in_loop (loop, dest, debug_uses); 1445 1446 /* Record the accumulator to expand. */ 1447 ves = XNEW (struct var_to_expand); 1448 ves->insn = insn; 1449 ves->reg = copy_rtx (dest); 1450 ves->var_expansions.create (1); 1451 ves->next = NULL; 1452 ves->op = GET_CODE (src); 1453 ves->expansion_count = 0; 1454 ves->reuse_expansion = 0; 1455 return ves; 1456} 1457 1458/* Determine whether there is an induction variable in INSN that 1459 we would like to split during unrolling. 1460 1461 I.e. replace 1462 1463 i = i + 1; 1464 ... 1465 i = i + 1; 1466 ... 1467 i = i + 1; 1468 ... 1469 1470 type chains by 1471 1472 i0 = i + 1 1473 ... 1474 i = i0 + 1 1475 ... 1476 i = i0 + 2 1477 ... 1478 1479 Return NULL if INSN contains no interesting IVs. Otherwise, allocate 1480 an IV_TO_SPLIT structure, fill it with the relevant information and return a 1481 pointer to it. */ 1482 1483static struct iv_to_split * 1484analyze_iv_to_split_insn (rtx_insn *insn) 1485{ 1486 rtx set, dest; 1487 struct rtx_iv iv; 1488 struct iv_to_split *ivts; 1489 bool ok; 1490 1491 /* For now we just split the basic induction variables. Later this may be 1492 extended for example by selecting also addresses of memory references. */ 1493 set = single_set (insn); 1494 if (!set) 1495 return NULL; 1496 1497 dest = SET_DEST (set); 1498 if (!REG_P (dest)) 1499 return NULL; 1500 1501 if (!biv_p (insn, dest)) 1502 return NULL; 1503 1504 ok = iv_analyze_result (insn, dest, &iv); 1505 1506 /* This used to be an assert under the assumption that if biv_p returns 1507 true that iv_analyze_result must also return true. However, that 1508 assumption is not strictly correct as evidenced by pr25569. 1509 1510 Returning NULL when iv_analyze_result returns false is safe and 1511 avoids the problems in pr25569 until the iv_analyze_* routines 1512 can be fixed, which is apparently hard and time consuming 1513 according to their author. */ 1514 if (! ok) 1515 return NULL; 1516 1517 if (iv.step == const0_rtx 1518 || iv.mode != iv.extend_mode) 1519 return NULL; 1520 1521 /* Record the insn to split. */ 1522 ivts = XNEW (struct iv_to_split); 1523 ivts->insn = insn; 1524 ivts->orig_var = dest; 1525 ivts->base_var = NULL_RTX; 1526 ivts->step = iv.step; 1527 ivts->next = NULL; 1528 1529 return ivts; 1530} 1531 1532/* Determines which of insns in LOOP can be optimized. 1533 Return a OPT_INFO struct with the relevant hash tables filled 1534 with all insns to be optimized. 
The FIRST_NEW_BLOCK field 1535 is undefined for the return value. */ 1536 1537static struct opt_info * 1538analyze_insns_in_loop (struct loop *loop) 1539{ 1540 basic_block *body, bb; 1541 unsigned i; 1542 struct opt_info *opt_info = XCNEW (struct opt_info); 1543 rtx_insn *insn; 1544 struct iv_to_split *ivts = NULL; 1545 struct var_to_expand *ves = NULL; 1546 iv_to_split **slot1; 1547 var_to_expand **slot2; 1548 vec<edge> edges = get_loop_exit_edges (loop); 1549 edge exit; 1550 bool can_apply = false; 1551 1552 iv_analysis_loop_init (loop); 1553 1554 body = get_loop_body (loop); 1555 1556 if (flag_split_ivs_in_unroller) 1557 { 1558 opt_info->insns_to_split 1559 = new hash_table<iv_split_hasher> (5 * loop->num_nodes); 1560 opt_info->iv_to_split_head = NULL; 1561 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; 1562 } 1563 1564 /* Record the loop exit bb and loop preheader before the unrolling. */ 1565 opt_info->loop_preheader = loop_preheader_edge (loop)->src; 1566 1567 if (edges.length () == 1) 1568 { 1569 exit = edges[0]; 1570 if (!(exit->flags & EDGE_COMPLEX)) 1571 { 1572 opt_info->loop_exit = split_edge (exit); 1573 can_apply = true; 1574 } 1575 } 1576 1577 if (flag_variable_expansion_in_unroller 1578 && can_apply) 1579 { 1580 opt_info->insns_with_var_to_expand 1581 = new hash_table<var_expand_hasher> (5 * loop->num_nodes); 1582 opt_info->var_to_expand_head = NULL; 1583 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; 1584 } 1585 1586 for (i = 0; i < loop->num_nodes; i++) 1587 { 1588 bb = body[i]; 1589 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) 1590 continue; 1591 1592 FOR_BB_INSNS (bb, insn) 1593 { 1594 if (!INSN_P (insn)) 1595 continue; 1596 1597 if (opt_info->insns_to_split) 1598 ivts = analyze_iv_to_split_insn (insn); 1599 1600 if (ivts) 1601 { 1602 slot1 = opt_info->insns_to_split->find_slot (ivts, INSERT); 1603 gcc_assert (*slot1 == NULL); 1604 *slot1 = ivts; 1605 *opt_info->iv_to_split_tail = ivts; 1606 opt_info->iv_to_split_tail = &ivts->next; 1607 continue; 1608 } 1609 1610 if (opt_info->insns_with_var_to_expand) 1611 ves = analyze_insn_to_expand_var (loop, insn); 1612 1613 if (ves) 1614 { 1615 slot2 = opt_info->insns_with_var_to_expand->find_slot (ves, INSERT); 1616 gcc_assert (*slot2 == NULL); 1617 *slot2 = ves; 1618 *opt_info->var_to_expand_tail = ves; 1619 opt_info->var_to_expand_tail = &ves->next; 1620 } 1621 } 1622 } 1623 1624 edges.release (); 1625 free (body); 1626 return opt_info; 1627} 1628 1629/* Called just before loop duplication. Records start of duplicated area 1630 to OPT_INFO. */ 1631 1632static void 1633opt_info_start_duplication (struct opt_info *opt_info) 1634{ 1635 if (opt_info) 1636 opt_info->first_new_block = last_basic_block_for_fn (cfun); 1637} 1638 1639/* Determine the number of iterations between initialization of the base 1640 variable and the current copy (N_COPY). N_COPIES is the total number 1641 of newly created copies. UNROLLING is true if we are unrolling 1642 (not peeling) the loop. */ 1643 1644static unsigned 1645determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling) 1646{ 1647 if (unrolling) 1648 { 1649 /* If we are unrolling, initialization is done in the original loop 1650 body (number 0). */ 1651 return n_copy; 1652 } 1653 else 1654 { 1655 /* If we are peeling, the copy in that the initialization occurs has 1656 number 1. The original loop (number 0) is the last. 
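	 So with N_COPIES == 2, copy number 1 gets delta 0, copy number 2
	 gets delta 1, and the original body gets delta N_COPIES == 2.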
*/ 1657 if (n_copy) 1658 return n_copy - 1; 1659 else 1660 return n_copies; 1661 } 1662} 1663 1664/* Allocate basic variable for the induction variable chain. */ 1665 1666static void 1667allocate_basic_variable (struct iv_to_split *ivts) 1668{ 1669 rtx expr = SET_SRC (single_set (ivts->insn)); 1670 1671 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); 1672} 1673 1674/* Insert initialization of basic variable of IVTS before INSN, taking 1675 the initial value from INSN. */ 1676 1677static void 1678insert_base_initialization (struct iv_to_split *ivts, rtx_insn *insn) 1679{ 1680 rtx expr = copy_rtx (SET_SRC (single_set (insn))); 1681 rtx_insn *seq; 1682 1683 start_sequence (); 1684 expr = force_operand (expr, ivts->base_var); 1685 if (expr != ivts->base_var) 1686 emit_move_insn (ivts->base_var, expr); 1687 seq = get_insns (); 1688 end_sequence (); 1689 1690 emit_insn_before (seq, insn); 1691} 1692 1693/* Replace the use of induction variable described in IVTS in INSN 1694 by base variable + DELTA * step. */ 1695 1696static void 1697split_iv (struct iv_to_split *ivts, rtx_insn *insn, unsigned delta) 1698{ 1699 rtx expr, *loc, incr, var; 1700 rtx_insn *seq; 1701 machine_mode mode = GET_MODE (ivts->base_var); 1702 rtx src, dest, set; 1703 1704 /* Construct base + DELTA * step. */ 1705 if (!delta) 1706 expr = ivts->base_var; 1707 else 1708 { 1709 incr = simplify_gen_binary (MULT, mode, 1710 ivts->step, gen_int_mode (delta, mode)); 1711 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), 1712 ivts->base_var, incr); 1713 } 1714 1715 /* Figure out where to do the replacement. */ 1716 loc = &SET_SRC (single_set (insn)); 1717 1718 /* If we can make the replacement right away, we're done. */ 1719 if (validate_change (insn, loc, expr, 0)) 1720 return; 1721 1722 /* Otherwise, force EXPR into a register and try again. */ 1723 start_sequence (); 1724 var = gen_reg_rtx (mode); 1725 expr = force_operand (expr, var); 1726 if (expr != var) 1727 emit_move_insn (var, expr); 1728 seq = get_insns (); 1729 end_sequence (); 1730 emit_insn_before (seq, insn); 1731 1732 if (validate_change (insn, loc, var, 0)) 1733 return; 1734 1735 /* The last chance. Try recreating the assignment in insn 1736 completely from scratch. */ 1737 set = single_set (insn); 1738 gcc_assert (set); 1739 1740 start_sequence (); 1741 *loc = var; 1742 src = copy_rtx (SET_SRC (set)); 1743 dest = copy_rtx (SET_DEST (set)); 1744 src = force_operand (src, dest); 1745 if (src != dest) 1746 emit_move_insn (dest, src); 1747 seq = get_insns (); 1748 end_sequence (); 1749 1750 emit_insn_before (seq, insn); 1751 delete_insn (insn); 1752} 1753 1754 1755/* Return one expansion of the accumulator recorded in struct VE. */ 1756 1757static rtx 1758get_expansion (struct var_to_expand *ve) 1759{ 1760 rtx reg; 1761 1762 if (ve->reuse_expansion == 0) 1763 reg = ve->reg; 1764 else 1765 reg = ve->var_expansions[ve->reuse_expansion - 1]; 1766 1767 if (ve->var_expansions.length () == (unsigned) ve->reuse_expansion) 1768 ve->reuse_expansion = 0; 1769 else 1770 ve->reuse_expansion++; 1771 1772 return reg; 1773} 1774 1775 1776/* Given INSN replace the uses of the accumulator recorded in VE 1777 with a new register. */ 1778 1779static void 1780expand_var_during_unrolling (struct var_to_expand *ve, rtx_insn *insn) 1781{ 1782 rtx new_reg, set; 1783 bool really_new_expansion = false; 1784 1785 set = single_set (insn); 1786 gcc_assert (set); 1787 1788 /* Generate a new register only if the expansion limit has not been 1789 reached. 
Else reuse an already existing expansion. */ 1790 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) 1791 { 1792 really_new_expansion = true; 1793 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); 1794 } 1795 else 1796 new_reg = get_expansion (ve); 1797 1798 validate_replace_rtx_group (SET_DEST (set), new_reg, insn); 1799 if (apply_change_group ()) 1800 if (really_new_expansion) 1801 { 1802 ve->var_expansions.safe_push (new_reg); 1803 ve->expansion_count++; 1804 } 1805} 1806 1807/* Initialize the variable expansions in loop preheader. PLACE is the 1808 loop-preheader basic block where the initialization of the 1809 expansions should take place. The expansions are initialized with 1810 (-0) when the operation is plus or minus to honor sign zero. This 1811 way we can prevent cases where the sign of the final result is 1812 effected by the sign of the expansion. Here is an example to 1813 demonstrate this: 1814 1815 for (i = 0 ; i < n; i++) 1816 sum += something; 1817 1818 ==> 1819 1820 sum += something 1821 .... 1822 i = i+1; 1823 sum1 += something 1824 .... 1825 i = i+1 1826 sum2 += something; 1827 .... 1828 1829 When SUM is initialized with -zero and SOMETHING is also -zero; the 1830 final result of sum should be -zero thus the expansions sum1 and sum2 1831 should be initialized with -zero as well (otherwise we will get +zero 1832 as the final result). */ 1833 1834static void 1835insert_var_expansion_initialization (struct var_to_expand *ve, 1836 basic_block place) 1837{ 1838 rtx_insn *seq; 1839 rtx var, zero_init; 1840 unsigned i; 1841 machine_mode mode = GET_MODE (ve->reg); 1842 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); 1843 1844 if (ve->var_expansions.length () == 0) 1845 return; 1846 1847 start_sequence (); 1848 switch (ve->op) 1849 { 1850 case FMA: 1851 /* Note that we only accumulate FMA via the ADD operand. */ 1852 case PLUS: 1853 case MINUS: 1854 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) 1855 { 1856 if (honor_signed_zero_p) 1857 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); 1858 else 1859 zero_init = CONST0_RTX (mode); 1860 emit_move_insn (var, zero_init); 1861 } 1862 break; 1863 1864 case MULT: 1865 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) 1866 { 1867 zero_init = CONST1_RTX (GET_MODE (var)); 1868 emit_move_insn (var, zero_init); 1869 } 1870 break; 1871 1872 default: 1873 gcc_unreachable (); 1874 } 1875 1876 seq = get_insns (); 1877 end_sequence (); 1878 1879 emit_insn_after (seq, BB_END (place)); 1880} 1881 1882/* Combine the variable expansions at the loop exit. PLACE is the 1883 loop exit basic block where the summation of the expansions should 1884 take place. */ 1885 1886static void 1887combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) 1888{ 1889 rtx sum = ve->reg; 1890 rtx expr, var; 1891 rtx_insn *seq, *insn; 1892 unsigned i; 1893 1894 if (ve->var_expansions.length () == 0) 1895 return; 1896 1897 start_sequence (); 1898 switch (ve->op) 1899 { 1900 case FMA: 1901 /* Note that we only accumulate FMA via the ADD operand. 
*/ 1902 case PLUS: 1903 case MINUS: 1904 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) 1905 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); 1906 break; 1907 1908 case MULT: 1909 FOR_EACH_VEC_ELT (ve->var_expansions, i, var) 1910 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); 1911 break; 1912 1913 default: 1914 gcc_unreachable (); 1915 } 1916 1917 expr = force_operand (sum, ve->reg); 1918 if (expr != ve->reg) 1919 emit_move_insn (ve->reg, expr); 1920 seq = get_insns (); 1921 end_sequence (); 1922 1923 insn = BB_HEAD (place); 1924 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) 1925 insn = NEXT_INSN (insn); 1926 1927 emit_insn_after (seq, insn); 1928} 1929 1930/* Strip away REG_EQUAL notes for IVs we're splitting. 1931 1932 Updating REG_EQUAL notes for IVs we split is tricky: We 1933 cannot tell until after unrolling, DF-rescanning, and liveness 1934 updating, whether an EQ_USE is reached by the split IV while 1935 the IV reg is still live. See PR55006. 1936 1937 ??? We cannot use remove_reg_equal_equiv_notes_for_regno, 1938 because RTL loop-iv requires us to defer rescanning insns and 1939 any notes attached to them. So resort to old techniques... */ 1940 1941static void 1942maybe_strip_eq_note_for_split_iv (struct opt_info *opt_info, rtx_insn *insn) 1943{ 1944 struct iv_to_split *ivts; 1945 rtx note = find_reg_equal_equiv_note (insn); 1946 if (! note) 1947 return; 1948 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) 1949 if (reg_mentioned_p (ivts->orig_var, note)) 1950 { 1951 remove_note (insn, note); 1952 return; 1953 } 1954} 1955 1956/* Apply loop optimizations in loop copies using the 1957 data which gathered during the unrolling. Structure 1958 OPT_INFO record that data. 1959 1960 UNROLLING is true if we unrolled (not peeled) the loop. 1961 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of 1962 the loop (as it should happen in complete unrolling, but not in ordinary 1963 peeling of the loop). */ 1964 1965static void 1966apply_opt_in_copies (struct opt_info *opt_info, 1967 unsigned n_copies, bool unrolling, 1968 bool rewrite_original_loop) 1969{ 1970 unsigned i, delta; 1971 basic_block bb, orig_bb; 1972 rtx_insn *insn, *orig_insn, *next; 1973 struct iv_to_split ivts_templ, *ivts; 1974 struct var_to_expand ve_templ, *ves; 1975 1976 /* Sanity check -- we need to put initialization in the original loop 1977 body. */ 1978 gcc_assert (!unrolling || rewrite_original_loop); 1979 1980 /* Allocate the basic variables (i0). */ 1981 if (opt_info->insns_to_split) 1982 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) 1983 allocate_basic_variable (ivts); 1984 1985 for (i = opt_info->first_new_block; 1986 i < (unsigned) last_basic_block_for_fn (cfun); 1987 i++) 1988 { 1989 bb = BASIC_BLOCK_FOR_FN (cfun, i); 1990 orig_bb = get_bb_original (bb); 1991 1992 /* bb->aux holds position in copy sequence initialized by 1993 duplicate_loop_to_header_edge. 
*/ 1994 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, 1995 unrolling); 1996 bb->aux = 0; 1997 orig_insn = BB_HEAD (orig_bb); 1998 FOR_BB_INSNS_SAFE (bb, insn, next) 1999 { 2000 if (!INSN_P (insn) 2001 || (DEBUG_INSN_P (insn) 2002 && TREE_CODE (INSN_VAR_LOCATION_DECL (insn)) == LABEL_DECL)) 2003 continue; 2004 2005 while (!INSN_P (orig_insn) 2006 || (DEBUG_INSN_P (orig_insn) 2007 && (TREE_CODE (INSN_VAR_LOCATION_DECL (orig_insn)) 2008 == LABEL_DECL))) 2009 orig_insn = NEXT_INSN (orig_insn); 2010 2011 ivts_templ.insn = orig_insn; 2012 ve_templ.insn = orig_insn; 2013 2014 /* Apply splitting iv optimization. */ 2015 if (opt_info->insns_to_split) 2016 { 2017 maybe_strip_eq_note_for_split_iv (opt_info, insn); 2018 2019 ivts = opt_info->insns_to_split->find (&ivts_templ); 2020 2021 if (ivts) 2022 { 2023 gcc_assert (GET_CODE (PATTERN (insn)) 2024 == GET_CODE (PATTERN (orig_insn))); 2025 2026 if (!delta) 2027 insert_base_initialization (ivts, insn); 2028 split_iv (ivts, insn, delta); 2029 } 2030 } 2031 /* Apply variable expansion optimization. */ 2032 if (unrolling && opt_info->insns_with_var_to_expand) 2033 { 2034 ves = (struct var_to_expand *) 2035 opt_info->insns_with_var_to_expand->find (&ve_templ); 2036 if (ves) 2037 { 2038 gcc_assert (GET_CODE (PATTERN (insn)) 2039 == GET_CODE (PATTERN (orig_insn))); 2040 expand_var_during_unrolling (ves, insn); 2041 } 2042 } 2043 orig_insn = NEXT_INSN (orig_insn); 2044 } 2045 } 2046 2047 if (!rewrite_original_loop) 2048 return; 2049 2050 /* Initialize the variable expansions in the loop preheader 2051 and take care of combining them at the loop exit. */ 2052 if (opt_info->insns_with_var_to_expand) 2053 { 2054 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) 2055 insert_var_expansion_initialization (ves, opt_info->loop_preheader); 2056 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) 2057 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit); 2058 } 2059 2060 /* Rewrite also the original loop body. Find them as originals of the blocks 2061 in the last copied iteration, i.e. those that have 2062 get_bb_copy (get_bb_original (bb)) == bb. */ 2063 for (i = opt_info->first_new_block; 2064 i < (unsigned) last_basic_block_for_fn (cfun); 2065 i++) 2066 { 2067 bb = BASIC_BLOCK_FOR_FN (cfun, i); 2068 orig_bb = get_bb_original (bb); 2069 if (get_bb_copy (orig_bb) != bb) 2070 continue; 2071 2072 delta = determine_split_iv_delta (0, n_copies, unrolling); 2073 for (orig_insn = BB_HEAD (orig_bb); 2074 orig_insn != NEXT_INSN (BB_END (bb)); 2075 orig_insn = next) 2076 { 2077 next = NEXT_INSN (orig_insn); 2078 2079 if (!INSN_P (orig_insn)) 2080 continue; 2081 2082 ivts_templ.insn = orig_insn; 2083 if (opt_info->insns_to_split) 2084 { 2085 maybe_strip_eq_note_for_split_iv (opt_info, orig_insn); 2086 2087 ivts = (struct iv_to_split *) 2088 opt_info->insns_to_split->find (&ivts_templ); 2089 if (ivts) 2090 { 2091 if (!delta) 2092 insert_base_initialization (ivts, orig_insn); 2093 split_iv (ivts, orig_insn, delta); 2094 continue; 2095 } 2096 } 2097 2098 } 2099 } 2100} 2101 2102/* Release OPT_INFO. 
   */

static void
free_opt_info (struct opt_info *opt_info)
{
  delete opt_info->insns_to_split;
  opt_info->insns_to_split = NULL;
  if (opt_info->insns_with_var_to_expand)
    {
      struct var_to_expand *ves;

      for (ves = opt_info->var_to_expand_head; ves; ves = ves->next)
	ves->var_expansions.release ();
      delete opt_info->insns_with_var_to_expand;
      opt_info->insns_with_var_to_expand = NULL;
    }
  free (opt_info);
}