loop-unroll.c revision 132718
1132718Skan/* Loop unrolling and peeling. 2132718Skan Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. 3132718Skan 4132718SkanThis file is part of GCC. 5132718Skan 6132718SkanGCC is free software; you can redistribute it and/or modify it under 7132718Skanthe terms of the GNU General Public License as published by the Free 8132718SkanSoftware Foundation; either version 2, or (at your option) any later 9132718Skanversion. 10132718Skan 11132718SkanGCC is distributed in the hope that it will be useful, but WITHOUT ANY 12132718SkanWARRANTY; without even the implied warranty of MERCHANTABILITY or 13132718SkanFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14132718Skanfor more details. 15132718Skan 16132718SkanYou should have received a copy of the GNU General Public License 17132718Skanalong with GCC; see the file COPYING. If not, write to the Free 18132718SkanSoftware Foundation, 59 Temple Place - Suite 330, Boston, MA 19132718Skan02111-1307, USA. */ 20132718Skan 21132718Skan#include "config.h" 22132718Skan#include "system.h" 23132718Skan#include "coretypes.h" 24132718Skan#include "tm.h" 25132718Skan#include "rtl.h" 26132718Skan#include "hard-reg-set.h" 27132718Skan#include "basic-block.h" 28132718Skan#include "cfgloop.h" 29132718Skan#include "cfglayout.h" 30132718Skan#include "params.h" 31132718Skan#include "output.h" 32132718Skan#include "expr.h" 33132718Skan/* We need to use the macro exact_log2. */ 34132718Skan#include "toplev.h" 35132718Skan 36132718Skan/* This pass performs loop unrolling and peeling. We only perform these 37132718Skan optimizations on innermost loops (with single exception) because 38132718Skan the impact on performance is greatest here, and we want to avoid 39132718Skan unnecessary code size growth. The gain is caused by greater sequentiality 40132718Skan of code, better code to optimize for further passes and in some cases 41132718Skan by fewer testings of exit conditions. The main problem is code growth, 42132718Skan that impacts performance negatively due to effect of caches. 43132718Skan 44132718Skan What we do: 45132718Skan 46132718Skan -- complete peeling of once-rolling loops; this is the above mentioned 47132718Skan exception, as this causes loop to be cancelled completely and 48132718Skan does not cause code growth 49132718Skan -- complete peeling of loops that roll (small) constant times. 50132718Skan -- simple peeling of first iterations of loops that do not roll much 51132718Skan (according to profile feedback) 52132718Skan -- unrolling of loops that roll constant times; this is almost always 53132718Skan win, as we get rid of exit condition tests. 54132718Skan -- unrolling of loops that roll number of times that we can compute 55132718Skan in runtime; we also get rid of exit condition tests here, but there 56132718Skan is the extra expense for calculating the number of iterations 57132718Skan -- simple unrolling of remaining loops; this is performed only if we 58132718Skan are asked to, as the gain is questionable in this case and often 59132718Skan it may even slow down the code 60132718Skan For more detailed descriptions of each of those, see comments at 61132718Skan appropriate function below. 62132718Skan 63132718Skan There is a lot of parameters (defined and described in params.def) that 64132718Skan control how much we unroll/peel. 65132718Skan 66132718Skan ??? A great problem is that we don't have a good way how to determine 67132718Skan how many times we should unroll the loop; the experiments I have made 68132718Skan showed that this choice may affect performance in order of several %. 69132718Skan */ 70132718Skan 71132718Skanstatic void decide_unrolling_and_peeling (struct loops *, int); 72132718Skanstatic void peel_loops_completely (struct loops *, int); 73132718Skanstatic void decide_peel_simple (struct loop *, int); 74132718Skanstatic void decide_peel_once_rolling (struct loop *, int); 75132718Skanstatic void decide_peel_completely (struct loop *, int); 76132718Skanstatic void decide_unroll_stupid (struct loop *, int); 77132718Skanstatic void decide_unroll_constant_iterations (struct loop *, int); 78132718Skanstatic void decide_unroll_runtime_iterations (struct loop *, int); 79132718Skanstatic void peel_loop_simple (struct loops *, struct loop *); 80132718Skanstatic void peel_loop_completely (struct loops *, struct loop *); 81132718Skanstatic void unroll_loop_stupid (struct loops *, struct loop *); 82132718Skanstatic void unroll_loop_constant_iterations (struct loops *, struct loop *); 83132718Skanstatic void unroll_loop_runtime_iterations (struct loops *, struct loop *); 84132718Skanstatic void expand_bct (edge, int); 85132718Skanstatic bool discard_increment (struct loop *); 86132718Skan 87132718Skan/* Unroll and/or peel (depending on FLAGS) LOOPS. */ 88132718Skanvoid 89132718Skanunroll_and_peel_loops (struct loops *loops, int flags) 90132718Skan{ 91132718Skan struct loop *loop, *next; 92132718Skan int check; 93132718Skan 94132718Skan /* First perform complete loop peeling (it is almost surely a win, 95132718Skan and affects parameters for further decision a lot). */ 96132718Skan peel_loops_completely (loops, flags); 97132718Skan 98132718Skan /* Now decide rest of unrolling and peeling. */ 99132718Skan decide_unrolling_and_peeling (loops, flags); 100132718Skan 101132718Skan loop = loops->tree_root; 102132718Skan while (loop->inner) 103132718Skan loop = loop->inner; 104132718Skan 105132718Skan /* Scan the loops, inner ones first. */ 106132718Skan while (loop != loops->tree_root) 107132718Skan { 108132718Skan if (loop->next) 109132718Skan { 110132718Skan next = loop->next; 111132718Skan while (next->inner) 112132718Skan next = next->inner; 113132718Skan } 114132718Skan else 115132718Skan next = loop->outer; 116132718Skan 117132718Skan check = 1; 118132718Skan /* And perform the appropriate transformations. */ 119132718Skan switch (loop->lpt_decision.decision) 120132718Skan { 121132718Skan case LPT_PEEL_COMPLETELY: 122132718Skan /* Already done. */ 123132718Skan abort (); 124132718Skan case LPT_PEEL_SIMPLE: 125132718Skan peel_loop_simple (loops, loop); 126132718Skan break; 127132718Skan case LPT_UNROLL_CONSTANT: 128132718Skan unroll_loop_constant_iterations (loops, loop); 129132718Skan break; 130132718Skan case LPT_UNROLL_RUNTIME: 131132718Skan unroll_loop_runtime_iterations (loops, loop); 132132718Skan break; 133132718Skan case LPT_UNROLL_STUPID: 134132718Skan unroll_loop_stupid (loops, loop); 135132718Skan break; 136132718Skan case LPT_NONE: 137132718Skan check = 0; 138132718Skan break; 139132718Skan default: 140132718Skan abort (); 141132718Skan } 142132718Skan if (check) 143132718Skan { 144132718Skan#ifdef ENABLE_CHECKING 145132718Skan verify_dominators (CDI_DOMINATORS); 146132718Skan verify_loop_structure (loops); 147132718Skan#endif 148132718Skan } 149132718Skan loop = next; 150132718Skan } 151132718Skan} 152132718Skan 153132718Skan/* Check whether to peel LOOPS (depending on FLAGS) completely and do so. */ 154132718Skanstatic void 155132718Skanpeel_loops_completely (struct loops *loops, int flags) 156132718Skan{ 157132718Skan struct loop *loop, *next; 158132718Skan 159132718Skan loop = loops->tree_root; 160132718Skan while (loop->inner) 161132718Skan loop = loop->inner; 162132718Skan 163132718Skan while (loop != loops->tree_root) 164132718Skan { 165132718Skan if (loop->next) 166132718Skan { 167132718Skan next = loop->next; 168132718Skan while (next->inner) 169132718Skan next = next->inner; 170132718Skan } 171132718Skan else 172132718Skan next = loop->outer; 173132718Skan 174132718Skan loop->lpt_decision.decision = LPT_NONE; 175132718Skan loop->has_desc = 0; 176132718Skan 177132718Skan if (rtl_dump_file) 178132718Skan fprintf (rtl_dump_file, ";; Considering loop %d for complete peeling\n", 179132718Skan loop->num); 180132718Skan 181132718Skan loop->ninsns = num_loop_insns (loop); 182132718Skan 183132718Skan decide_peel_once_rolling (loop, flags); 184132718Skan if (loop->lpt_decision.decision == LPT_NONE) 185132718Skan decide_peel_completely (loop, flags); 186132718Skan 187132718Skan if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) 188132718Skan { 189132718Skan peel_loop_completely (loops, loop); 190132718Skan#ifdef ENABLE_CHECKING 191132718Skan verify_dominators (CDI_DOMINATORS); 192132718Skan verify_loop_structure (loops); 193132718Skan#endif 194132718Skan } 195132718Skan loop = next; 196132718Skan } 197132718Skan} 198132718Skan 199132718Skan/* Decide whether unroll or peel LOOPS (depending on FLAGS) and how much. */ 200132718Skanstatic void 201132718Skandecide_unrolling_and_peeling (struct loops *loops, int flags) 202132718Skan{ 203132718Skan struct loop *loop = loops->tree_root, *next; 204132718Skan 205132718Skan while (loop->inner) 206132718Skan loop = loop->inner; 207132718Skan 208132718Skan /* Scan the loops, inner ones first. */ 209132718Skan while (loop != loops->tree_root) 210132718Skan { 211132718Skan if (loop->next) 212132718Skan { 213132718Skan next = loop->next; 214132718Skan while (next->inner) 215132718Skan next = next->inner; 216132718Skan } 217132718Skan else 218132718Skan next = loop->outer; 219132718Skan 220132718Skan loop->lpt_decision.decision = LPT_NONE; 221132718Skan 222132718Skan if (rtl_dump_file) 223132718Skan fprintf (rtl_dump_file, ";; Considering loop %d\n", loop->num); 224132718Skan 225132718Skan /* Do not peel cold areas. */ 226132718Skan if (!maybe_hot_bb_p (loop->header)) 227132718Skan { 228132718Skan if (rtl_dump_file) 229132718Skan fprintf (rtl_dump_file, ";; Not considering loop, cold area\n"); 230132718Skan loop = next; 231132718Skan continue; 232132718Skan } 233132718Skan 234132718Skan /* Can the loop be manipulated? */ 235132718Skan if (!can_duplicate_loop_p (loop)) 236132718Skan { 237132718Skan if (rtl_dump_file) 238132718Skan fprintf (rtl_dump_file, 239132718Skan ";; Not considering loop, cannot duplicate\n"); 240132718Skan loop = next; 241132718Skan continue; 242132718Skan } 243132718Skan 244132718Skan /* Skip non-innermost loops. */ 245132718Skan if (loop->inner) 246132718Skan { 247132718Skan if (rtl_dump_file) 248132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n"); 249132718Skan loop = next; 250132718Skan continue; 251132718Skan } 252132718Skan 253132718Skan loop->ninsns = num_loop_insns (loop); 254132718Skan loop->av_ninsns = average_num_loop_insns (loop); 255132718Skan 256132718Skan /* Try transformations one by one in decreasing order of 257132718Skan priority. */ 258132718Skan 259132718Skan decide_unroll_constant_iterations (loop, flags); 260132718Skan if (loop->lpt_decision.decision == LPT_NONE) 261132718Skan decide_unroll_runtime_iterations (loop, flags); 262132718Skan if (loop->lpt_decision.decision == LPT_NONE) 263132718Skan decide_unroll_stupid (loop, flags); 264132718Skan if (loop->lpt_decision.decision == LPT_NONE) 265132718Skan decide_peel_simple (loop, flags); 266132718Skan 267132718Skan loop = next; 268132718Skan } 269132718Skan} 270132718Skan 271132718Skan/* Decide whether the LOOP is once rolling and suitable for complete 272132718Skan peeling. */ 273132718Skanstatic void 274132718Skandecide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) 275132718Skan{ 276132718Skan if (rtl_dump_file) 277132718Skan fprintf (rtl_dump_file, ";; Considering peeling once rolling loop\n"); 278132718Skan 279132718Skan /* Is the loop small enough? */ 280132718Skan if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) 281132718Skan { 282132718Skan if (rtl_dump_file) 283132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 284132718Skan return; 285132718Skan } 286132718Skan 287132718Skan /* Check for simple loops. */ 288132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 289132718Skan loop->has_desc = 1; 290132718Skan 291132718Skan /* Check number of iterations. */ 292132718Skan if (!loop->simple || !loop->desc.const_iter || loop->desc.niter != 0) 293132718Skan { 294132718Skan if (rtl_dump_file) 295132718Skan fprintf (rtl_dump_file, ";; Unable to prove that the loop rolls exactly once\n"); 296132718Skan return; 297132718Skan } 298132718Skan 299132718Skan /* Success. */ 300132718Skan if (rtl_dump_file) 301132718Skan fprintf (rtl_dump_file, ";; Decided to peel exactly once rolling loop\n"); 302132718Skan loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; 303132718Skan} 304132718Skan 305132718Skan/* Decide whether the LOOP is suitable for complete peeling. */ 306132718Skanstatic void 307132718Skandecide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) 308132718Skan{ 309132718Skan unsigned npeel; 310132718Skan 311132718Skan if (rtl_dump_file) 312132718Skan fprintf (rtl_dump_file, ";; Considering peeling completely\n"); 313132718Skan 314132718Skan /* Skip non-innermost loops. */ 315132718Skan if (loop->inner) 316132718Skan { 317132718Skan if (rtl_dump_file) 318132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is not innermost\n"); 319132718Skan return; 320132718Skan } 321132718Skan 322132718Skan /* Do not peel cold areas. */ 323132718Skan if (!maybe_hot_bb_p (loop->header)) 324132718Skan { 325132718Skan if (rtl_dump_file) 326132718Skan fprintf (rtl_dump_file, ";; Not considering loop, cold area\n"); 327132718Skan return; 328132718Skan } 329132718Skan 330132718Skan /* Can the loop be manipulated? */ 331132718Skan if (!can_duplicate_loop_p (loop)) 332132718Skan { 333132718Skan if (rtl_dump_file) 334132718Skan fprintf (rtl_dump_file, 335132718Skan ";; Not considering loop, cannot duplicate\n"); 336132718Skan return; 337132718Skan } 338132718Skan 339132718Skan /* npeel = number of iterations to peel. */ 340132718Skan npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; 341132718Skan if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) 342132718Skan npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); 343132718Skan 344132718Skan /* Is the loop small enough? */ 345132718Skan if (!npeel) 346132718Skan { 347132718Skan if (rtl_dump_file) 348132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 349132718Skan return; 350132718Skan } 351132718Skan 352132718Skan /* Check for simple loops. */ 353132718Skan if (!loop->has_desc) 354132718Skan { 355132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 356132718Skan loop->has_desc = 1; 357132718Skan } 358132718Skan 359132718Skan /* Check number of iterations. */ 360132718Skan if (!loop->simple || !loop->desc.const_iter) 361132718Skan { 362132718Skan if (rtl_dump_file) 363132718Skan fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); 364132718Skan return; 365132718Skan } 366132718Skan 367132718Skan if (loop->desc.niter > npeel - 1) 368132718Skan { 369132718Skan if (rtl_dump_file) 370132718Skan { 371132718Skan fprintf (rtl_dump_file, ";; Not peeling loop completely, rolls too much ("); 372132718Skan fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC,(HOST_WIDEST_INT) loop->desc.niter); 373132718Skan fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel); 374132718Skan } 375132718Skan return; 376132718Skan } 377132718Skan 378132718Skan /* Success. */ 379132718Skan if (rtl_dump_file) 380132718Skan fprintf (rtl_dump_file, ";; Decided to peel loop completely\n"); 381132718Skan loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; 382132718Skan} 383132718Skan 384132718Skan/* Peel all iterations of LOOP, remove exit edges and cancel the loop 385132718Skan completely. The transformation done: 386132718Skan 387132718Skan for (i = 0; i < 4; i++) 388132718Skan body; 389132718Skan 390132718Skan ==> 391132718Skan 392132718Skan i = 0; 393132718Skan body; i++; 394132718Skan body; i++; 395132718Skan body; i++; 396132718Skan body; i++; 397132718Skan */ 398132718Skanstatic void 399132718Skanpeel_loop_completely (struct loops *loops, struct loop *loop) 400132718Skan{ 401132718Skan sbitmap wont_exit; 402132718Skan unsigned HOST_WIDE_INT npeel; 403132718Skan unsigned n_remove_edges, i; 404132718Skan edge *remove_edges; 405132718Skan struct loop_desc *desc = &loop->desc; 406132718Skan bool discard_inc = false; 407132718Skan bool is_bct; 408132718Skan 409132718Skan if ((is_bct = is_bct_cond (BB_END (loop->desc.out_edge->src)))) 410132718Skan discard_inc = discard_increment (loop); 411132718Skan 412132718Skan npeel = desc->niter; 413132718Skan 414132718Skan if (npeel) 415132718Skan { 416132718Skan wont_exit = sbitmap_alloc (npeel + 1); 417132718Skan sbitmap_ones (wont_exit); 418132718Skan RESET_BIT (wont_exit, 0); 419132718Skan if (desc->may_be_zero) 420132718Skan RESET_BIT (wont_exit, 1); 421132718Skan 422132718Skan remove_edges = xcalloc (npeel, sizeof (edge)); 423132718Skan n_remove_edges = 0; 424132718Skan 425132718Skan if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 426132718Skan loops, npeel, 427132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 428132718Skan DLTHE_FLAG_UPDATE_FREQ)) 429132718Skan abort (); 430132718Skan 431132718Skan free (wont_exit); 432132718Skan 433132718Skan /* Expand the branch and count. */ 434132718Skan if (is_bct) 435132718Skan for (i = 0; i < n_remove_edges; i++) 436132718Skan expand_bct (remove_edges[i], discard_inc); 437132718Skan 438132718Skan /* Remove the exit edges. */ 439132718Skan for (i = 0; i < n_remove_edges; i++) 440132718Skan remove_path (loops, remove_edges[i]); 441132718Skan free (remove_edges); 442132718Skan } 443132718Skan 444132718Skan /* Expand the branch and count. */ 445132718Skan if (is_bct) 446132718Skan expand_bct (desc->in_edge, discard_inc); 447132718Skan 448132718Skan /* Now remove the unreachable part of the last iteration and cancel 449132718Skan the loop. */ 450132718Skan remove_path (loops, desc->in_edge); 451132718Skan 452132718Skan if (rtl_dump_file) 453132718Skan fprintf (rtl_dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); 454132718Skan} 455132718Skan 456132718Skan/* Decide whether to unroll LOOP iterating constant number of times and how much. */ 457132718Skanstatic void 458132718Skandecide_unroll_constant_iterations (struct loop *loop, int flags) 459132718Skan{ 460132718Skan unsigned nunroll, nunroll_by_av, best_copies, best_unroll = -1, n_copies, i; 461132718Skan 462132718Skan if (!(flags & UAP_UNROLL)) 463132718Skan { 464132718Skan /* We were not asked to, just return back silently. */ 465132718Skan return; 466132718Skan } 467132718Skan 468132718Skan if (rtl_dump_file) 469132718Skan fprintf (rtl_dump_file, ";; Considering unrolling loop with constant number of iterations\n"); 470132718Skan 471132718Skan /* nunroll = total number of copies of the original loop body in 472132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 473132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 474132718Skan nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 475132718Skan if (nunroll > nunroll_by_av) 476132718Skan nunroll = nunroll_by_av; 477132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 478132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 479132718Skan 480132718Skan /* Skip big loops. */ 481132718Skan if (nunroll <= 1) 482132718Skan { 483132718Skan if (rtl_dump_file) 484132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 485132718Skan return; 486132718Skan } 487132718Skan 488132718Skan /* Check for simple loops. */ 489132718Skan if (!loop->has_desc) 490132718Skan { 491132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 492132718Skan loop->has_desc = 1; 493132718Skan } 494132718Skan 495132718Skan /* Check number of iterations. */ 496132718Skan if (!loop->simple || !loop->desc.const_iter) 497132718Skan { 498132718Skan if (rtl_dump_file) 499132718Skan fprintf (rtl_dump_file, ";; Unable to prove that the loop iterates constant times\n"); 500132718Skan return; 501132718Skan } 502132718Skan 503132718Skan /* Check whether the loop rolls enough to consider. */ 504132718Skan if (loop->desc.niter < 2 * nunroll) 505132718Skan { 506132718Skan if (rtl_dump_file) 507132718Skan fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); 508132718Skan return; 509132718Skan } 510132718Skan 511132718Skan /* Success; now compute number of iterations to unroll. We alter 512132718Skan nunroll so that as few as possible copies of loop body are 513132718Skan necessary, while still not decreasing the number of unrollings 514132718Skan too much (at most by 1). */ 515132718Skan best_copies = 2 * nunroll + 10; 516132718Skan 517132718Skan i = 2 * nunroll + 2; 518132718Skan if ((unsigned) i - 1 >= loop->desc.niter) 519132718Skan i = loop->desc.niter - 2; 520132718Skan 521132718Skan for (; i >= nunroll - 1; i--) 522132718Skan { 523132718Skan unsigned exit_mod = loop->desc.niter % (i + 1); 524132718Skan 525132718Skan if (loop->desc.postincr) 526132718Skan n_copies = exit_mod + i + 1; 527132718Skan else if (exit_mod != (unsigned) i || loop->desc.may_be_zero) 528132718Skan n_copies = exit_mod + i + 2; 529132718Skan else 530132718Skan n_copies = i + 1; 531132718Skan 532132718Skan if (n_copies < best_copies) 533132718Skan { 534132718Skan best_copies = n_copies; 535132718Skan best_unroll = i; 536132718Skan } 537132718Skan } 538132718Skan 539132718Skan if (rtl_dump_file) 540132718Skan fprintf (rtl_dump_file, ";; max_unroll %d (%d copies, initial %d).\n", 541132718Skan best_unroll + 1, best_copies, nunroll); 542132718Skan 543132718Skan loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; 544132718Skan loop->lpt_decision.times = best_unroll; 545132718Skan} 546132718Skan 547132718Skan/* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 548132718Skan times. The transformation does this: 549132718Skan 550132718Skan for (i = 0; i < 102; i++) 551132718Skan body; 552132718Skan 553132718Skan ==> 554132718Skan 555132718Skan i = 0; 556132718Skan body; i++; 557132718Skan body; i++; 558132718Skan while (i < 102) 559132718Skan { 560132718Skan body; i++; 561132718Skan body; i++; 562132718Skan body; i++; 563132718Skan body; i++; 564132718Skan } 565132718Skan */ 566132718Skanstatic void 567132718Skanunroll_loop_constant_iterations (struct loops *loops, struct loop *loop) 568132718Skan{ 569132718Skan unsigned HOST_WIDE_INT niter; 570132718Skan unsigned exit_mod; 571132718Skan sbitmap wont_exit; 572132718Skan unsigned n_remove_edges, i; 573132718Skan edge *remove_edges; 574132718Skan unsigned max_unroll = loop->lpt_decision.times; 575132718Skan struct loop_desc *desc = &loop->desc; 576132718Skan bool discard_inc = false; 577132718Skan bool is_bct; 578132718Skan 579132718Skan niter = desc->niter; 580132718Skan 581132718Skan if (niter <= (unsigned) max_unroll + 1) 582132718Skan abort (); /* Should not get here (such loop should be peeled instead). */ 583132718Skan 584132718Skan exit_mod = niter % (max_unroll + 1); 585132718Skan 586132718Skan wont_exit = sbitmap_alloc (max_unroll + 1); 587132718Skan sbitmap_ones (wont_exit); 588132718Skan 589132718Skan remove_edges = xcalloc (max_unroll + exit_mod + 1, sizeof (edge)); 590132718Skan n_remove_edges = 0; 591132718Skan 592132718Skan /* For a loop ending with a branch and count for which the increment 593132718Skan of the count register will be discarded, adjust the initialization of 594132718Skan the count register. */ 595132718Skan if ((is_bct = is_bct_cond (BB_END (desc->out_edge->src))) 596132718Skan && (discard_inc = discard_increment (loop))) 597132718Skan { 598132718Skan rtx ini_var; 599132718Skan 600132718Skan rtx init_code; 601132718Skan int n_peel, new_bct_value; 602132718Skan 603132718Skan /* Get expression for number of iterations. */ 604132718Skan start_sequence (); 605132718Skan 606132718Skan n_peel = (niter+1) % (max_unroll+1); 607132718Skan new_bct_value = (niter+1 - n_peel) / (max_unroll+1) ; 608132718Skan ini_var = GEN_INT (new_bct_value); 609132718Skan 610132718Skan emit_move_insn (desc->var, ini_var); 611132718Skan init_code = get_insns (); 612132718Skan end_sequence (); 613132718Skan 614132718Skan loop_split_edge_with (loop_preheader_edge (loop), init_code); 615132718Skan } 616132718Skan 617132718Skan if (desc->postincr) 618132718Skan { 619132718Skan /* Counter is incremented after the exit test; leave exit test 620132718Skan in the first copy, so that the loops that start with test 621132718Skan of exit condition have continuous body after unrolling. */ 622132718Skan 623132718Skan if (rtl_dump_file) 624132718Skan fprintf (rtl_dump_file, ";; Condition on beginning of loop.\n"); 625132718Skan 626132718Skan /* Peel exit_mod iterations. */ 627132718Skan RESET_BIT (wont_exit, 0); 628132718Skan if (desc->may_be_zero) 629132718Skan RESET_BIT (wont_exit, 1); 630132718Skan 631132718Skan if (exit_mod 632132718Skan && !duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 633132718Skan loops, exit_mod, 634132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 635132718Skan DLTHE_FLAG_UPDATE_FREQ)) 636132718Skan abort (); 637132718Skan 638132718Skan SET_BIT (wont_exit, 1); 639132718Skan } 640132718Skan else 641132718Skan { 642132718Skan /* Leave exit test in last copy, for the same reason as above if 643132718Skan the loop tests the condition at the end of loop body. */ 644132718Skan 645132718Skan if (rtl_dump_file) 646132718Skan fprintf (rtl_dump_file, ";; Condition on end of loop.\n"); 647132718Skan 648132718Skan /* We know that niter >= max_unroll + 2; so we do not need to care of 649132718Skan case when we would exit before reaching the loop. So just peel 650132718Skan exit_mod + 1 iterations. 651132718Skan */ 652132718Skan if (exit_mod != (unsigned) max_unroll || desc->may_be_zero) 653132718Skan { 654132718Skan RESET_BIT (wont_exit, 0); 655132718Skan if (desc->may_be_zero) 656132718Skan RESET_BIT (wont_exit, 1); 657132718Skan 658132718Skan if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 659132718Skan loops, exit_mod + 1, 660132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 661132718Skan DLTHE_FLAG_UPDATE_FREQ)) 662132718Skan abort (); 663132718Skan 664132718Skan SET_BIT (wont_exit, 0); 665132718Skan SET_BIT (wont_exit, 1); 666132718Skan } 667132718Skan 668132718Skan RESET_BIT (wont_exit, max_unroll); 669132718Skan } 670132718Skan 671132718Skan /* Now unroll the loop. */ 672132718Skan if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 673132718Skan loops, max_unroll, 674132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 675132718Skan DLTHE_FLAG_UPDATE_FREQ)) 676132718Skan abort (); 677132718Skan 678132718Skan free (wont_exit); 679132718Skan 680132718Skan /* Expand the branch and count. */ 681132718Skan if (is_bct) 682132718Skan for (i = 0; i < n_remove_edges; i++) 683132718Skan expand_bct (remove_edges[i], discard_inc); 684132718Skan 685132718Skan /* Remove the edges. */ 686132718Skan for (i = 0; i < n_remove_edges; i++) 687132718Skan remove_path (loops, remove_edges[i]); 688132718Skan free (remove_edges); 689132718Skan 690132718Skan if (rtl_dump_file) 691132718Skan fprintf (rtl_dump_file, ";; Unrolled loop %d times, constant # of iterations %i insns\n",max_unroll, num_loop_insns (loop)); 692132718Skan} 693132718Skan 694132718Skan/* Decide whether to unroll LOOP iterating runtime computable number of times 695132718Skan and how much. */ 696132718Skanstatic void 697132718Skandecide_unroll_runtime_iterations (struct loop *loop, int flags) 698132718Skan{ 699132718Skan unsigned nunroll, nunroll_by_av, i; 700132718Skan 701132718Skan if (!(flags & UAP_UNROLL)) 702132718Skan { 703132718Skan /* We were not asked to, just return back silently. */ 704132718Skan return; 705132718Skan } 706132718Skan 707132718Skan if (rtl_dump_file) 708132718Skan fprintf (rtl_dump_file, ";; Considering unrolling loop with runtime computable number of iterations\n"); 709132718Skan 710132718Skan /* nunroll = total number of copies of the original loop body in 711132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 712132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 713132718Skan nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 714132718Skan if (nunroll > nunroll_by_av) 715132718Skan nunroll = nunroll_by_av; 716132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 717132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 718132718Skan 719132718Skan /* Skip big loops. */ 720132718Skan if (nunroll <= 1) 721132718Skan { 722132718Skan if (rtl_dump_file) 723132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 724132718Skan return; 725132718Skan } 726132718Skan 727132718Skan /* Check for simple loops. */ 728132718Skan if (!loop->has_desc) 729132718Skan { 730132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 731132718Skan loop->has_desc = 1; 732132718Skan } 733132718Skan 734132718Skan /* Check simpleness. */ 735132718Skan if (!loop->simple) 736132718Skan { 737132718Skan if (rtl_dump_file) 738132718Skan fprintf (rtl_dump_file, ";; Unable to prove that the number of iterations can be counted in runtime\n"); 739132718Skan return; 740132718Skan } 741132718Skan 742132718Skan if (loop->desc.const_iter) 743132718Skan { 744132718Skan if (rtl_dump_file) 745132718Skan fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); 746132718Skan return; 747132718Skan } 748132718Skan 749132718Skan /* If we have profile feedback, check whether the loop rolls. */ 750132718Skan if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) 751132718Skan { 752132718Skan if (rtl_dump_file) 753132718Skan fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); 754132718Skan return; 755132718Skan } 756132718Skan 757132718Skan /* Success; now force nunroll to be power of 2, as we are unable to 758132718Skan cope with overflows in computation of number of iterations. */ 759132718Skan for (i = 1; 2 * i <= nunroll; i *= 2); 760132718Skan 761132718Skan loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; 762132718Skan loop->lpt_decision.times = i - 1; 763132718Skan} 764132718Skan 765132718Skan/* Unroll LOOP for that we are able to count number of iterations in runtime 766132718Skan LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some 767132718Skan extra care for case n < 0): 768132718Skan 769132718Skan for (i = 0; i < n; i++) 770132718Skan body; 771132718Skan 772132718Skan ==> 773132718Skan 774132718Skan i = 0; 775132718Skan mod = n % 4; 776132718Skan 777132718Skan switch (mod) 778132718Skan { 779132718Skan case 3: 780132718Skan body; i++; 781132718Skan case 2: 782132718Skan body; i++; 783132718Skan case 1: 784132718Skan body; i++; 785132718Skan case 0: ; 786132718Skan } 787132718Skan 788132718Skan while (i < n) 789132718Skan { 790132718Skan body; i++; 791132718Skan body; i++; 792132718Skan body; i++; 793132718Skan body; i++; 794132718Skan } 795132718Skan */ 796132718Skanstatic void 797132718Skanunroll_loop_runtime_iterations (struct loops *loops, struct loop *loop) 798132718Skan{ 799132718Skan rtx niter, init_code, branch_code, jump, label; 800132718Skan unsigned i, j, p; 801132718Skan basic_block preheader, *body, *dom_bbs, swtch, ezc_swtch; 802132718Skan unsigned n_dom_bbs; 803132718Skan sbitmap wont_exit; 804132718Skan int may_exit_copy; 805132718Skan unsigned n_peel, n_remove_edges; 806132718Skan edge *remove_edges, e; 807132718Skan bool extra_zero_check, last_may_exit; 808132718Skan unsigned max_unroll = loop->lpt_decision.times; 809132718Skan struct loop_desc *desc = &loop->desc; 810132718Skan bool discard_inc = false; 811132718Skan bool is_bct; 812132718Skan 813132718Skan /* Remember blocks whose dominators will have to be updated. */ 814132718Skan dom_bbs = xcalloc (n_basic_blocks, sizeof (basic_block)); 815132718Skan n_dom_bbs = 0; 816132718Skan 817132718Skan body = get_loop_body (loop); 818132718Skan for (i = 0; i < loop->num_nodes; i++) 819132718Skan { 820132718Skan unsigned nldom; 821132718Skan basic_block *ldom; 822132718Skan 823132718Skan nldom = get_dominated_by (CDI_DOMINATORS, body[i], &ldom); 824132718Skan for (j = 0; j < nldom; j++) 825132718Skan if (!flow_bb_inside_loop_p (loop, ldom[j])) 826132718Skan dom_bbs[n_dom_bbs++] = ldom[j]; 827132718Skan 828132718Skan free (ldom); 829132718Skan } 830132718Skan free (body); 831132718Skan 832132718Skan if (desc->postincr) 833132718Skan { 834132718Skan /* Leave exit in first copy (for explanation why see comment in 835132718Skan unroll_loop_constant_iterations). */ 836132718Skan may_exit_copy = 0; 837132718Skan n_peel = max_unroll - 1; 838132718Skan extra_zero_check = true; 839132718Skan last_may_exit = false; 840132718Skan } 841132718Skan else 842132718Skan { 843132718Skan /* Leave exit in last copy (for explanation why see comment in 844132718Skan unroll_loop_constant_iterations). */ 845132718Skan may_exit_copy = max_unroll; 846132718Skan n_peel = max_unroll; 847132718Skan extra_zero_check = false; 848132718Skan last_may_exit = true; 849132718Skan } 850132718Skan 851132718Skan /* Get expression for number of iterations. */ 852132718Skan start_sequence (); 853132718Skan niter = count_loop_iterations (desc, NULL, NULL); 854132718Skan if (!niter) 855132718Skan abort (); 856132718Skan niter = force_operand (niter, NULL); 857132718Skan 858132718Skan /* Count modulo by ANDing it with max_unroll; we use the fact that 859132718Skan the number of unrollings is a power of two, and thus this is correct 860132718Skan even if there is overflow in the computation. */ 861132718Skan niter = expand_simple_binop (GET_MODE (desc->var), AND, 862132718Skan niter, 863132718Skan GEN_INT (max_unroll), 864132718Skan NULL_RTX, 0, OPTAB_LIB_WIDEN); 865132718Skan 866132718Skan /* For a loop ending with a branch and count for which the increment 867132718Skan of the count register will be discarded, adjust the initialization of 868132718Skan the count register. */ 869132718Skan if ((is_bct = is_bct_cond (BB_END (desc->out_edge->src))) 870132718Skan && (discard_inc = discard_increment (loop))) 871132718Skan { 872132718Skan rtx count, count2, count_unroll_mod; 873132718Skan int count_unroll; 874132718Skan 875132718Skan /* start_sequence (); */ 876132718Skan 877132718Skan count = count_loop_iterations (desc, NULL, NULL); 878132718Skan 879132718Skan count_unroll = loop->lpt_decision.times+1; 880132718Skan 881132718Skan 882132718Skan 883132718Skan count_unroll_mod = GEN_INT (exact_log2 (count_unroll)); 884132718Skan count = expand_simple_binop (GET_MODE (desc->var), LSHIFTRT, 885132718Skan count, count_unroll_mod, 886132718Skan 0, 0, OPTAB_LIB_WIDEN); 887132718Skan 888132718Skan count2 = expand_simple_binop (GET_MODE (desc->var), PLUS, 889132718Skan count, GEN_INT (2), 890132718Skan 0, 0, OPTAB_LIB_WIDEN); 891132718Skan 892132718Skan emit_move_insn (desc->var, count2); 893132718Skan } 894132718Skan 895132718Skan init_code = get_insns (); 896132718Skan end_sequence (); 897132718Skan 898132718Skan /* Precondition the loop. */ 899132718Skan loop_split_edge_with (loop_preheader_edge (loop), init_code); 900132718Skan 901132718Skan remove_edges = xcalloc (max_unroll + n_peel + 1, sizeof (edge)); 902132718Skan n_remove_edges = 0; 903132718Skan 904132718Skan wont_exit = sbitmap_alloc (max_unroll + 2); 905132718Skan 906132718Skan /* Peel the first copy of loop body (almost always we must leave exit test 907132718Skan here; the only exception is when we have extra zero check and the number 908132718Skan of iterations is reliable (i.e. comes out of NE condition). Also record 909132718Skan the place of (possible) extra zero check. */ 910132718Skan sbitmap_zero (wont_exit); 911132718Skan if (extra_zero_check && desc->cond == NE) 912132718Skan SET_BIT (wont_exit, 1); 913132718Skan ezc_swtch = loop_preheader_edge (loop)->src; 914132718Skan if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 915132718Skan loops, 1, 916132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 917132718Skan DLTHE_FLAG_UPDATE_FREQ)) 918132718Skan abort (); 919132718Skan 920132718Skan /* Record the place where switch will be built for preconditioning. */ 921132718Skan swtch = loop_split_edge_with (loop_preheader_edge (loop), 922132718Skan NULL_RTX); 923132718Skan 924132718Skan for (i = 0; i < n_peel; i++) 925132718Skan { 926132718Skan /* Peel the copy. */ 927132718Skan sbitmap_zero (wont_exit); 928132718Skan if (i != n_peel - 1 || !last_may_exit) 929132718Skan SET_BIT (wont_exit, 1); 930132718Skan if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 931132718Skan loops, 1, 932132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 933132718Skan DLTHE_FLAG_UPDATE_FREQ)) 934132718Skan abort (); 935132718Skan 936132718Skan /* Create item for switch. */ 937132718Skan j = n_peel - i - (extra_zero_check ? 0 : 1); 938132718Skan p = REG_BR_PROB_BASE / (i + 2); 939132718Skan 940132718Skan /* If modulo is zero do not jumo to the header of the unrolled loops. 941132718Skan Jump instead to the last branch and count that precedes it. */ 942132718Skan if (is_bct && discard_inc && (j == 0)) 943132718Skan { 944132718Skan basic_block lastbb = loop_preheader_edge(loop)->src; 945132718Skan rtx split_after; 946132718Skan 947132718Skan /* Skip dummy basic blocks generated during the unrolling. */ 948132718Skan while (!is_bct_cond (BB_END (lastbb))) 949132718Skan lastbb = lastbb->pred->src; 950132718Skan 951132718Skan split_after = PREV_INSN (BB_END (lastbb)); 952132718Skan 953132718Skan preheader = split_loop_bb (lastbb , split_after)->dest; 954132718Skan } 955132718Skan else 956132718Skan preheader = loop_split_edge_with (loop_preheader_edge (loop), 957132718Skan NULL_RTX); 958132718Skan label = block_label (preheader); 959132718Skan start_sequence (); 960132718Skan do_compare_rtx_and_jump (copy_rtx (niter), GEN_INT (j), EQ, 0, 961132718Skan GET_MODE (desc->var), NULL_RTX, NULL_RTX, 962132718Skan label); 963132718Skan jump = get_last_insn (); 964132718Skan JUMP_LABEL (jump) = label; 965132718Skan REG_NOTES (jump) 966132718Skan = gen_rtx_EXPR_LIST (REG_BR_PROB, 967132718Skan GEN_INT (p), REG_NOTES (jump)); 968132718Skan 969132718Skan LABEL_NUSES (label)++; 970132718Skan branch_code = get_insns (); 971132718Skan end_sequence (); 972132718Skan 973132718Skan swtch = loop_split_edge_with (swtch->pred, branch_code); 974132718Skan set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 975132718Skan swtch->succ->probability = REG_BR_PROB_BASE - p; 976132718Skan e = make_edge (swtch, preheader, 977132718Skan swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP); 978132718Skan e->probability = p; 979132718Skan } 980132718Skan 981132718Skan if (extra_zero_check) 982132718Skan { 983132718Skan /* Add branch for zero iterations. */ 984132718Skan p = REG_BR_PROB_BASE / (max_unroll + 1); 985132718Skan swtch = ezc_swtch; 986132718Skan preheader = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX); 987132718Skan label = block_label (preheader); 988132718Skan start_sequence (); 989132718Skan do_compare_rtx_and_jump (copy_rtx (niter), const0_rtx, EQ, 0, 990132718Skan GET_MODE (desc->var), NULL_RTX, NULL_RTX, 991132718Skan label); 992132718Skan jump = get_last_insn (); 993132718Skan JUMP_LABEL (jump) = label; 994132718Skan REG_NOTES (jump) 995132718Skan = gen_rtx_EXPR_LIST (REG_BR_PROB, 996132718Skan GEN_INT (p), REG_NOTES (jump)); 997132718Skan 998132718Skan LABEL_NUSES (label)++; 999132718Skan branch_code = get_insns (); 1000132718Skan end_sequence (); 1001132718Skan 1002132718Skan swtch = loop_split_edge_with (swtch->succ, branch_code); 1003132718Skan set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); 1004132718Skan swtch->succ->probability = REG_BR_PROB_BASE - p; 1005132718Skan e = make_edge (swtch, preheader, 1006132718Skan swtch->succ->flags & EDGE_IRREDUCIBLE_LOOP); 1007132718Skan e->probability = p; 1008132718Skan } 1009132718Skan 1010132718Skan /* Recount dominators for outer blocks. */ 1011132718Skan iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, n_dom_bbs); 1012132718Skan 1013132718Skan /* And unroll loop. */ 1014132718Skan 1015132718Skan sbitmap_ones (wont_exit); 1016132718Skan RESET_BIT (wont_exit, may_exit_copy); 1017132718Skan 1018132718Skan if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1019132718Skan loops, max_unroll, 1020132718Skan wont_exit, desc->out_edge, remove_edges, &n_remove_edges, 1021132718Skan DLTHE_FLAG_UPDATE_FREQ)) 1022132718Skan abort (); 1023132718Skan 1024132718Skan free (wont_exit); 1025132718Skan 1026132718Skan /* Expand the branch and count. */ 1027132718Skan if (is_bct) 1028132718Skan for (i = 0; i < n_remove_edges; i++) 1029132718Skan expand_bct (remove_edges[i], discard_inc); 1030132718Skan 1031132718Skan /* Remove the edges. */ 1032132718Skan for (i = 0; i < n_remove_edges; i++) 1033132718Skan remove_path (loops, remove_edges[i]); 1034132718Skan free (remove_edges); 1035132718Skan 1036132718Skan if (rtl_dump_file) 1037132718Skan fprintf (rtl_dump_file, 1038132718Skan ";; Unrolled loop %d times, counting # of iterations in runtime, %i insns\n", 1039132718Skan max_unroll, num_loop_insns (loop)); 1040132718Skan} 1041132718Skan 1042132718Skan/* Decide whether to simply peel LOOP and how much. */ 1043132718Skanstatic void 1044132718Skandecide_peel_simple (struct loop *loop, int flags) 1045132718Skan{ 1046132718Skan unsigned npeel; 1047132718Skan 1048132718Skan if (!(flags & UAP_PEEL)) 1049132718Skan { 1050132718Skan /* We were not asked to, just return back silently. */ 1051132718Skan return; 1052132718Skan } 1053132718Skan 1054132718Skan if (rtl_dump_file) 1055132718Skan fprintf (rtl_dump_file, ";; Considering simply peeling loop\n"); 1056132718Skan 1057132718Skan /* npeel = number of iterations to peel. */ 1058132718Skan npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; 1059132718Skan if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) 1060132718Skan npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); 1061132718Skan 1062132718Skan /* Skip big loops. */ 1063132718Skan if (!npeel) 1064132718Skan { 1065132718Skan if (rtl_dump_file) 1066132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 1067132718Skan return; 1068132718Skan } 1069132718Skan 1070132718Skan /* Check for simple loops. */ 1071132718Skan if (!loop->has_desc) 1072132718Skan { 1073132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 1074132718Skan loop->has_desc = 1; 1075132718Skan } 1076132718Skan 1077132718Skan /* Check number of iterations. */ 1078132718Skan if (loop->simple && loop->desc.const_iter) 1079132718Skan { 1080132718Skan if (rtl_dump_file) 1081132718Skan fprintf (rtl_dump_file, ";; Loop iterates constant times\n"); 1082132718Skan return; 1083132718Skan } 1084132718Skan 1085132718Skan /* Do not simply peel loops with branches inside -- it increases number 1086132718Skan of mispredicts. */ 1087132718Skan if (loop->desc.n_branches > 1) 1088132718Skan { 1089132718Skan if (rtl_dump_file) 1090132718Skan fprintf (rtl_dump_file, ";; Not peeling, contains branches\n"); 1091132718Skan return; 1092132718Skan } 1093132718Skan 1094132718Skan if (loop->header->count) 1095132718Skan { 1096132718Skan unsigned niter = expected_loop_iterations (loop); 1097132718Skan if (niter + 1 > npeel) 1098132718Skan { 1099132718Skan if (rtl_dump_file) 1100132718Skan { 1101132718Skan fprintf (rtl_dump_file, ";; Not peeling loop, rolls too much ("); 1102132718Skan fprintf (rtl_dump_file, HOST_WIDEST_INT_PRINT_DEC, (HOST_WIDEST_INT) (niter + 1)); 1103132718Skan fprintf (rtl_dump_file, " iterations > %d [maximum peelings])\n", npeel); 1104132718Skan } 1105132718Skan return; 1106132718Skan } 1107132718Skan npeel = niter + 1; 1108132718Skan } 1109132718Skan else 1110132718Skan { 1111132718Skan /* For now we have no good heuristics to decide whether loop peeling 1112132718Skan will be effective, so disable it. */ 1113132718Skan if (rtl_dump_file) 1114132718Skan fprintf (rtl_dump_file, 1115132718Skan ";; Not peeling loop, no evidence it will be profitable\n"); 1116132718Skan return; 1117132718Skan } 1118132718Skan 1119132718Skan /* Success. */ 1120132718Skan loop->lpt_decision.decision = LPT_PEEL_SIMPLE; 1121132718Skan loop->lpt_decision.times = npeel; 1122132718Skan} 1123132718Skan 1124132718Skan/* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: 1125132718Skan while (cond) 1126132718Skan body; 1127132718Skan 1128132718Skan ==> 1129132718Skan 1130132718Skan if (!cond) goto end; 1131132718Skan body; 1132132718Skan if (!cond) goto end; 1133132718Skan body; 1134132718Skan while (cond) 1135132718Skan body; 1136132718Skan end: ; 1137132718Skan */ 1138132718Skanstatic void 1139132718Skanpeel_loop_simple (struct loops *loops, struct loop *loop) 1140132718Skan{ 1141132718Skan sbitmap wont_exit; 1142132718Skan unsigned npeel = loop->lpt_decision.times; 1143132718Skan 1144132718Skan wont_exit = sbitmap_alloc (npeel + 1); 1145132718Skan sbitmap_zero (wont_exit); 1146132718Skan 1147132718Skan if (!duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), 1148132718Skan loops, npeel, wont_exit, NULL, NULL, NULL, 1149132718Skan DLTHE_FLAG_UPDATE_FREQ)) 1150132718Skan abort (); 1151132718Skan 1152132718Skan free (wont_exit); 1153132718Skan 1154132718Skan if (rtl_dump_file) 1155132718Skan fprintf (rtl_dump_file, ";; Peeling loop %d times\n", npeel); 1156132718Skan} 1157132718Skan 1158132718Skan/* Decide whether to unroll LOOP stupidly and how much. */ 1159132718Skanstatic void 1160132718Skandecide_unroll_stupid (struct loop *loop, int flags) 1161132718Skan{ 1162132718Skan unsigned nunroll, nunroll_by_av, i; 1163132718Skan 1164132718Skan if (!(flags & UAP_UNROLL_ALL)) 1165132718Skan { 1166132718Skan /* We were not asked to, just return back silently. */ 1167132718Skan return; 1168132718Skan } 1169132718Skan 1170132718Skan if (rtl_dump_file) 1171132718Skan fprintf (rtl_dump_file, ";; Considering unrolling loop stupidly\n"); 1172132718Skan 1173132718Skan /* nunroll = total number of copies of the original loop body in 1174132718Skan unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ 1175132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; 1176132718Skan nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; 1177132718Skan if (nunroll > nunroll_by_av) 1178132718Skan nunroll = nunroll_by_av; 1179132718Skan if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) 1180132718Skan nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 1181132718Skan 1182132718Skan /* Skip big loops. */ 1183132718Skan if (nunroll <= 1) 1184132718Skan { 1185132718Skan if (rtl_dump_file) 1186132718Skan fprintf (rtl_dump_file, ";; Not considering loop, is too big\n"); 1187132718Skan return; 1188132718Skan } 1189132718Skan 1190132718Skan /* Check for simple loops. */ 1191132718Skan if (!loop->has_desc) 1192132718Skan { 1193132718Skan loop->simple = simple_loop_p (loop, &loop->desc); 1194132718Skan loop->has_desc = 1; 1195132718Skan } 1196132718Skan 1197132718Skan /* Check simpleness. */ 1198132718Skan if (loop->simple) 1199132718Skan { 1200132718Skan if (rtl_dump_file) 1201132718Skan fprintf (rtl_dump_file, ";; The loop is simple\n"); 1202132718Skan return; 1203132718Skan } 1204132718Skan 1205132718Skan /* Do not unroll loops with branches inside -- it increases number 1206132718Skan of mispredicts. */ 1207132718Skan if (loop->desc.n_branches > 1) 1208132718Skan { 1209132718Skan if (rtl_dump_file) 1210132718Skan fprintf (rtl_dump_file, ";; Not unrolling, contains branches\n"); 1211132718Skan return; 1212132718Skan } 1213132718Skan 1214132718Skan /* If we have profile feedback, check whether the loop rolls. */ 1215132718Skan if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) 1216132718Skan { 1217132718Skan if (rtl_dump_file) 1218132718Skan fprintf (rtl_dump_file, ";; Not unrolling loop, doesn't roll\n"); 1219132718Skan return; 1220132718Skan } 1221132718Skan 1222132718Skan /* Success. Now force nunroll to be power of 2, as it seems that this 1223132718Skan improves results (partially because of better alignments, partially 1224132718Skan because of some dark magic). */ 1225132718Skan for (i = 1; 2 * i <= nunroll; i *= 2); 1226132718Skan 1227132718Skan loop->lpt_decision.decision = LPT_UNROLL_STUPID; 1228132718Skan loop->lpt_decision.times = i - 1; 1229132718Skan} 1230132718Skan 1231132718Skan/* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: 1232132718Skan while (cond) 1233132718Skan body; 1234132718Skan 1235132718Skan ==> 1236132718Skan 1237132718Skan while (cond) 1238132718Skan { 1239132718Skan body; 1240132718Skan if (!cond) break; 1241132718Skan body; 1242132718Skan if (!cond) break; 1243132718Skan body; 1244132718Skan if (!cond) break; 1245132718Skan body; 1246132718Skan } 1247132718Skan */ 1248132718Skanstatic void 1249132718Skanunroll_loop_stupid (struct loops *loops, struct loop *loop) 1250132718Skan{ 1251132718Skan sbitmap wont_exit; 1252132718Skan unsigned nunroll = loop->lpt_decision.times; 1253132718Skan 1254132718Skan wont_exit = sbitmap_alloc (nunroll + 1); 1255132718Skan sbitmap_zero (wont_exit); 1256132718Skan 1257132718Skan if (!duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), 1258132718Skan loops, nunroll, wont_exit, NULL, NULL, NULL, 1259132718Skan DLTHE_FLAG_UPDATE_FREQ)) 1260132718Skan abort (); 1261132718Skan 1262132718Skan free (wont_exit); 1263132718Skan 1264132718Skan if (rtl_dump_file) 1265132718Skan fprintf (rtl_dump_file, ";; Unrolled loop %d times, %i insns\n", 1266132718Skan nunroll, num_loop_insns (loop)); 1267132718Skan} 1268132718Skan 1269132718Skan/* Expand a bct instruction in a branch and an increment. 1270132718Skan If flag_inc is set, the induction variable does not need to be 1271132718Skan incremented. */ 1272132718Skanvoid expand_bct (edge e, int flag_inc) 1273132718Skan{ 1274132718Skan rtx bct_insn = BB_END (e->src); 1275132718Skan rtx cmp; 1276132718Skan rtx inc; 1277132718Skan rtx seq; 1278132718Skan 1279132718Skan rtx tgt; 1280132718Skan rtx condition; 1281132718Skan rtx label; 1282132718Skan rtx reg; 1283132718Skan rtx jump; 1284132718Skan rtx pattern = PATTERN(bct_insn); 1285132718Skan 1286132718Skan if (!(is_bct_cond(bct_insn))) 1287132718Skan return; 1288132718Skan 1289132718Skan inc = get_var_set_from_bct (bct_insn); 1290132718Skan cmp = XVECEXP (pattern, 0, 0); 1291132718Skan reg = SET_DEST (inc); 1292132718Skan 1293132718Skan start_sequence (); 1294132718Skan if (!flag_inc) 1295132718Skan { 1296132718Skan tgt = force_operand (XEXP (inc, 1), XEXP (inc, 0)); 1297132718Skan if (tgt != XEXP (inc, 0)) 1298132718Skan emit_move_insn (XEXP (inc, 0), tgt); 1299132718Skan } 1300132718Skan 1301132718Skan condition = XEXP (SET_SRC (cmp), 0); 1302132718Skan label = XEXP (SET_SRC (cmp), 1); 1303132718Skan 1304132718Skan do_compare_rtx_and_jump (copy_rtx (reg), XEXP (condition, 1), 1305132718Skan GET_CODE (condition), 0, 1306132718Skan GET_MODE (reg), NULL_RTX, NULL_RTX, 1307132718Skan label); 1308132718Skan jump = get_last_insn (); 1309132718Skan JUMP_LABEL (jump) = label; 1310132718Skan seq = get_insns (); 1311132718Skan end_sequence (); 1312132718Skan emit_insn_after (seq, bct_insn); 1313132718Skan 1314132718Skan delete_insn (bct_insn); 1315132718Skan 1316132718Skan return; 1317132718Skan} 1318132718Skan 1319132718Skan/* Check that the increment of the count register can be discarded. */ 1320132718Skanbool 1321132718Skandiscard_increment (struct loop *loop) 1322132718Skan{ 1323132718Skan struct loop_desc *desc = &loop->desc; 1324132718Skan rtx inc, set_src, reg; 1325132718Skan rtx bct_insn; 1326132718Skan unsigned int i; 1327132718Skan basic_block *body; 1328132718Skan 1329132718Skan bct_insn = BB_END (desc->out_edge->src); 1330132718Skan if (!is_bct_cond (bct_insn)) 1331132718Skan abort(); 1332132718Skan 1333132718Skan inc = get_var_set_from_bct (bct_insn); 1334132718Skan 1335132718Skan /* Check that inc is of the form reg = reg - 1. */ 1336132718Skan reg = SET_DEST (inc); 1337132718Skan set_src = SET_SRC (inc); 1338132718Skan 1339132718Skan if (GET_CODE (set_src) != PLUS) 1340132718Skan return false; 1341132718Skan 1342132718Skan if (!rtx_equal_p (XEXP (set_src, 0), reg)) 1343132718Skan return false; 1344132718Skan 1345132718Skan if (!CONSTANT_P (XEXP (set_src, 1))) 1346132718Skan return false; 1347132718Skan 1348132718Skan if (INTVAL (XEXP (set_src, 1)) != -1) 1349132718Skan return false; 1350132718Skan 1351132718Skan /* We need to check that the register has no other uses beside the branch and 1352132718Skan count. */ 1353132718Skan body = get_loop_body (loop); 1354132718Skan for(i=0; i < loop->num_nodes; i++) 1355132718Skan { 1356132718Skan if (reg_mentioned_p (desc->var, BB_HEAD (body[i]))) 1357132718Skan return false; 1358132718Skan 1359132718Skan if (body[i] != desc->out_edge->src) 1360132718Skan if (reg_mentioned_p (desc->var, BB_END (body[i]))) 1361132718Skan return false; 1362132718Skan 1363132718Skan if (reg_used_between_p (desc->var, BB_HEAD (body[i]), BB_END (body[i]))) 1364132718Skan return false; 1365132718Skan } 1366132718Skan 1367132718Skan /* Check that the branch and count ends the latch. */ 1368132718Skan if (desc->out_edge->src != loop->latch) 1369132718Skan { 1370132718Skan rtx insn; 1371132718Skan 1372132718Skan /* Latch is a dummy block generated by loop-init. */ 1373132718Skan if (BRANCH_EDGE(desc->out_edge->src)->dest != loop->latch) 1374132718Skan return false; 1375132718Skan 1376132718Skan for (insn = BB_HEAD (loop->latch); insn != NEXT_INSN (BB_END (loop->latch)); 1377132718Skan insn = NEXT_INSN (insn)) 1378132718Skan if (INSN_P (insn)) return false; 1379132718Skan } 1380132718Skan 1381132718Skan return true; 1382132718Skan} 1383132718Skan 1384