1169689Skan/* Thread edges through blocks and update the control flow and SSA graphs. 2169689Skan Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc. 3169689Skan 4169689SkanThis file is part of GCC. 5169689Skan 6169689SkanGCC is free software; you can redistribute it and/or modify 7169689Skanit under the terms of the GNU General Public License as published by 8169689Skanthe Free Software Foundation; either version 2, or (at your option) 9169689Skanany later version. 10169689Skan 11169689SkanGCC is distributed in the hope that it will be useful, 12169689Skanbut WITHOUT ANY WARRANTY; without even the implied warranty of 13169689SkanMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14169689SkanGNU General Public License for more details. 15169689Skan 16169689SkanYou should have received a copy of the GNU General Public License 17169689Skanalong with GCC; see the file COPYING. If not, write to 18169689Skanthe Free Software Foundation, 51 Franklin Street, Fifth Floor, 19169689SkanBoston, MA 02110-1301, USA. */ 20169689Skan 21169689Skan#include "config.h" 22169689Skan#include "system.h" 23169689Skan#include "coretypes.h" 24169689Skan#include "tm.h" 25169689Skan#include "tree.h" 26169689Skan#include "flags.h" 27169689Skan#include "rtl.h" 28169689Skan#include "tm_p.h" 29169689Skan#include "ggc.h" 30169689Skan#include "basic-block.h" 31169689Skan#include "output.h" 32169689Skan#include "expr.h" 33169689Skan#include "function.h" 34169689Skan#include "diagnostic.h" 35169689Skan#include "tree-flow.h" 36169689Skan#include "tree-dump.h" 37169689Skan#include "tree-pass.h" 38169689Skan#include "cfgloop.h" 39169689Skan 40169689Skan/* Given a block B, update the CFG and SSA graph to reflect redirecting 41169689Skan one or more in-edges to B to instead reach the destination of an 42169689Skan out-edge from B while preserving any side effects in B. 43169689Skan 44169689Skan i.e., given A->B and B->C, change A->B to be A->C yet still preserve the 45169689Skan side effects of executing B. 46169689Skan 47169689Skan 1. Make a copy of B (including its outgoing edges and statements). Call 48169689Skan the copy B'. Note B' has no incoming edges or PHIs at this time. 49169689Skan 50169689Skan 2. Remove the control statement at the end of B' and all outgoing edges 51169689Skan except B'->C. 52169689Skan 53169689Skan 3. Add a new argument to each PHI in C with the same value as the existing 54169689Skan argument associated with edge B->C. Associate the new PHI arguments 55169689Skan with the edge B'->C. 56169689Skan 57169689Skan 4. For each PHI in B, find or create a PHI in B' with an identical 58169689Skan PHI_RESULT. Add an argument to the PHI in B' which has the same 59169689Skan value as the PHI in B associated with the edge A->B. Associate 60169689Skan the new argument in the PHI in B' with the edge A->B. 61169689Skan 62169689Skan 5. Change the edge A->B to A->B'. 63169689Skan 64169689Skan 5a. This automatically deletes any PHI arguments associated with the 65169689Skan edge A->B in B. 66169689Skan 67169689Skan 5b. This automatically associates each new argument added in step 4 68169689Skan with the edge A->B'. 69169689Skan 70169689Skan 6. Repeat for other incoming edges into B. 71169689Skan 72169689Skan 7. Put the duplicated resources in B and all the B' blocks into SSA form. 73169689Skan 74169689Skan Note that block duplication can be minimized by first collecting the 75169689Skan the set of unique destination blocks that the incoming edges should 76169689Skan be threaded to. Block duplication can be further minimized by using 77169689Skan B instead of creating B' for one destination if all edges into B are 78169689Skan going to be threaded to a successor of B. 79169689Skan 80169689Skan We further reduce the number of edges and statements we create by 81169689Skan not copying all the outgoing edges and the control statement in 82169689Skan step #1. We instead create a template block without the outgoing 83169689Skan edges and duplicate the template. */ 84169689Skan 85169689Skan 86169689Skan/* Steps #5 and #6 of the above algorithm are best implemented by walking 87169689Skan all the incoming edges which thread to the same destination edge at 88169689Skan the same time. That avoids lots of table lookups to get information 89169689Skan for the destination edge. 90169689Skan 91169689Skan To realize that implementation we create a list of incoming edges 92169689Skan which thread to the same outgoing edge. Thus to implement steps 93169689Skan #5 and #6 we traverse our hash table of outgoing edge information. 94169689Skan For each entry we walk the list of incoming edges which thread to 95169689Skan the current outgoing edge. */ 96169689Skan 97169689Skanstruct el 98169689Skan{ 99169689Skan edge e; 100169689Skan struct el *next; 101169689Skan}; 102169689Skan 103169689Skan/* Main data structure recording information regarding B's duplicate 104169689Skan blocks. */ 105169689Skan 106169689Skan/* We need to efficiently record the unique thread destinations of this 107169689Skan block and specific information associated with those destinations. We 108169689Skan may have many incoming edges threaded to the same outgoing edge. This 109169689Skan can be naturally implemented with a hash table. */ 110169689Skan 111169689Skanstruct redirection_data 112169689Skan{ 113169689Skan /* A duplicate of B with the trailing control statement removed and which 114169689Skan targets a single successor of B. */ 115169689Skan basic_block dup_block; 116169689Skan 117169689Skan /* An outgoing edge from B. DUP_BLOCK will have OUTGOING_EDGE->dest as 118169689Skan its single successor. */ 119169689Skan edge outgoing_edge; 120169689Skan 121169689Skan /* A list of incoming edges which we want to thread to 122169689Skan OUTGOING_EDGE->dest. */ 123169689Skan struct el *incoming_edges; 124169689Skan 125169689Skan /* Flag indicating whether or not we should create a duplicate block 126169689Skan for this thread destination. This is only true if we are threading 127169689Skan all incoming edges and thus are using BB itself as a duplicate block. */ 128169689Skan bool do_not_duplicate; 129169689Skan}; 130169689Skan 131169689Skan/* Main data structure to hold information for duplicates of BB. */ 132169689Skanstatic htab_t redirection_data; 133169689Skan 134169689Skan/* Data structure of information to pass to hash table traversal routines. */ 135169689Skanstruct local_info 136169689Skan{ 137169689Skan /* The current block we are working on. */ 138169689Skan basic_block bb; 139169689Skan 140169689Skan /* A template copy of BB with no outgoing edges or control statement that 141169689Skan we use for creating copies. */ 142169689Skan basic_block template_block; 143169689Skan 144169689Skan /* TRUE if we thread one or more jumps, FALSE otherwise. */ 145169689Skan bool jumps_threaded; 146169689Skan}; 147169689Skan 148169689Skan/* Passes which use the jump threading code register jump threading 149169689Skan opportunities as they are discovered. We keep the registered 150169689Skan jump threading opportunities in this vector as edge pairs 151169689Skan (original_edge, target_edge). */ 152169689SkanDEF_VEC_ALLOC_P(edge,heap); 153169689Skanstatic VEC(edge,heap) *threaded_edges; 154169689Skan 155169689Skan 156169689Skan/* Jump threading statistics. */ 157169689Skan 158169689Skanstruct thread_stats_d 159169689Skan{ 160169689Skan unsigned long num_threaded_edges; 161169689Skan}; 162169689Skan 163169689Skanstruct thread_stats_d thread_stats; 164169689Skan 165169689Skan 166169689Skan/* Remove the last statement in block BB if it is a control statement 167169689Skan Also remove all outgoing edges except the edge which reaches DEST_BB. 168169689Skan If DEST_BB is NULL, then remove all outgoing edges. */ 169169689Skan 170169689Skanstatic void 171169689Skanremove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb) 172169689Skan{ 173169689Skan block_stmt_iterator bsi; 174169689Skan edge e; 175169689Skan edge_iterator ei; 176169689Skan 177169689Skan bsi = bsi_last (bb); 178169689Skan 179169689Skan /* If the duplicate ends with a control statement, then remove it. 180169689Skan 181169689Skan Note that if we are duplicating the template block rather than the 182169689Skan original basic block, then the duplicate might not have any real 183169689Skan statements in it. */ 184169689Skan if (!bsi_end_p (bsi) 185169689Skan && bsi_stmt (bsi) 186169689Skan && (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR 187169689Skan || TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR 188169689Skan || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR)) 189169689Skan bsi_remove (&bsi, true); 190169689Skan 191169689Skan for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ) 192169689Skan { 193169689Skan if (e->dest != dest_bb) 194169689Skan remove_edge (e); 195169689Skan else 196169689Skan ei_next (&ei); 197169689Skan } 198169689Skan} 199169689Skan 200169689Skan/* Create a duplicate of BB which only reaches the destination of the edge 201169689Skan stored in RD. Record the duplicate block in RD. */ 202169689Skan 203169689Skanstatic void 204169689Skancreate_block_for_threading (basic_block bb, struct redirection_data *rd) 205169689Skan{ 206169689Skan /* We can use the generic block duplication code and simply remove 207169689Skan the stuff we do not need. */ 208169689Skan rd->dup_block = duplicate_block (bb, NULL, NULL); 209169689Skan 210169689Skan /* Zero out the profile, since the block is unreachable for now. */ 211169689Skan rd->dup_block->frequency = 0; 212169689Skan rd->dup_block->count = 0; 213169689Skan 214169689Skan /* The call to duplicate_block will copy everything, including the 215169689Skan useless COND_EXPR or SWITCH_EXPR at the end of BB. We just remove 216169689Skan the useless COND_EXPR or SWITCH_EXPR here rather than having a 217169689Skan specialized block copier. We also remove all outgoing edges 218169689Skan from the duplicate block. The appropriate edge will be created 219169689Skan later. */ 220169689Skan remove_ctrl_stmt_and_useless_edges (rd->dup_block, NULL); 221169689Skan} 222169689Skan 223169689Skan/* Hashing and equality routines for our hash table. */ 224169689Skanstatic hashval_t 225169689Skanredirection_data_hash (const void *p) 226169689Skan{ 227169689Skan edge e = ((struct redirection_data *)p)->outgoing_edge; 228169689Skan return e->dest->index; 229169689Skan} 230169689Skan 231169689Skanstatic int 232169689Skanredirection_data_eq (const void *p1, const void *p2) 233169689Skan{ 234169689Skan edge e1 = ((struct redirection_data *)p1)->outgoing_edge; 235169689Skan edge e2 = ((struct redirection_data *)p2)->outgoing_edge; 236169689Skan 237169689Skan return e1 == e2; 238169689Skan} 239169689Skan 240169689Skan/* Given an outgoing edge E lookup and return its entry in our hash table. 241169689Skan 242169689Skan If INSERT is true, then we insert the entry into the hash table if 243169689Skan it is not already present. INCOMING_EDGE is added to the list of incoming 244169689Skan edges associated with E in the hash table. */ 245169689Skan 246169689Skanstatic struct redirection_data * 247169689Skanlookup_redirection_data (edge e, edge incoming_edge, enum insert_option insert) 248169689Skan{ 249169689Skan void **slot; 250169689Skan struct redirection_data *elt; 251169689Skan 252169689Skan /* Build a hash table element so we can see if E is already 253169689Skan in the table. */ 254169689Skan elt = XNEW (struct redirection_data); 255169689Skan elt->outgoing_edge = e; 256169689Skan elt->dup_block = NULL; 257169689Skan elt->do_not_duplicate = false; 258169689Skan elt->incoming_edges = NULL; 259169689Skan 260169689Skan slot = htab_find_slot (redirection_data, elt, insert); 261169689Skan 262169689Skan /* This will only happen if INSERT is false and the entry is not 263169689Skan in the hash table. */ 264169689Skan if (slot == NULL) 265169689Skan { 266169689Skan free (elt); 267169689Skan return NULL; 268169689Skan } 269169689Skan 270169689Skan /* This will only happen if E was not in the hash table and 271169689Skan INSERT is true. */ 272169689Skan if (*slot == NULL) 273169689Skan { 274169689Skan *slot = (void *)elt; 275169689Skan elt->incoming_edges = XNEW (struct el); 276169689Skan elt->incoming_edges->e = incoming_edge; 277169689Skan elt->incoming_edges->next = NULL; 278169689Skan return elt; 279169689Skan } 280169689Skan /* E was in the hash table. */ 281169689Skan else 282169689Skan { 283169689Skan /* Free ELT as we do not need it anymore, we will extract the 284169689Skan relevant entry from the hash table itself. */ 285169689Skan free (elt); 286169689Skan 287169689Skan /* Get the entry stored in the hash table. */ 288169689Skan elt = (struct redirection_data *) *slot; 289169689Skan 290169689Skan /* If insertion was requested, then we need to add INCOMING_EDGE 291169689Skan to the list of incoming edges associated with E. */ 292169689Skan if (insert) 293169689Skan { 294169689Skan struct el *el = XNEW (struct el); 295169689Skan el->next = elt->incoming_edges; 296169689Skan el->e = incoming_edge; 297169689Skan elt->incoming_edges = el; 298169689Skan } 299169689Skan 300169689Skan return elt; 301169689Skan } 302169689Skan} 303169689Skan 304169689Skan/* Given a duplicate block and its single destination (both stored 305169689Skan in RD). Create an edge between the duplicate and its single 306169689Skan destination. 307169689Skan 308169689Skan Add an additional argument to any PHI nodes at the single 309169689Skan destination. */ 310169689Skan 311169689Skanstatic void 312169689Skancreate_edge_and_update_destination_phis (struct redirection_data *rd) 313169689Skan{ 314169689Skan edge e = make_edge (rd->dup_block, rd->outgoing_edge->dest, EDGE_FALLTHRU); 315169689Skan tree phi; 316169689Skan 317169689Skan e->probability = REG_BR_PROB_BASE; 318169689Skan e->count = rd->dup_block->count; 319169689Skan 320169689Skan /* If there are any PHI nodes at the destination of the outgoing edge 321169689Skan from the duplicate block, then we will need to add a new argument 322169689Skan to them. The argument should have the same value as the argument 323169689Skan associated with the outgoing edge stored in RD. */ 324169689Skan for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi)) 325169689Skan { 326169689Skan int indx = rd->outgoing_edge->dest_idx; 327169689Skan add_phi_arg (phi, PHI_ARG_DEF (phi, indx), e); 328169689Skan } 329169689Skan} 330169689Skan 331169689Skan/* Hash table traversal callback routine to create duplicate blocks. */ 332169689Skan 333169689Skanstatic int 334169689Skancreate_duplicates (void **slot, void *data) 335169689Skan{ 336169689Skan struct redirection_data *rd = (struct redirection_data *) *slot; 337169689Skan struct local_info *local_info = (struct local_info *)data; 338169689Skan 339169689Skan /* If this entry should not have a duplicate created, then there's 340169689Skan nothing to do. */ 341169689Skan if (rd->do_not_duplicate) 342169689Skan return 1; 343169689Skan 344169689Skan /* Create a template block if we have not done so already. Otherwise 345169689Skan use the template to create a new block. */ 346169689Skan if (local_info->template_block == NULL) 347169689Skan { 348169689Skan create_block_for_threading (local_info->bb, rd); 349169689Skan local_info->template_block = rd->dup_block; 350169689Skan 351169689Skan /* We do not create any outgoing edges for the template. We will 352169689Skan take care of that in a later traversal. That way we do not 353169689Skan create edges that are going to just be deleted. */ 354169689Skan } 355169689Skan else 356169689Skan { 357169689Skan create_block_for_threading (local_info->template_block, rd); 358169689Skan 359169689Skan /* Go ahead and wire up outgoing edges and update PHIs for the duplicate 360169689Skan block. */ 361169689Skan create_edge_and_update_destination_phis (rd); 362169689Skan } 363169689Skan 364169689Skan /* Keep walking the hash table. */ 365169689Skan return 1; 366169689Skan} 367169689Skan 368169689Skan/* We did not create any outgoing edges for the template block during 369169689Skan block creation. This hash table traversal callback creates the 370169689Skan outgoing edge for the template block. */ 371169689Skan 372169689Skanstatic int 373169689Skanfixup_template_block (void **slot, void *data) 374169689Skan{ 375169689Skan struct redirection_data *rd = (struct redirection_data *) *slot; 376169689Skan struct local_info *local_info = (struct local_info *)data; 377169689Skan 378169689Skan /* If this is the template block, then create its outgoing edges 379169689Skan and halt the hash table traversal. */ 380169689Skan if (rd->dup_block && rd->dup_block == local_info->template_block) 381169689Skan { 382169689Skan create_edge_and_update_destination_phis (rd); 383169689Skan return 0; 384169689Skan } 385169689Skan 386169689Skan return 1; 387169689Skan} 388169689Skan 389169689Skan/* Not all jump threading requests are useful. In particular some 390169689Skan jump threading requests can create irreducible regions which are 391169689Skan undesirable. 392169689Skan 393169689Skan This routine will examine the BB's incoming edges for jump threading 394169689Skan requests which, if acted upon, would create irreducible regions. Any 395169689Skan such jump threading requests found will be pruned away. */ 396169689Skan 397169689Skanstatic void 398169689Skanprune_undesirable_thread_requests (basic_block bb) 399169689Skan{ 400169689Skan edge e; 401169689Skan edge_iterator ei; 402169689Skan bool may_create_irreducible_region = false; 403169689Skan unsigned int num_outgoing_edges_into_loop = 0; 404169689Skan 405169689Skan /* For the heuristics below, we need to know if BB has more than 406169689Skan one outgoing edge into a loop. */ 407169689Skan FOR_EACH_EDGE (e, ei, bb->succs) 408169689Skan num_outgoing_edges_into_loop += ((e->flags & EDGE_LOOP_EXIT) == 0); 409169689Skan 410169689Skan if (num_outgoing_edges_into_loop > 1) 411169689Skan { 412169689Skan edge backedge = NULL; 413169689Skan 414169689Skan /* Consider the effect of threading the edge (0, 1) to 2 on the left 415169689Skan CFG to produce the right CFG: 416169689Skan 417169689Skan 418169689Skan 0 0 419169689Skan | | 420169689Skan 1<--+ 2<--------+ 421169689Skan / \ | | | 422169689Skan 2 3 | 4<----+ | 423169689Skan \ / | / \ | | 424169689Skan 4---+ E 1-- | --+ 425169689Skan | | | 426169689Skan E 3---+ 427169689Skan 428169689Skan 429169689Skan Threading the (0, 1) edge to 2 effectively creates two loops 430169689Skan (2, 4, 1) and (4, 1, 3) which are neither disjoint nor nested. 431169689Skan This is not good. 432169689Skan 433169689Skan However, we do need to be able to thread (0, 1) to 2 or 3 434169689Skan in the left CFG below (which creates the middle and right 435169689Skan CFGs with nested loops). 436169689Skan 437169689Skan 0 0 0 438169689Skan | | | 439169689Skan 1<--+ 2<----+ 3<-+<-+ 440169689Skan /| | | | | | | 441169689Skan 2 | | 3<-+ | 1--+ | 442169689Skan \| | | | | | | 443169689Skan 3---+ 1--+--+ 2-----+ 444169689Skan 445169689Skan 446169689Skan A safe heuristic appears to be to only allow threading if BB 447169689Skan has a single incoming backedge from one of its direct successors. */ 448169689Skan 449169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 450169689Skan { 451169689Skan if (e->flags & EDGE_DFS_BACK) 452169689Skan { 453169689Skan if (backedge) 454169689Skan { 455169689Skan backedge = NULL; 456169689Skan break; 457169689Skan } 458169689Skan else 459169689Skan { 460169689Skan backedge = e; 461169689Skan } 462169689Skan } 463169689Skan } 464169689Skan 465169689Skan if (backedge && find_edge (bb, backedge->src)) 466169689Skan ; 467169689Skan else 468169689Skan may_create_irreducible_region = true; 469169689Skan } 470169689Skan else 471169689Skan { 472169689Skan edge dest = NULL; 473169689Skan 474169689Skan /* If we thread across the loop entry block (BB) into the 475169689Skan loop and BB is still reached from outside the loop, then 476169689Skan we would create an irreducible CFG. Consider the effect 477169689Skan of threading the edge (1, 4) to 5 on the left CFG to produce 478169689Skan the right CFG 479169689Skan 480169689Skan 0 0 481169689Skan / \ / \ 482169689Skan 1 2 1 2 483169689Skan \ / | | 484169689Skan 4<----+ 5<->4 485169689Skan / \ | | 486169689Skan E 5---+ E 487169689Skan 488169689Skan 489169689Skan Threading the (1, 4) edge to 5 creates two entry points 490169689Skan into the loop (4, 5) (one from block 1, the other from 491169689Skan block 2). A classic irreducible region. 492169689Skan 493169689Skan So look at all of BB's incoming edges which are not 494169689Skan backedges and which are not threaded to the loop exit. 495169689Skan If that subset of incoming edges do not all thread 496169689Skan to the same block, then threading any of them will create 497169689Skan an irreducible region. */ 498169689Skan 499169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 500169689Skan { 501169689Skan edge e2; 502169689Skan 503169689Skan /* We ignore back edges for now. This may need refinement 504169689Skan as threading a backedge creates an inner loop which 505169689Skan we would need to verify has a single entry point. 506169689Skan 507169689Skan If all backedges thread to new locations, then this 508169689Skan block will no longer have incoming backedges and we 509169689Skan need not worry about creating irreducible regions 510169689Skan by threading through BB. I don't think this happens 511169689Skan enough in practice to worry about it. */ 512169689Skan if (e->flags & EDGE_DFS_BACK) 513169689Skan continue; 514169689Skan 515169689Skan /* If the incoming edge threads to the loop exit, then it 516169689Skan is clearly safe. */ 517169689Skan e2 = e->aux; 518169689Skan if (e2 && (e2->flags & EDGE_LOOP_EXIT)) 519169689Skan continue; 520169689Skan 521169689Skan /* E enters the loop header and is not threaded. We can 522169689Skan not allow any other incoming edges to thread into 523169689Skan the loop as that would create an irreducible region. */ 524169689Skan if (!e2) 525169689Skan { 526169689Skan may_create_irreducible_region = true; 527169689Skan break; 528169689Skan } 529169689Skan 530169689Skan /* We know that this incoming edge threads to a block inside 531169689Skan the loop. This edge must thread to the same target in 532169689Skan the loop as any previously seen threaded edges. Otherwise 533169689Skan we will create an irreducible region. */ 534169689Skan if (!dest) 535169689Skan dest = e2; 536169689Skan else if (e2 != dest) 537169689Skan { 538169689Skan may_create_irreducible_region = true; 539169689Skan break; 540169689Skan } 541169689Skan } 542169689Skan } 543169689Skan 544169689Skan /* If we might create an irreducible region, then cancel any of 545169689Skan the jump threading requests for incoming edges which are 546169689Skan not backedges and which do not thread to the exit block. */ 547169689Skan if (may_create_irreducible_region) 548169689Skan { 549169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 550169689Skan { 551169689Skan edge e2; 552169689Skan 553169689Skan /* Ignore back edges. */ 554169689Skan if (e->flags & EDGE_DFS_BACK) 555169689Skan continue; 556169689Skan 557169689Skan e2 = e->aux; 558169689Skan 559169689Skan /* If this incoming edge was not threaded, then there is 560169689Skan nothing to do. */ 561169689Skan if (!e2) 562169689Skan continue; 563169689Skan 564169689Skan /* If this incoming edge threaded to the loop exit, 565169689Skan then it can be ignored as it is safe. */ 566169689Skan if (e2->flags & EDGE_LOOP_EXIT) 567169689Skan continue; 568169689Skan 569169689Skan if (e2) 570169689Skan { 571169689Skan /* This edge threaded into the loop and the jump thread 572169689Skan request must be cancelled. */ 573169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 574169689Skan fprintf (dump_file, " Not threading jump %d --> %d to %d\n", 575169689Skan e->src->index, e->dest->index, e2->dest->index); 576169689Skan e->aux = NULL; 577169689Skan } 578169689Skan } 579169689Skan } 580169689Skan} 581169689Skan 582169689Skan/* Hash table traversal callback to redirect each incoming edge 583169689Skan associated with this hash table element to its new destination. */ 584169689Skan 585169689Skanstatic int 586169689Skanredirect_edges (void **slot, void *data) 587169689Skan{ 588169689Skan struct redirection_data *rd = (struct redirection_data *) *slot; 589169689Skan struct local_info *local_info = (struct local_info *)data; 590169689Skan struct el *next, *el; 591169689Skan 592169689Skan /* Walk over all the incoming edges associated associated with this 593169689Skan hash table entry. */ 594169689Skan for (el = rd->incoming_edges; el; el = next) 595169689Skan { 596169689Skan edge e = el->e; 597169689Skan 598169689Skan /* Go ahead and free this element from the list. Doing this now 599169689Skan avoids the need for another list walk when we destroy the hash 600169689Skan table. */ 601169689Skan next = el->next; 602169689Skan free (el); 603169689Skan 604169689Skan /* Go ahead and clear E->aux. It's not needed anymore and failure 605169689Skan to clear it will cause all kinds of unpleasant problems later. */ 606169689Skan e->aux = NULL; 607169689Skan 608169689Skan thread_stats.num_threaded_edges++; 609169689Skan 610169689Skan if (rd->dup_block) 611169689Skan { 612169689Skan edge e2; 613169689Skan 614169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 615169689Skan fprintf (dump_file, " Threaded jump %d --> %d to %d\n", 616169689Skan e->src->index, e->dest->index, rd->dup_block->index); 617169689Skan 618169689Skan rd->dup_block->count += e->count; 619169689Skan rd->dup_block->frequency += EDGE_FREQUENCY (e); 620169689Skan EDGE_SUCC (rd->dup_block, 0)->count += e->count; 621169689Skan /* Redirect the incoming edge to the appropriate duplicate 622169689Skan block. */ 623169689Skan e2 = redirect_edge_and_branch (e, rd->dup_block); 624169689Skan flush_pending_stmts (e2); 625169689Skan 626169689Skan if ((dump_file && (dump_flags & TDF_DETAILS)) 627169689Skan && e->src != e2->src) 628169689Skan fprintf (dump_file, " basic block %d created\n", e2->src->index); 629169689Skan } 630169689Skan else 631169689Skan { 632169689Skan if (dump_file && (dump_flags & TDF_DETAILS)) 633169689Skan fprintf (dump_file, " Threaded jump %d --> %d to %d\n", 634169689Skan e->src->index, e->dest->index, local_info->bb->index); 635169689Skan 636169689Skan /* We are using BB as the duplicate. Remove the unnecessary 637169689Skan outgoing edges and statements from BB. */ 638169689Skan remove_ctrl_stmt_and_useless_edges (local_info->bb, 639169689Skan rd->outgoing_edge->dest); 640169689Skan 641169689Skan /* And fixup the flags on the single remaining edge. */ 642169689Skan single_succ_edge (local_info->bb)->flags 643169689Skan &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL); 644169689Skan single_succ_edge (local_info->bb)->flags |= EDGE_FALLTHRU; 645169689Skan } 646169689Skan } 647169689Skan 648169689Skan /* Indicate that we actually threaded one or more jumps. */ 649169689Skan if (rd->incoming_edges) 650169689Skan local_info->jumps_threaded = true; 651169689Skan 652169689Skan return 1; 653169689Skan} 654169689Skan 655169689Skan/* Return true if this block has no executable statements other than 656169689Skan a simple ctrl flow instruction. When the number of outgoing edges 657169689Skan is one, this is equivalent to a "forwarder" block. */ 658169689Skan 659169689Skanstatic bool 660169689Skanredirection_block_p (basic_block bb) 661169689Skan{ 662169689Skan block_stmt_iterator bsi; 663169689Skan 664169689Skan /* Advance to the first executable statement. */ 665169689Skan bsi = bsi_start (bb); 666169689Skan while (!bsi_end_p (bsi) 667169689Skan && (TREE_CODE (bsi_stmt (bsi)) == LABEL_EXPR 668169689Skan || IS_EMPTY_STMT (bsi_stmt (bsi)))) 669169689Skan bsi_next (&bsi); 670169689Skan 671169689Skan /* Check if this is an empty block. */ 672169689Skan if (bsi_end_p (bsi)) 673169689Skan return true; 674169689Skan 675169689Skan /* Test that we've reached the terminating control statement. */ 676169689Skan return bsi_stmt (bsi) 677169689Skan && (TREE_CODE (bsi_stmt (bsi)) == COND_EXPR 678169689Skan || TREE_CODE (bsi_stmt (bsi)) == GOTO_EXPR 679169689Skan || TREE_CODE (bsi_stmt (bsi)) == SWITCH_EXPR); 680169689Skan} 681169689Skan 682169689Skan/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB 683169689Skan is reached via one or more specific incoming edges, we know which 684169689Skan outgoing edge from BB will be traversed. 685169689Skan 686169689Skan We want to redirect those incoming edges to the target of the 687169689Skan appropriate outgoing edge. Doing so avoids a conditional branch 688169689Skan and may expose new optimization opportunities. Note that we have 689169689Skan to update dominator tree and SSA graph after such changes. 690169689Skan 691169689Skan The key to keeping the SSA graph update manageable is to duplicate 692169689Skan the side effects occurring in BB so that those side effects still 693169689Skan occur on the paths which bypass BB after redirecting edges. 694169689Skan 695169689Skan We accomplish this by creating duplicates of BB and arranging for 696169689Skan the duplicates to unconditionally pass control to one specific 697169689Skan successor of BB. We then revector the incoming edges into BB to 698169689Skan the appropriate duplicate of BB. 699169689Skan 700169689Skan BB and its duplicates will have assignments to the same set of 701169689Skan SSA_NAMEs. Right now, we just call into update_ssa to update the 702169689Skan SSA graph for those names. 703169689Skan 704169689Skan We are also going to experiment with a true incremental update 705169689Skan scheme for the duplicated resources. One of the interesting 706169689Skan properties we can exploit here is that all the resources set 707169689Skan in BB will have the same IDFS, so we have one IDFS computation 708169689Skan per block with incoming threaded edges, which can lower the 709169689Skan cost of the true incremental update algorithm. */ 710169689Skan 711169689Skanstatic bool 712169689Skanthread_block (basic_block bb) 713169689Skan{ 714169689Skan /* E is an incoming edge into BB that we may or may not want to 715169689Skan redirect to a duplicate of BB. */ 716169689Skan edge e; 717169689Skan edge_iterator ei; 718169689Skan struct local_info local_info; 719169689Skan 720169689Skan /* FOUND_BACKEDGE indicates that we found an incoming backedge 721169689Skan into BB, in which case we may ignore certain jump threads 722169689Skan to avoid creating irreducible regions. */ 723169689Skan bool found_backedge = false; 724169689Skan 725169689Skan /* ALL indicates whether or not all incoming edges into BB should 726169689Skan be threaded to a duplicate of BB. */ 727169689Skan bool all = true; 728169689Skan 729169689Skan /* If optimizing for size, only thread this block if we don't have 730169689Skan to duplicate it or it's an otherwise empty redirection block. */ 731169689Skan if (optimize_size 732169689Skan && EDGE_COUNT (bb->preds) > 1 733169689Skan && !redirection_block_p (bb)) 734169689Skan { 735169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 736169689Skan e->aux = NULL; 737169689Skan return false; 738169689Skan } 739169689Skan 740169689Skan /* To avoid scanning a linear array for the element we need we instead 741169689Skan use a hash table. For normal code there should be no noticeable 742169689Skan difference. However, if we have a block with a large number of 743169689Skan incoming and outgoing edges such linear searches can get expensive. */ 744169689Skan redirection_data = htab_create (EDGE_COUNT (bb->succs), 745169689Skan redirection_data_hash, 746169689Skan redirection_data_eq, 747169689Skan free); 748169689Skan 749169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 750169689Skan found_backedge |= ((e->flags & EDGE_DFS_BACK) != 0); 751169689Skan 752169689Skan /* If BB has incoming backedges, then threading across BB might 753169689Skan introduce an irreducible region, which would be undesirable 754169689Skan as that inhibits various optimizations later. Prune away 755169689Skan any jump threading requests which we know will result in 756169689Skan an irreducible region. */ 757169689Skan if (found_backedge) 758169689Skan prune_undesirable_thread_requests (bb); 759169689Skan 760169689Skan /* Record each unique threaded destination into a hash table for 761169689Skan efficient lookups. */ 762169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 763169689Skan { 764169689Skan if (!e->aux) 765169689Skan { 766169689Skan all = false; 767169689Skan } 768169689Skan else 769169689Skan { 770169689Skan edge e2 = e->aux; 771169689Skan update_bb_profile_for_threading (e->dest, EDGE_FREQUENCY (e), 772169689Skan e->count, e->aux); 773169689Skan 774169689Skan /* Insert the outgoing edge into the hash table if it is not 775169689Skan already in the hash table. */ 776169689Skan lookup_redirection_data (e2, e, INSERT); 777169689Skan } 778169689Skan } 779169689Skan 780169689Skan /* If we are going to thread all incoming edges to an outgoing edge, then 781169689Skan BB will become unreachable. Rather than just throwing it away, use 782169689Skan it for one of the duplicates. Mark the first incoming edge with the 783169689Skan DO_NOT_DUPLICATE attribute. */ 784169689Skan if (all) 785169689Skan { 786169689Skan edge e = EDGE_PRED (bb, 0)->aux; 787169689Skan lookup_redirection_data (e, NULL, NO_INSERT)->do_not_duplicate = true; 788169689Skan } 789169689Skan 790169689Skan /* Now create duplicates of BB. 791169689Skan 792169689Skan Note that for a block with a high outgoing degree we can waste 793169689Skan a lot of time and memory creating and destroying useless edges. 794169689Skan 795169689Skan So we first duplicate BB and remove the control structure at the 796169689Skan tail of the duplicate as well as all outgoing edges from the 797169689Skan duplicate. We then use that duplicate block as a template for 798169689Skan the rest of the duplicates. */ 799169689Skan local_info.template_block = NULL; 800169689Skan local_info.bb = bb; 801169689Skan local_info.jumps_threaded = false; 802169689Skan htab_traverse (redirection_data, create_duplicates, &local_info); 803169689Skan 804169689Skan /* The template does not have an outgoing edge. Create that outgoing 805169689Skan edge and update PHI nodes as the edge's target as necessary. 806169689Skan 807169689Skan We do this after creating all the duplicates to avoid creating 808169689Skan unnecessary edges. */ 809169689Skan htab_traverse (redirection_data, fixup_template_block, &local_info); 810169689Skan 811169689Skan /* The hash table traversals above created the duplicate blocks (and the 812169689Skan statements within the duplicate blocks). This loop creates PHI nodes for 813169689Skan the duplicated blocks and redirects the incoming edges into BB to reach 814169689Skan the duplicates of BB. */ 815169689Skan htab_traverse (redirection_data, redirect_edges, &local_info); 816169689Skan 817169689Skan /* Done with this block. Clear REDIRECTION_DATA. */ 818169689Skan htab_delete (redirection_data); 819169689Skan redirection_data = NULL; 820169689Skan 821169689Skan /* Indicate to our caller whether or not any jumps were threaded. */ 822169689Skan return local_info.jumps_threaded; 823169689Skan} 824169689Skan 825169689Skan/* Walk through the registered jump threads and convert them into a 826169689Skan form convenient for this pass. 827169689Skan 828169689Skan Any block which has incoming edges threaded to outgoing edges 829169689Skan will have its entry in THREADED_BLOCK set. 830169689Skan 831169689Skan Any threaded edge will have its new outgoing edge stored in the 832169689Skan original edge's AUX field. 833169689Skan 834169689Skan This form avoids the need to walk all the edges in the CFG to 835169689Skan discover blocks which need processing and avoids unnecessary 836169689Skan hash table lookups to map from threaded edge to new target. */ 837169689Skan 838169689Skanstatic void 839169689Skanmark_threaded_blocks (bitmap threaded_blocks) 840169689Skan{ 841169689Skan unsigned int i; 842169689Skan 843169689Skan for (i = 0; i < VEC_length (edge, threaded_edges); i += 2) 844169689Skan { 845169689Skan edge e = VEC_index (edge, threaded_edges, i); 846169689Skan edge e2 = VEC_index (edge, threaded_edges, i + 1); 847169689Skan 848169689Skan e->aux = e2; 849169689Skan bitmap_set_bit (threaded_blocks, e->dest->index); 850169689Skan } 851169689Skan} 852169689Skan 853169689Skan 854169689Skan/* Walk through all blocks and thread incoming edges to the appropriate 855169689Skan outgoing edge for each edge pair recorded in THREADED_EDGES. 856169689Skan 857169689Skan It is the caller's responsibility to fix the dominance information 858169689Skan and rewrite duplicated SSA_NAMEs back into SSA form. 859169689Skan 860169689Skan Returns true if one or more edges were threaded, false otherwise. */ 861169689Skan 862169689Skanbool 863169689Skanthread_through_all_blocks (void) 864169689Skan{ 865169689Skan bool retval = false; 866169689Skan unsigned int i; 867169689Skan bitmap_iterator bi; 868169689Skan bitmap threaded_blocks; 869169689Skan 870169689Skan if (threaded_edges == NULL) 871169689Skan return false; 872169689Skan 873169689Skan threaded_blocks = BITMAP_ALLOC (NULL); 874169689Skan memset (&thread_stats, 0, sizeof (thread_stats)); 875169689Skan 876169689Skan mark_threaded_blocks (threaded_blocks); 877169689Skan 878169689Skan EXECUTE_IF_SET_IN_BITMAP (threaded_blocks, 0, i, bi) 879169689Skan { 880169689Skan basic_block bb = BASIC_BLOCK (i); 881169689Skan 882169689Skan if (EDGE_COUNT (bb->preds) > 0) 883169689Skan retval |= thread_block (bb); 884169689Skan } 885169689Skan 886169689Skan if (dump_file && (dump_flags & TDF_STATS)) 887169689Skan fprintf (dump_file, "\nJumps threaded: %lu\n", 888169689Skan thread_stats.num_threaded_edges); 889169689Skan 890169689Skan BITMAP_FREE (threaded_blocks); 891169689Skan threaded_blocks = NULL; 892169689Skan VEC_free (edge, heap, threaded_edges); 893169689Skan threaded_edges = NULL; 894169689Skan return retval; 895169689Skan} 896169689Skan 897169689Skan/* Register a jump threading opportunity. We queue up all the jump 898169689Skan threading opportunities discovered by a pass and update the CFG 899169689Skan and SSA form all at once. 900169689Skan 901169689Skan E is the edge we can thread, E2 is the new target edge. ie, we 902169689Skan are effectively recording that E->dest can be changed to E2->dest 903169689Skan after fixing the SSA graph. */ 904169689Skan 905169689Skanvoid 906169689Skanregister_jump_thread (edge e, edge e2) 907169689Skan{ 908169689Skan if (threaded_edges == NULL) 909169689Skan threaded_edges = VEC_alloc (edge, heap, 10); 910169689Skan 911169689Skan VEC_safe_push (edge, heap, threaded_edges, e); 912169689Skan VEC_safe_push (edge, heap, threaded_edges, e2); 913169689Skan} 914