bb-reorder.c revision 285830
19313Ssos/* Basic block reordering routines for the GNU compiler. 29313Ssos Copyright (C) 2000, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 39313Ssos 49313Ssos This file is part of GCC. 59313Ssos 69313Ssos GCC is free software; you can redistribute it and/or modify it 79313Ssos under the terms of the GNU General Public License as published by 89313Ssos the Free Software Foundation; either version 2, or (at your option) 914331Speter any later version. 109313Ssos 119313Ssos GCC is distributed in the hope that it will be useful, but WITHOUT 129313Ssos ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 139313Ssos or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 149313Ssos License for more details. 159313Ssos 169313Ssos You should have received a copy of the GNU General Public License 179313Ssos along with GCC; see the file COPYING. If not, write to the Free 189313Ssos Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 199313Ssos 02110-1301, USA. */ 209313Ssos 219313Ssos/* This (greedy) algorithm constructs traces in several rounds. 229313Ssos The construction starts from "seeds". The seed for the first round 239313Ssos is the entry point of function. When there are more than one seed 249313Ssos that one is selected first that has the lowest key in the heap 259313Ssos (see function bb_to_key). Then the algorithm repeatedly adds the most 269313Ssos probable successor to the end of a trace. Finally it connects the traces. 279313Ssos 2850477Speter There are two parameters: Branch Threshold and Exec Threshold. 299313Ssos If the edge to a successor of the actual basic block is lower than 309313Ssos Branch Threshold or the frequency of the successor is lower than 3149842Smarcel Exec Threshold the successor will be the seed in one of the next rounds. 3249842Smarcel Each round has these parameters lower than the previous one. 339313Ssos The last round has to have these parameters set to zero 349313Ssos so that the remaining blocks are picked up. 3512458Sbde 3612458Sbde The algorithm selects the most probable successor from all unvisited 379313Ssos successors and successors that have been added to this trace. 389313Ssos The other successors (that has not been "sent" to the next round) will be 3924131Sbde other seeds for this round and the secondary traces will start in them. 409313Ssos If the successor has not been visited in this trace it is added to the trace 419313Ssos (however, there is some heuristic for simple branches). 429313Ssos If the successor has been visited in this trace the loop has been found. 439313Ssos If the loop has many iterations the loop is rotated so that the 449313Ssos source block of the most probable edge going out from the loop 4512458Sbde is the last block of the trace. 4641931Sjulian If the loop has few iterations and there is no edge from the last block of 479313Ssos the loop going out from loop the loop header is duplicated. 489313Ssos Finally, the construction of the trace is terminated. 4914331Speter 509313Ssos When connecting traces it first checks whether there is an edge from the 5112652Sbde last block of one trace to the first block of another trace. 5212689Speter When there are still some unconnected traces it checks whether there exists 5312458Sbde a basic block BB such that BB is a successor of the last bb of one trace 5412689Speter and BB is a predecessor of the first block of another trace. In this case, 5512689Speter BB is duplicated and the traces are connected through this duplicate. 5612842Sbde The rest of traces are simply connected so there will be a jump to the 5712458Sbde beginning of the rest of trace. 5830855Skato 5930837Skato 6050818Smarcel References: 6150818Smarcel 6230837Skato "Software Trace Cache" 6312458Sbde A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999 6414331Speter http://citeseer.nj.nec.com/15361.html 6514331Speter 6650465Smarcel*/ 679313Ssos 6849849Smarcel#include "config.h" 6949849Smarcel#include "system.h" 7049626Smarcel#include "coretypes.h" 7149626Smarcel#include "tm.h" 7249626Smarcel#include "rtl.h" 7349626Smarcel#include "regs.h" 7449626Smarcel#include "flags.h" 7549626Smarcel#include "timevar.h" 769313Ssos#include "output.h" 7730994Sphk#include "cfglayout.h" 789313Ssos#include "fibheap.h" 799313Ssos#include "target.h" 8012858Speter#include "function.h" 819313Ssos#include "tm_p.h" 829313Ssos#include "obstack.h" 839313Ssos#include "expr.h" 8438127Sbde#include "params.h" 859313Ssos#include "toplev.h" 8638127Sbde#include "tree-pass.h" 8738127Sbde 889313Ssos#ifndef HAVE_conditional_execution 899313Ssos#define HAVE_conditional_execution 0 909313Ssos#endif 919313Ssos 9238127Sbde/* The number of rounds. In most cases there will only be 4 rounds, but 939313Ssos when partitioning hot and cold basic blocks into separate sections of 9438127Sbde the .o file there will be an extra round.*/ 9535058Sphk#define N_ROUNDS 5 9638127Sbde 9738127Sbde/* Stubs in case we don't have a return insn. 9838127Sbde We have to check at runtime too, not only compiletime. */ 9912858Speter 1009313Ssos#ifndef HAVE_return 1019313Ssos#define HAVE_return 0 1029313Ssos#define gen_return() NULL_RTX 10338127Sbde#endif 10438127Sbde 10538127Sbde 10638127Sbde/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */ 10738127Sbdestatic int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0}; 10838127Sbde 1099313Ssos/* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */ 1109313Ssosstatic int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0}; 1119313Ssos 1129313Ssos/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry 11330994Sphk block the edge destination is not duplicated while connecting traces. */ 1149313Ssos#define DUPLICATION_THRESHOLD 100 1159313Ssos 1169313Ssos/* Length of unconditional jump instruction. */ 1179313Ssosstatic int uncond_jump_length; 1189313Ssos 1199313Ssos/* Structure to hold needed information for each basic block. */ 1209313Ssostypedef struct bbro_basic_block_data_def 1219313Ssos{ 1229313Ssos /* Which trace is the bb start of (-1 means it is not a start of a trace). */ 1239313Ssos int start_of_trace; 1249313Ssos 1259313Ssos /* Which trace is the bb end of (-1 means it is not an end of a trace). */ 1269313Ssos int end_of_trace; 1279313Ssos 12830994Sphk /* Which trace is the bb in? */ 1299313Ssos int in_trace; 1309313Ssos 1319313Ssos /* Which heap is BB in (if any)? */ 13213503Sdyson fibheap_t heap; 13313503Sdyson 13414331Speter /* Which heap node is BB in (if any)? */ 1359313Ssos fibnode_t node; 1369313Ssos} bbro_basic_block_data; 13730994Sphk 1389313Ssos/* The current size of the following dynamic array. */ 1399313Ssosstatic int array_size; 1409313Ssos 1419313Ssos/* The array which holds needed information for basic blocks. */ 1429313Ssosstatic bbro_basic_block_data *bbd; 14312858Speter 14412858Speter/* To avoid frequent reallocation the size of arrays is greater than needed, 14512858Speter the number of elements is (not less than) 1.25 * size_wanted. */ 1469313Ssos#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5) 1479313Ssos 14837950Sbde/* Free the memory and set the pointer to NULL. */ 1499313Ssos#define FREE(P) (gcc_assert (P), free (P), P = 0) 1509313Ssos 1519313Ssos/* Structure for holding information about a trace. */ 15212858Speterstruct trace 15330994Sphk{ 15430994Sphk /* First and last basic block of the trace. */ 1559313Ssos basic_block first, last; 15630994Sphk 1579313Ssos /* The round of the STC creation which this trace was found in. */ 1589313Ssos int round; 1599313Ssos 1609313Ssos /* The length (i.e. the number of basic blocks) of the trace. */ 1619313Ssos int length; 1629313Ssos}; 16330994Sphk 1649313Ssos/* Maximum frequency and count of one of the entry blocks. */ 1659313Ssosstatic int max_entry_frequency; 16612130Sdgstatic gcov_type max_entry_count; 16714114Speter 1689313Ssos/* Local function prototypes. */ 16914703Sbdestatic void find_traces (int *, struct trace *); 17014703Sbdestatic basic_block rotate_loop (edge, struct trace *, int); 17114703Sbdestatic void mark_bb_visited (basic_block, int); 17214703Sbdestatic void find_traces_1_round (int, int, gcov_type, struct trace *, int *, 1739313Ssos int, fibheap_t *, int); 17414331Speterstatic basic_block copy_bb (basic_block, edge, basic_block, int); 17514114Speterstatic fibheapkey_t bb_to_key (basic_block); 1769313Ssosstatic bool better_edge_p (basic_block, edge, int, int, int, int, edge); 17714331Speterstatic void connect_traces (int, struct trace *); 17814331Speterstatic bool copy_bb_p (basic_block, int); 17914331Speterstatic int get_uncond_jump_length (void); 1809313Ssosstatic bool push_to_next_round_p (basic_block, int, int, int, gcov_type); 18149959Smarcelstatic void find_rarely_executed_basic_blocks_and_crossing_edges (edge *, 1829313Ssos int *, 1839313Ssos int *); 18414114Speterstatic void add_labels_and_missing_jumps (edge *, int); 18514114Speterstatic void add_reg_crossing_jump_notes (void); 18614114Speterstatic void fix_up_fall_thru_edges (void); 18714114Speterstatic void fix_edges_for_rarely_executed_code (edge *, int); 18849523Smarcelstatic void fix_crossing_conditional_branches (void); 18946571Speterstatic void fix_crossing_unconditional_branches (void); 19046571Speter 19114114Speter/* Check to see if bb should be pushed into the next round of trace 1929313Ssos collections or not. Reasons for pushing the block forward are 1). 19312130Sdg If the block is cold, we are doing partitioning, and there will be 19414114Speter another round (cold partition blocks are not supposed to be 19514114Speter collected into traces until the very last round); or 2). There will 19614114Speter be another round, and the basic block is not "hot enough" for the 19714114Speter current round of trace collection. */ 1989313Ssos 19914114Speterstatic bool 20014114Speterpush_to_next_round_p (basic_block bb, int round, int number_of_rounds, 20114114Speter int exec_th, gcov_type count_th) 20214114Speter{ 20314114Speter bool there_exists_another_round; 20414114Speter bool block_not_hot_enough; 20514114Speter 20614114Speter there_exists_another_round = round < number_of_rounds - 1; 20712130Sdg 20814114Speter block_not_hot_enough = (bb->frequency < exec_th 20914114Speter || bb->count < count_th 21011163Sjulian || probably_never_executed_bb_p (bb)); 2119313Ssos 21214114Speter if (there_exists_another_round 21314114Speter && block_not_hot_enough) 21414114Speter return true; 21546571Speter else 21646571Speter return false; 21714114Speter} 2189313Ssos 21914114Speter/* Find the traces for Software Trace Cache. Chain each trace through 22014114Speter RBI()->next. Store the number of traces to N_TRACES and description of 22114114Speter traces to TRACES. */ 22214114Speter 22314114Speterstatic void 22411163Sjulianfind_traces (int *n_traces, struct trace *traces) 2259313Ssos{ 22614114Speter int i; 22714114Speter int number_of_rounds; 22814114Speter edge e; 22911163Sjulian edge_iterator ei; 23014114Speter fibheap_t heap; 23114114Speter 23211163Sjulian /* Add one extra round of trace collection when partitioning hot/cold 2339313Ssos basic blocks into separate sections. The last round is for all the 23414114Speter cold blocks (and ONLY the cold blocks). */ 23514114Speter 23614114Speter number_of_rounds = N_ROUNDS - 1; 23746571Speter 23846571Speter /* Insert entry points of function into heap. */ 23914114Speter heap = fibheap_new (); 2409313Ssos max_entry_frequency = 0; 24146571Speter max_entry_count = 0; 24246571Speter FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) 24314114Speter { 2449313Ssos bbd[e->dest->index].heap = heap; 24514114Speter bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest), 24614114Speter e->dest); 24714114Speter if (e->dest->frequency > max_entry_frequency) 24822543Smpp max_entry_frequency = e->dest->frequency; 24914114Speter if (e->dest->count > max_entry_count) 25011163Sjulian max_entry_count = e->dest->count; 25114114Speter } 25214114Speter 25314114Speter /* Find the traces. */ 25414114Speter for (i = 0; i < number_of_rounds; i++) 25512130Sdg { 2569313Ssos gcov_type count_threshold; 25714114Speter 2589313Ssos if (dump_file) 2599313Ssos fprintf (dump_file, "STC - round %d\n", i + 1); 2609313Ssos 2619313Ssos if (max_entry_count < INT_MAX / 1000) 26214114Speter count_threshold = max_entry_count * exec_threshold[i] / 1000; 26314114Speter else 26414114Speter count_threshold = max_entry_count / 1000 * exec_threshold[i]; 26514114Speter 2669313Ssos find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000, 26714114Speter max_entry_frequency * exec_threshold[i] / 1000, 26814114Speter count_threshold, traces, n_traces, i, &heap, 2699313Ssos number_of_rounds); 2709313Ssos } 2719313Ssos fibheap_delete (heap); 2729313Ssos 2739313Ssos if (dump_file) 2749313Ssos { 2759313Ssos for (i = 0; i < *n_traces; i++) 2769313Ssos { 2779313Ssos basic_block bb; 2789313Ssos fprintf (dump_file, "Trace %d (round %d): ", i + 1, 2799313Ssos traces[i].round + 1); 28014114Speter for (bb = traces[i].first; bb != traces[i].last; bb = bb->aux) 28114114Speter fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency); 2829313Ssos fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency); 2839313Ssos } 2849313Ssos fflush (dump_file); 28514114Speter } 2869313Ssos} 28714114Speter 28814114Speter/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE 28915538Sphk (with sequential number TRACE_N). */ 29014114Speter 29114114Speterstatic basic_block 29214114Speterrotate_loop (edge back_edge, struct trace *trace, int trace_n) 29314114Speter{ 29414114Speter basic_block bb; 29514114Speter 29614114Speter /* Information about the best end (end after rotation) of the loop. */ 29714114Speter basic_block best_bb = NULL; 29814114Speter edge best_edge = NULL; 29914114Speter int best_freq = -1; 30014114Speter gcov_type best_count = -1; 30114114Speter /* The best edge is preferred when its destination is not visited yet 30214114Speter or is a start block of some trace. */ 30314114Speter bool is_preferred = false; 30414114Speter 30533821Sbde /* Find the most frequent edge that goes out from current trace. */ 30633821Sbde bb = back_edge->dest; 30714114Speter do 30814114Speter { 30914114Speter edge e; 31014114Speter edge_iterator ei; 31114114Speter 31214114Speter FOR_EACH_EDGE (e, ei, bb->succs) 31314114Speter if (e->dest != EXIT_BLOCK_PTR 31414114Speter && e->dest->il.rtl->visited != trace_n 31514114Speter && (e->flags & EDGE_CAN_FALLTHRU) 31614114Speter && !(e->flags & EDGE_COMPLEX)) 3179313Ssos { 3189313Ssos if (is_preferred) 3199313Ssos { 3209313Ssos /* The best edge is preferred. */ 32115538Sphk if (!e->dest->il.rtl->visited 3229313Ssos || bbd[e->dest->index].start_of_trace >= 0) 32337950Sbde { 3249313Ssos /* The current edge E is also preferred. */ 3259313Ssos int freq = EDGE_FREQUENCY (e); 3269313Ssos if (freq > best_freq || e->count > best_count) 3279313Ssos { 32814114Speter best_freq = freq; 32914114Speter best_count = e->count; 33014114Speter best_edge = e; 33114114Speter best_bb = bb; 33214114Speter } 3339313Ssos } 33414114Speter } 33514114Speter else 3369313Ssos { 33714114Speter if (!e->dest->il.rtl->visited 3389313Ssos || bbd[e->dest->index].start_of_trace >= 0) 33914114Speter { 3409313Ssos /* The current edge E is preferred. */ 3419313Ssos is_preferred = true; 34214584Speter best_freq = EDGE_FREQUENCY (e); 34312130Sdg best_count = e->count; 3449313Ssos best_edge = e; 34514114Speter best_bb = bb; 3469313Ssos } 34714114Speter else 34838354Sbde { 34938354Sbde int freq = EDGE_FREQUENCY (e); 3509313Ssos if (!best_edge || freq > best_freq || e->count > best_count) 35114114Speter { 35214114Speter best_freq = freq; 35314471Speter best_count = e->count; 3549313Ssos best_edge = e; 3559313Ssos best_bb = bb; 35614114Speter } 3579313Ssos } 3589313Ssos } 3599313Ssos } 36037950Sbde bb = bb->aux; 3619313Ssos } 36214114Speter while (bb != back_edge->dest); 36314114Speter 36414114Speter if (best_bb) 36514114Speter { 36614114Speter /* Rotate the loop so that the BEST_EDGE goes out from the last block of 36714114Speter the trace. */ 36814114Speter if (back_edge->dest == trace->first) 36914114Speter { 37014114Speter trace->first = best_bb->aux; 37114114Speter } 3729313Ssos else 37314114Speter { 3749313Ssos basic_block prev_bb; 37512130Sdg 3769313Ssos for (prev_bb = trace->first; 37714114Speter prev_bb->aux != back_edge->dest; 3789313Ssos prev_bb = prev_bb->aux) 3799313Ssos ; 3809313Ssos prev_bb->aux = best_bb->aux; 3819313Ssos 3829313Ssos /* Try to get rid of uncond jump to cond jump. */ 38314114Speter if (single_succ_p (prev_bb)) 38414114Speter { 38514114Speter basic_block header = single_succ (prev_bb); 38614114Speter 38714114Speter /* Duplicate HEADER if it is a small block containing cond jump 38814114Speter in the end. */ 38914114Speter if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0) 39014114Speter && !find_reg_note (BB_END (header), REG_CROSSING_JUMP, 39114331Speter NULL_RTX)) 39213503Sdyson copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n); 39314114Speter } 3949313Ssos } 39514114Speter } 3969313Ssos else 39714114Speter { 39814114Speter /* We have not found suitable loop tail so do no rotation. */ 39914114Speter best_bb = back_edge->src; 40014114Speter } 40114114Speter best_bb->aux = NULL; 40214114Speter return best_bb; 40322543Smpp} 40414114Speter 40514114Speter/* This function marks BB that it was visited in trace number TRACE. */ 40614114Speter 40714114Speterstatic void 40814114Spetermark_bb_visited (basic_block bb, int trace) 40914471Speter{ 41014114Speter bb->il.rtl->visited = trace; 41114114Speter if (bbd[bb->index].heap) 4129313Ssos { 4139313Ssos fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node); 41414331Speter bbd[bb->index].heap = NULL; 41514331Speter bbd[bb->index].node = NULL; 41614331Speter } 41714331Speter} 41814331Speter 41914331Speter/* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do 42014331Speter not include basic blocks their probability is lower than BRANCH_TH or their 4219313Ssos frequency is lower than EXEC_TH into traces (or count is lower than 4229313Ssos COUNT_TH). It stores the new traces into TRACES and modifies the number of 4239313Ssos traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It 42430994Sphk expects that starting basic blocks are in *HEAP and at the end it deletes 4259313Ssos *HEAP and stores starting points for the next round into new *HEAP. */ 42614331Speter 42714331Speterstatic void 4289313Ssosfind_traces_1_round (int branch_th, int exec_th, gcov_type count_th, 4299313Ssos struct trace *traces, int *n_traces, int round, 43014331Speter fibheap_t *heap, int number_of_rounds) 43149959Smarcel{ 43214331Speter /* Heap for discarded basic blocks which are possible starting points for 4339313Ssos the next round. */ 4349313Ssos fibheap_t new_heap = fibheap_new (); 4359313Ssos 43614331Speter while (!fibheap_empty (*heap)) 43714331Speter { 43814331Speter basic_block bb; 43914331Speter struct trace *trace; 44014331Speter edge best_edge, e; 44114331Speter fibheapkey_t key; 44214331Speter edge_iterator ei; 44330994Sphk 44414331Speter bb = fibheap_extract_min (*heap); 44514331Speter bbd[bb->index].heap = NULL; 44614331Speter bbd[bb->index].node = NULL; 44730994Sphk 44814331Speter if (dump_file) 44914331Speter fprintf (dump_file, "Getting bb %d\n", bb->index); 45014331Speter 45114331Speter /* If the BB's frequency is too low send BB to the next round. When 45214331Speter partitioning hot/cold blocks into separate sections, make sure all 45314331Speter the cold blocks (and ONLY the cold blocks) go into the (extra) final 4549313Ssos round. */ 45537950Sbde 45637950Sbde if (push_to_next_round_p (bb, round, number_of_rounds, exec_th, 45737950Sbde count_th)) 45837950Sbde { 4599313Ssos int key = bb_to_key (bb); 46014331Speter bbd[bb->index].heap = new_heap; 46114331Speter bbd[bb->index].node = fibheap_insert (new_heap, key, bb); 46214331Speter 46314331Speter if (dump_file) 46414331Speter fprintf (dump_file, 46514331Speter " Possible start point of next round: %d (key: %d)\n", 46614331Speter bb->index, key); 46714331Speter continue; 46814331Speter } 46914331Speter 47014331Speter trace = traces + *n_traces; 47114331Speter trace->first = bb; 47214331Speter trace->round = round; 47314331Speter trace->length = 0; 47414331Speter bbd[bb->index].in_trace = *n_traces; 47537950Sbde (*n_traces)++; 47637950Sbde 47714331Speter do 47814331Speter { 47914331Speter int prob, freq; 48014331Speter bool ends_in_call; 48114331Speter 48214331Speter /* The probability and frequency of the best edge. */ 48314331Speter int best_prob = INT_MIN / 2; 48414331Speter int best_freq = INT_MIN / 2; 48514331Speter 48614331Speter best_edge = NULL; 48714331Speter mark_bb_visited (bb, *n_traces); 48814331Speter trace->length++; 48914331Speter 49014331Speter if (dump_file) 49114331Speter fprintf (dump_file, "Basic block %d was visited in trace %d\n", 49235058Sphk bb->index, *n_traces - 1); 49314331Speter 49414331Speter ends_in_call = block_ends_with_call_p (bb); 49514331Speter 49614331Speter /* Select the successor that will be placed after BB. */ 49714331Speter FOR_EACH_EDGE (e, ei, bb->succs) 49814331Speter { 49914331Speter gcc_assert (!(e->flags & EDGE_FAKE)); 50030994Sphk 50114331Speter if (e->dest == EXIT_BLOCK_PTR) 50249959Smarcel continue; 50314331Speter 50414331Speter if (e->dest->il.rtl->visited 50514331Speter && e->dest->il.rtl->visited != *n_traces) 50614331Speter continue; 50714331Speter 50814331Speter if (BB_PARTITION (e->dest) != BB_PARTITION (bb)) 50914331Speter continue; 51014331Speter 51114331Speter prob = e->probability; 51214331Speter freq = e->dest->frequency; 51314331Speter 51414331Speter /* The only sensible preference for a call instruction is the 51514331Speter fallthru edge. Don't bother selecting anything else. */ 51630994Sphk if (ends_in_call) 51714331Speter { 51814331Speter if (e->flags & EDGE_CAN_FALLTHRU) 51914331Speter { 52014331Speter best_edge = e; 52114331Speter best_prob = prob; 52214331Speter best_freq = freq; 52314331Speter } 52414331Speter continue; 52514331Speter } 52614331Speter 52735058Sphk /* Edge that cannot be fallthru or improbable or infrequent 52814331Speter successor (i.e. it is unsuitable successor). */ 52935058Sphk if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX) 53014331Speter || prob < branch_th || EDGE_FREQUENCY (e) < exec_th 53137950Sbde || e->count < count_th) 53237950Sbde continue; 53314331Speter 53414331Speter /* If partitioning hot/cold basic blocks, don't consider edges 53514331Speter that cross section boundaries. */ 53614331Speter 53714331Speter if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, 53814331Speter best_edge)) 53914331Speter { 54049959Smarcel best_edge = e; 54114331Speter best_prob = prob; 54214331Speter best_freq = freq; 5439313Ssos } 5449313Ssos } 5459313Ssos 54630994Sphk /* If the best destination has multiple predecessors, and can be 5479313Ssos duplicated cheaper than a jump, don't allow it to be added 54846129Sluoqi to a trace. We'll duplicate it when connecting traces. */ 5499313Ssos if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2 5509313Ssos && copy_bb_p (best_edge->dest, 0)) 55149959Smarcel best_edge = NULL; 5529313Ssos 5539313Ssos /* Add all non-selected successors to the heaps. */ 55446129Sluoqi FOR_EACH_EDGE (e, ei, bb->succs) 5559313Ssos { 5569313Ssos if (e == best_edge 5579313Ssos || e->dest == EXIT_BLOCK_PTR 55846129Sluoqi || e->dest->il.rtl->visited) 55946129Sluoqi continue; 5609313Ssos 5619313Ssos key = bb_to_key (e->dest); 5629313Ssos 5639313Ssos if (bbd[e->dest->index].heap) 56430994Sphk { 5659313Ssos /* E->DEST is already in some heap. */ 5669313Ssos if (key != bbd[e->dest->index].node->key) 5679313Ssos { 5689313Ssos if (dump_file) 56949959Smarcel { 5709313Ssos fprintf (dump_file, 57144384Sjulian "Changing key for bb %d from %ld to %ld.\n", 5729313Ssos e->dest->index, 57330994Sphk (long) bbd[e->dest->index].node->key, 57430994Sphk key); 5759313Ssos } 5769313Ssos fibheap_replace_key (bbd[e->dest->index].heap, 5779313Ssos bbd[e->dest->index].node, key); 57849890Smarcel } 57949890Smarcel } 58049890Smarcel else 58149890Smarcel { 58249890Smarcel fibheap_t which_heap = *heap; 58349890Smarcel 58449959Smarcel prob = e->probability; 58549890Smarcel freq = EDGE_FREQUENCY (e); 58649890Smarcel 58749890Smarcel if (!(e->flags & EDGE_CAN_FALLTHRU) 58849890Smarcel || (e->flags & EDGE_COMPLEX) 58949890Smarcel || prob < branch_th || freq < exec_th 59049890Smarcel || e->count < count_th) 59149890Smarcel { 59249890Smarcel /* When partitioning hot/cold basic blocks, make sure 59349890Smarcel the cold blocks (and only the cold blocks) all get 59449890Smarcel pushed to the last round of trace collection. */ 59541931Sjulian 59641931Sjulian if (push_to_next_round_p (e->dest, round, 59741931Sjulian number_of_rounds, 59841931Sjulian exec_th, count_th)) 59941931Sjulian which_heap = new_heap; 60041931Sjulian } 60141931Sjulian 60241931Sjulian bbd[e->dest->index].heap = which_heap; 60341931Sjulian bbd[e->dest->index].node = fibheap_insert (which_heap, 60441931Sjulian key, e->dest); 60541931Sjulian 60641931Sjulian if (dump_file) 60741931Sjulian { 60841931Sjulian fprintf (dump_file, 60941931Sjulian " Possible start of %s round: %d (key: %ld)\n", 61041931Sjulian (which_heap == new_heap) ? "next" : "this", 61141931Sjulian e->dest->index, (long) key); 61249959Smarcel } 61349959Smarcel 61449959Smarcel } 61549959Smarcel } 61649959Smarcel 61741931Sjulian if (best_edge) /* Suitable successor was found. */ 61841931Sjulian { 61941931Sjulian if (best_edge->dest->il.rtl->visited == *n_traces) 62041931Sjulian { 62144384Sjulian /* We do nothing with one basic block loops. */ 62241931Sjulian if (best_edge->dest != bb) 62341931Sjulian { 62441931Sjulian if (EDGE_FREQUENCY (best_edge) 62541931Sjulian > 4 * best_edge->dest->frequency / 5) 62641931Sjulian { 62741931Sjulian /* The loop has at least 4 iterations. If the loop 62841931Sjulian header is not the first block of the function 62941931Sjulian we can rotate the loop. */ 63041931Sjulian 63141931Sjulian if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb) 63241931Sjulian { 63341931Sjulian if (dump_file) 63441931Sjulian { 63541931Sjulian fprintf (dump_file, 63641931Sjulian "Rotating loop %d - %d\n", 63741931Sjulian best_edge->dest->index, bb->index); 63841931Sjulian } 63941931Sjulian bb->aux = best_edge->dest; 64041931Sjulian bbd[best_edge->dest->index].in_trace = 64144384Sjulian (*n_traces) - 1; 64241931Sjulian bb = rotate_loop (best_edge, trace, *n_traces); 64341931Sjulian } 64441931Sjulian } 64541931Sjulian else 64641931Sjulian { 64742054Sjulian /* The loop has less than 4 iterations. */ 64841931Sjulian 64941931Sjulian if (single_succ_p (bb) 65041931Sjulian && copy_bb_p (best_edge->dest, !optimize_size)) 65141931Sjulian { 65249959Smarcel bb = copy_bb (best_edge->dest, best_edge, bb, 65349959Smarcel *n_traces); 65441931Sjulian trace->length++; 65541931Sjulian } 65641931Sjulian } 65741931Sjulian } 65814331Speter 65914331Speter /* Terminate the trace. */ 6609313Ssos break; 6619313Ssos } 6629313Ssos else 6639313Ssos { 6649313Ssos /* Check for a situation 6659313Ssos 66614331Speter A 66714331Speter /| 66841931Sjulian B | 66941931Sjulian \| 67014331Speter C 67130994Sphk 67214331Speter where 67312858Speter EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC) 6749313Ssos >= EDGE_FREQUENCY (AC). 6759313Ssos (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) ) 6769313Ssos Best ordering is then A B C. 6779313Ssos 6789313Ssos This situation is created for example by: 6799313Ssos 6809313Ssos if (A) B; 68112858Speter C; 6829313Ssos 68314331Speter */ 6849313Ssos 6859313Ssos FOR_EACH_EDGE (e, ei, bb->succs) 6869313Ssos if (e != best_edge 6879313Ssos && (e->flags & EDGE_CAN_FALLTHRU) 6889313Ssos && !(e->flags & EDGE_COMPLEX) 68937950Sbde && !e->dest->il.rtl->visited 69037950Sbde && single_pred_p (e->dest) 69137950Sbde && !(e->flags & EDGE_CROSSING) 6929313Ssos && single_succ_p (e->dest) 6939313Ssos && (single_succ_edge (e->dest)->flags 6949313Ssos & EDGE_CAN_FALLTHRU) 6959313Ssos && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX) 6969313Ssos && single_succ (e->dest) == best_edge->dest 6979313Ssos && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)) 6989313Ssos { 6999313Ssos best_edge = e; 7009313Ssos if (dump_file) 7019313Ssos fprintf (dump_file, "Selecting BB %d\n", 70241931Sjulian best_edge->dest->index); 70342360Sjulian break; 70441931Sjulian } 70541931Sjulian 70642360Sjulian bb->aux = best_edge->dest; 70742360Sjulian bbd[best_edge->dest->index].in_trace = (*n_traces) - 1; 70842360Sjulian bb = best_edge->dest; 70942360Sjulian } 71042360Sjulian } 71142360Sjulian } 71242360Sjulian while (best_edge); 71342360Sjulian trace->last = bb; 71442360Sjulian bbd[trace->first->index].start_of_trace = *n_traces - 1; 71542360Sjulian bbd[trace->last->index].end_of_trace = *n_traces - 1; 71642360Sjulian 71742360Sjulian /* The trace is terminated so we have to recount the keys in heap 71842360Sjulian (some block can have a lower key because now one of its predecessors 71942360Sjulian is an end of the trace). */ 72042360Sjulian FOR_EACH_EDGE (e, ei, bb->succs) 72142360Sjulian { 72242360Sjulian if (e->dest == EXIT_BLOCK_PTR 72342360Sjulian || e->dest->il.rtl->visited) 72442360Sjulian continue; 72541931Sjulian 72641931Sjulian if (bbd[e->dest->index].heap) 72742360Sjulian { 72841931Sjulian key = bb_to_key (e->dest); 72941931Sjulian if (key != bbd[e->dest->index].node->key) 73041931Sjulian { 73141931Sjulian if (dump_file) 73241931Sjulian { 73341931Sjulian fprintf (dump_file, 73441931Sjulian "Changing key for bb %d from %ld to %ld.\n", 73541931Sjulian e->dest->index, 73642360Sjulian (long) bbd[e->dest->index].node->key, key); 73742360Sjulian } 73842360Sjulian fibheap_replace_key (bbd[e->dest->index].heap, 73942360Sjulian bbd[e->dest->index].node, 74042360Sjulian key); 74141931Sjulian } 74241931Sjulian } 74341931Sjulian } 74442360Sjulian } 74541931Sjulian 74641931Sjulian fibheap_delete (*heap); 74743208Sjulian 74825219Smsmith /* "Return" the new heap. */ 7499313Ssos *heap = new_heap; 7509313Ssos} 7519313Ssos 75230994Sphk/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add 7539313Ssos it to trace after BB, mark OLD_BB visited and update pass' data structures 7549313Ssos (TRACE is a number of trace which OLD_BB is duplicated to). */ 75537548Sjkh 75637548Sjkhstatic basic_block 75737548Sjkhcopy_bb (basic_block old_bb, edge e, basic_block bb, int trace) 75837548Sjkh{ 75937548Sjkh basic_block new_bb; 76037548Sjkh 76137548Sjkh new_bb = duplicate_block (old_bb, e, bb); 76237548Sjkh BB_COPY_PARTITION (new_bb, old_bb); 76337548Sjkh 76437548Sjkh gcc_assert (e->dest == new_bb); 76537950Sbde gcc_assert (!e->dest->il.rtl->visited); 76637950Sbde 76737950Sbde if (dump_file) 76837548Sjkh fprintf (dump_file, 76937548Sjkh "Duplicated bb %d (created bb %d)\n", 77037548Sjkh old_bb->index, new_bb->index); 77137548Sjkh new_bb->il.rtl->visited = trace; 77237548Sjkh new_bb->aux = bb->aux; 77337548Sjkh bb->aux = new_bb; 77437548Sjkh 77537548Sjkh if (new_bb->index >= array_size || last_basic_block > array_size) 77637548Sjkh { 77737548Sjkh int i; 77837548Sjkh int new_size; 77937548Sjkh 78037548Sjkh new_size = MAX (last_basic_block, new_bb->index + 1); 78137548Sjkh new_size = GET_ARRAY_SIZE (new_size); 78237548Sjkh bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data)); 78337548Sjkh for (i = array_size; i < new_size; i++) 78437548Sjkh { 78537548Sjkh bbd[i].start_of_trace = -1; 78637548Sjkh bbd[i].in_trace = -1; 78714331Speter bbd[i].end_of_trace = -1; 78830994Sphk bbd[i].heap = NULL; 78914331Speter bbd[i].node = NULL; 79014331Speter } 7919313Ssos array_size = new_size; 79214331Speter 79314331Speter if (dump_file) 79414331Speter { 79514331Speter fprintf (dump_file, 79630994Sphk "Growing the dynamic array to %d elements.\n", 79714331Speter array_size); 79814331Speter } 7999313Ssos } 80030994Sphk 8019313Ssos bbd[new_bb->index].in_trace = trace; 8029313Ssos 80341650Sjkh return new_bb; 8049313Ssos} 8059313Ssos 80649959Smarcel/* Compute and return the key (for the heap) of the basic block BB. */ 8079313Ssos 80841650Sjkhstatic fibheapkey_t 80946571Speterbb_to_key (basic_block bb) 81046571Speter{ 81141650Sjkh edge e; 8129313Ssos edge_iterator ei; 81341650Sjkh int priority = 0; 81441650Sjkh 81546571Speter /* Do not start in probably never executed blocks. */ 81646571Speter 81741650Sjkh if (BB_PARTITION (bb) == BB_COLD_PARTITION 8189313Ssos || probably_never_executed_bb_p (bb)) 81941650Sjkh return BB_FREQ_MAX; 82041650Sjkh 82141650Sjkh /* Prefer blocks whose predecessor is an end of some trace 82230994Sphk or whose predecessor edge is EDGE_DFS_BACK. */ 8239313Ssos FOR_EACH_EDGE (e, ei, bb->preds) 8249313Ssos { 8259313Ssos if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0) 8269313Ssos || (e->flags & EDGE_DFS_BACK)) 82730994Sphk { 8289313Ssos int edge_freq = EDGE_FREQUENCY (e); 8299313Ssos 8309313Ssos if (edge_freq > priority) 8319313Ssos priority = edge_freq; 8329313Ssos } 8339313Ssos } 83449959Smarcel 8359313Ssos if (priority) 8369313Ssos /* The block with priority should have significantly lower key. */ 8379313Ssos return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); 83814331Speter return -bb->frequency; 8399313Ssos} 84030994Sphk 8419313Ssos/* Return true when the edge E from basic block BB is better than the temporary 8429313Ssos best edge (details are in function). The probability of edge E is PROB. The 8439313Ssos frequency of the successor is FREQ. The current best probability is 84414331Speter BEST_PROB, the best frequency is BEST_FREQ. 8459313Ssos The edge is considered to be equivalent when PROB does not differ much from 8469313Ssos BEST_PROB; similarly for frequency. */ 8479313Ssos 8489313Ssosstatic bool 8499313Ssosbetter_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob, 8509313Ssos int best_freq, edge cur_best_edge) 85114381Speter{ 85214381Speter bool is_better_edge; 85314381Speter 8549313Ssos /* The BEST_* values do not have to be best, but can be a bit smaller than 85530994Sphk maximum values. */ 8569313Ssos int diff_prob = best_prob / 10; 8579313Ssos int diff_freq = best_freq / 10; 85814331Speter 85914381Speter if (prob > best_prob + diff_prob) 86016322Sgpalmer /* The edge has higher probability than the temporary best edge. */ 8619313Ssos is_better_edge = true; 8629313Ssos else if (prob < best_prob - diff_prob) 86349959Smarcel /* The edge has lower probability than the temporary best edge. */ 8649313Ssos is_better_edge = false; 86514381Speter else if (freq < best_freq - diff_freq) 86614381Speter /* The edge and the temporary best edge have almost equivalent 86714381Speter probabilities. The higher frequency of a successor now means 86814381Speter that there is another edge going into that successor. 86914381Speter This successor has lower frequency so it is better. */ 87014381Speter is_better_edge = true; 87114381Speter else if (freq > best_freq + diff_freq) 87214381Speter /* This successor has higher frequency so it is worse. */ 87314381Speter is_better_edge = false; 87414381Speter else if (e->dest->prev_bb == bb) 87514381Speter /* The edges have equivalent probabilities and the successors 87614381Speter have equivalent frequencies. Select the previous successor. */ 87736119Sphk is_better_edge = true; 87830994Sphk else 87914381Speter is_better_edge = false; 8809313Ssos 8819313Ssos /* If we are doing hot/cold partitioning, make sure that we always favor 8829313Ssos non-crossing edges over crossing edges. */ 88330994Sphk 8849313Ssos if (!is_better_edge 88550345Smarcel && flag_reorder_blocks_and_partition 88650465Smarcel && cur_best_edge 8879313Ssos && (cur_best_edge->flags & EDGE_CROSSING) 8889313Ssos && !(e->flags & EDGE_CROSSING)) 88950345Smarcel is_better_edge = true; 8909313Ssos 89150345Smarcel return is_better_edge; 89250465Smarcel} 89350465Smarcel 89450465Smarcel/* Connect traces in array TRACES, N_TRACES is the count of traces. */ 89550345Smarcel 89650465Smarcelstatic void 89750345Smarcelconnect_traces (int n_traces, struct trace *traces) 89850465Smarcel{ 89950345Smarcel int i; 90050345Smarcel bool *connected; 90150345Smarcel bool two_passes; 90250345Smarcel int last_trace; 90350345Smarcel int current_pass; 90450345Smarcel int current_partition; 9059313Ssos int freq_threshold; 9069313Ssos gcov_type count_threshold; 90714381Speter 90814381Speter freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000; 90914381Speter if (max_entry_count < INT_MAX / 1000) 91014381Speter count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000; 9119313Ssos else 9129313Ssos count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD; 91330994Sphk 9149313Ssos connected = XCNEWVEC (bool, n_traces); 91512858Speter last_trace = -1; 91612858Speter current_pass = 1; 9179313Ssos current_partition = BB_PARTITION (traces[0].first); 91812858Speter two_passes = false; 91914381Speter 92014381Speter if (flag_reorder_blocks_and_partition) 92114381Speter for (i = 0; i < n_traces && !two_passes; i++) 92214331Speter if (BB_PARTITION (traces[0].first) 9239313Ssos != BB_PARTITION (traces[i].first)) 92414331Speter two_passes = true; 92514331Speter 92614331Speter for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++) 9279313Ssos { 92849959Smarcel int t = i; 9299313Ssos int t2; 93014381Speter edge e, best; 93114381Speter int best_len; 93214381Speter 93314381Speter if (i >= n_traces) 93414381Speter { 93514381Speter gcc_assert (two_passes && current_pass == 1); 93614381Speter i = 0; 93714381Speter t = i; 93814381Speter current_pass = 2; 93914381Speter if (current_partition == BB_HOT_PARTITION) 94014381Speter current_partition = BB_COLD_PARTITION; 94114381Speter else 94214381Speter current_partition = BB_HOT_PARTITION; 94314381Speter } 94414381Speter 94512858Speter if (connected[t]) 94630994Sphk continue; 9479313Ssos 9489313Ssos if (two_passes 94944384Sjulian && BB_PARTITION (traces[t].first) != current_partition) 95044384Sjulian continue; 9519313Ssos 95230994Sphk connected[t] = true; 9539313Ssos 95412858Speter /* Find the predecessor traces. */ 9559313Ssos for (t2 = t; t2 > 0;) 9569313Ssos { 9579313Ssos edge_iterator ei; 9589313Ssos best = NULL; 95912858Speter best_len = 0; 9609313Ssos FOR_EACH_EDGE (e, ei, traces[t2].first->preds) 9619313Ssos { 9629313Ssos int si = e->src->index; 96337950Sbde 96437950Sbde if (e->src != ENTRY_BLOCK_PTR 9659313Ssos && (e->flags & EDGE_CAN_FALLTHRU) 9669313Ssos && !(e->flags & EDGE_COMPLEX) 9679313Ssos && bbd[si].end_of_trace >= 0 96841931Sjulian && !connected[bbd[si].end_of_trace] 96944384Sjulian && (BB_PARTITION (e->src) == current_partition) 97044384Sjulian && (!best 97144384Sjulian || e->probability > best->probability 9729313Ssos || (e->probability == best->probability 9739313Ssos && traces[bbd[si].end_of_trace].length > best_len))) 97444384Sjulian { 9759313Ssos best = e; 97643208Sjulian best_len = traces[bbd[si].end_of_trace].length; 97714331Speter } 97844384Sjulian } 97914331Speter if (best) 98014331Speter { 98114331Speter best->src->aux = best->dest; 98214331Speter t2 = bbd[best->src->index].end_of_trace; 98314331Speter connected[t2] = true; 98414331Speter 98514331Speter if (dump_file) 98614331Speter { 98714331Speter fprintf (dump_file, "Connection: %d %d\n", 98814331Speter best->src->index, best->dest->index); 9899313Ssos } 9909313Ssos } 99114331Speter else 99230994Sphk break; 9939313Ssos } 99412858Speter 9959313Ssos if (last_trace >= 0) 9969313Ssos traces[last_trace].last->aux = traces[t2].first; 9979313Ssos last_trace = t; 9989313Ssos 99912858Speter /* Find the successor traces. */ 10009313Ssos while (1) 10019313Ssos { 10029313Ssos /* Find the continuation of the chain. */ 100337950Sbde edge_iterator ei; 100437950Sbde best = NULL; 100537950Sbde best_len = 0; 10069313Ssos FOR_EACH_EDGE (e, ei, traces[t].last->succs) 10079313Ssos { 10089313Ssos int di = e->dest->index; 100941931Sjulian 101044384Sjulian if (e->dest != EXIT_BLOCK_PTR 101144384Sjulian && (e->flags & EDGE_CAN_FALLTHRU) 101244384Sjulian && !(e->flags & EDGE_COMPLEX) 10139313Ssos && bbd[di].start_of_trace >= 0 10149313Ssos && !connected[bbd[di].start_of_trace] 101544384Sjulian && (BB_PARTITION (e->dest) == current_partition) 10169313Ssos && (!best 101714331Speter || e->probability > best->probability 101814331Speter || (e->probability == best->probability 101914331Speter && traces[bbd[di].start_of_trace].length > best_len))) 102014331Speter { 102144384Sjulian best = e; 102214331Speter best_len = traces[bbd[di].start_of_trace].length; 102314331Speter } 102414331Speter } 102514331Speter 102614331Speter if (best) 102714331Speter { 102814331Speter if (dump_file) 102914331Speter { 103014331Speter fprintf (dump_file, "Connection: %d %d\n", 103114331Speter best->src->index, best->dest->index); 10329313Ssos } 103313420Ssos t = bbd[best->dest->index].start_of_trace; 103414331Speter traces[last_trace].last->aux = traces[t].first; 103530994Sphk connected[t] = true; 103613420Ssos last_trace = t; 103714331Speter } 103814331Speter else 103914331Speter { 104014331Speter /* Try to connect the traces by duplication of 1 block. */ 104114331Speter edge e2; 104214331Speter basic_block next_bb = NULL; 104314331Speter bool try_copy = false; 104414331Speter 104514331Speter FOR_EACH_EDGE (e, ei, traces[t].last->succs) 104649959Smarcel if (e->dest != EXIT_BLOCK_PTR 104749959Smarcel && (e->flags & EDGE_CAN_FALLTHRU) 104814331Speter && !(e->flags & EDGE_COMPLEX) 104914331Speter && (!best || e->probability > best->probability)) 105014331Speter { 105114331Speter edge_iterator ei; 105214331Speter edge best2 = NULL; 105330994Sphk int best2_len = 0; 105414331Speter 105514331Speter /* If the destination is a start of a trace which is only 105614331Speter one block long, then no need to search the successor 105714331Speter blocks of the trace. Accept it. */ 105830994Sphk if (bbd[e->dest->index].start_of_trace >= 0 105914331Speter && traces[bbd[e->dest->index].start_of_trace].length 106013420Ssos == 1) 106114331Speter { 106214331Speter best = e; 106314331Speter try_copy = true; 106414331Speter continue; 106514331Speter } 106630994Sphk 106714331Speter FOR_EACH_EDGE (e2, ei, e->dest->succs) 106814331Speter { 106949959Smarcel int di = e2->dest->index; 107049959Smarcel 107114331Speter if (e2->dest == EXIT_BLOCK_PTR 107214331Speter || ((e2->flags & EDGE_CAN_FALLTHRU) 107314331Speter && !(e2->flags & EDGE_COMPLEX) 107414331Speter && bbd[di].start_of_trace >= 0 107514331Speter && !connected[bbd[di].start_of_trace] 107630994Sphk && (BB_PARTITION (e2->dest) == current_partition) 107714331Speter && (EDGE_FREQUENCY (e2) >= freq_threshold) 107814331Speter && (e2->count >= count_threshold) 107914331Speter && (!best2 108014331Speter || e2->probability > best2->probability 108114331Speter || (e2->probability == best2->probability 108214331Speter && traces[bbd[di].start_of_trace].length 108314331Speter > best2_len)))) 108430994Sphk { 108514331Speter best = e; 108614331Speter best2 = e2; 108714331Speter if (e2->dest != EXIT_BLOCK_PTR) 108814331Speter best2_len = traces[bbd[di].start_of_trace].length; 108914331Speter else 109014331Speter best2_len = INT_MAX; 109137950Sbde next_bb = e2->dest; 109237950Sbde try_copy = true; 109314331Speter } 109414331Speter } 109514331Speter } 109614331Speter 109714331Speter if (flag_reorder_blocks_and_partition) 109814331Speter try_copy = false; 109914331Speter 110014331Speter /* Copy tiny blocks always; copy larger blocks only when the 110114331Speter edge is traversed frequently enough. */ 110237950Sbde if (try_copy 110337950Sbde && copy_bb_p (best->dest, 110437950Sbde !optimize_size 110537950Sbde && EDGE_FREQUENCY (best) >= freq_threshold 110614331Speter && best->count >= count_threshold)) 110714331Speter { 110830994Sphk basic_block new_bb; 110914331Speter 111014331Speter if (dump_file) 111114331Speter { 111230994Sphk fprintf (dump_file, "Connection: %d %d ", 111314331Speter traces[t].last->index, best->dest->index); 111414331Speter if (!next_bb) 111514331Speter fputc ('\n', dump_file); 111637950Sbde else if (next_bb == EXIT_BLOCK_PTR) 111737950Sbde fprintf (dump_file, "exit\n"); 111814331Speter else 111914331Speter fprintf (dump_file, "%d\n", next_bb->index); 112014331Speter } 112130994Sphk 112214331Speter new_bb = copy_bb (best->dest, best, traces[t].last, t); 112330837Skato traces[t].last = new_bb; 112430837Skato if (next_bb && next_bb != EXIT_BLOCK_PTR) 112530994Sphk { 112630837Skato t = bbd[next_bb->index].start_of_trace; 112730855Skato traces[last_trace].last->aux = traces[t].first; 112830837Skato connected[t] = true; 112946112Sphk last_trace = t; 113030855Skato } 113130855Skato else 113230855Skato break; /* Stop finding the successor traces. */ 113330855Skato } 113430855Skato else 113530855Skato break; /* Stop finding the successor traces. */ 113630837Skato } 113730837Skato } 113830837Skato } 113930994Sphk 114030837Skato if (dump_file) 114130837Skato { 114230837Skato basic_block bb; 114330837Skato 114430837Skato fprintf (dump_file, "Final order:\n"); 114530837Skato for (bb = traces[0].first; bb; bb = bb->aux) 114630994Sphk fprintf (dump_file, "%d ", bb->index); 114730837Skato fprintf (dump_file, "\n"); 114830837Skato fflush (dump_file); 114942185Ssos } 115042185Ssos 115150350Smarcel FREE (connected); 115250350Smarcel} 115342185Ssos 115450350Smarcel/* Return true when BB can and should be copied. CODE_MAY_GROW is true 115550350Smarcel when code size is allowed to grow by duplication. */ 115650350Smarcel 115750350Smarcelstatic bool 115842185Ssoscopy_bb_p (basic_block bb, int code_may_grow) 115950350Smarcel{ 116050350Smarcel int size = 0; 116142185Ssos int max_size = uncond_jump_length; 116250350Smarcel rtx insn; 116350350Smarcel 116450350Smarcel if (!bb->frequency) 116550350Smarcel return false; 116650350Smarcel if (EDGE_COUNT (bb->preds) < 2) 116742185Ssos return false; 116850350Smarcel if (!can_duplicate_block_p (bb)) 116950350Smarcel return false; 117042185Ssos 117150350Smarcel /* Avoid duplicating blocks which have many successors (PR/13430). */ 117250350Smarcel if (EDGE_COUNT (bb->succs) > 8) 117342185Ssos return false; 117450350Smarcel 117550350Smarcel if (code_may_grow && maybe_hot_bb_p (bb)) 117650350Smarcel max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); 117750350Smarcel 117850350Smarcel FOR_BB_INSNS (bb, insn) 117950350Smarcel { 118042185Ssos if (INSN_P (insn)) 118150350Smarcel size += get_attr_min_length (insn); 118250350Smarcel } 118350350Smarcel 118450350Smarcel if (size <= max_size) 118550350Smarcel return true; 118650350Smarcel 118750350Smarcel if (dump_file) 118850350Smarcel { 118950350Smarcel fprintf (dump_file, 119050350Smarcel "Block %d can't be copied because its size = %d.\n", 119150350Smarcel bb->index, size); 119250350Smarcel } 119350350Smarcel 119450350Smarcel return false; 119542185Ssos} 119642185Ssos 119742185Ssos/* Return the length of unconditional jump instruction. */ 119842185Ssos 119950350Smarcelstatic int 120050350Smarcelget_uncond_jump_length (void) 120142185Ssos{ 120250350Smarcel rtx label, jump; 120350350Smarcel int length; 120450350Smarcel 120550350Smarcel label = emit_label_before (gen_label_rtx (), get_insns ()); 120642185Ssos jump = emit_jump_insn (gen_jump (label)); 120750350Smarcel 120850350Smarcel length = get_attr_min_length (jump); 120950546Smarcel 121042185Ssos delete_insn (jump); 121150350Smarcel delete_insn (label); 121250350Smarcel return length; 121350350Smarcel} 121450350Smarcel 121550350Smarcel/* Find the basic blocks that are rarely executed and need to be moved to 121642185Ssos a separate section of the .o file (to cut down on paging and improve 121750350Smarcel cache locality). */ 121850546Smarcel 121950350Smarcelstatic void 122050350Smarcelfind_rarely_executed_basic_blocks_and_crossing_edges (edge *crossing_edges, 122142185Ssos int *n_crossing_edges, 122250546Smarcel int *max_idx) 122350350Smarcel{ 122442185Ssos basic_block bb; 122550546Smarcel bool has_hot_blocks = false; 122650350Smarcel edge e; 122750546Smarcel int i; 122850350Smarcel edge_iterator ei; 122950350Smarcel 123050350Smarcel /* Mark which partition (hot/cold) each basic block belongs in. */ 123150350Smarcel 123250350Smarcel FOR_EACH_BB (bb) 123350350Smarcel { 123450350Smarcel if (probably_never_executed_bb_p (bb)) 123550546Smarcel BB_SET_PARTITION (bb, BB_COLD_PARTITION); 123650350Smarcel else 123742185Ssos { 123849626Smarcel BB_SET_PARTITION (bb, BB_HOT_PARTITION); 123949626Smarcel has_hot_blocks = true; 124049626Smarcel } 124149626Smarcel } 124249626Smarcel 124349626Smarcel /* Mark every edge that crosses between sections. */ 124449842Smarcel 124549842Smarcel i = 0; 124649626Smarcel FOR_EACH_BB (bb) 124749626Smarcel FOR_EACH_EDGE (e, ei, bb->succs) 124849626Smarcel { 124949626Smarcel if (e->src != ENTRY_BLOCK_PTR 125049626Smarcel && e->dest != EXIT_BLOCK_PTR 125149626Smarcel && BB_PARTITION (e->src) != BB_PARTITION (e->dest)) 125249626Smarcel { 125349626Smarcel e->flags |= EDGE_CROSSING; 125449842Smarcel if (i == *max_idx) 125549626Smarcel { 125649842Smarcel *max_idx *= 2; 125749626Smarcel crossing_edges = xrealloc (crossing_edges, 125849626Smarcel (*max_idx) * sizeof (edge)); 125949842Smarcel } 126049842Smarcel crossing_edges[i++] = e; 126149626Smarcel } 126249626Smarcel else 126349626Smarcel e->flags &= ~EDGE_CROSSING; 126449626Smarcel } 126549626Smarcel *n_crossing_edges = i; 126649626Smarcel} 126749626Smarcel 126849842Smarcel/* If any destination of a crossing edge does not have a label, add label; 126949842Smarcel Convert any fall-through crossing edges (for blocks that do not contain 127049626Smarcel a jump) to unconditional jumps. */ 127149626Smarcel 127249626Smarcelstatic void 127349626Smarceladd_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges) 127449626Smarcel{ 127549626Smarcel int i; 127649626Smarcel basic_block src; 127749626Smarcel basic_block dest; 127849842Smarcel rtx label; 127949626Smarcel rtx barrier; 128049842Smarcel rtx new_jump; 128149626Smarcel 128249626Smarcel for (i=0; i < n_crossing_edges; i++) 128349842Smarcel { 128449842Smarcel if (crossing_edges[i]) 128549626Smarcel { 128649849Smarcel src = crossing_edges[i]->src; 128749849Smarcel dest = crossing_edges[i]->dest; 128849849Smarcel 128949849Smarcel /* Make sure dest has a label. */ 129049849Smarcel 129149849Smarcel if (dest && (dest != EXIT_BLOCK_PTR)) 129249849Smarcel { 129349849Smarcel label = block_label (dest); 129449849Smarcel 129549849Smarcel /* Make sure source block ends with a jump. */ 129649849Smarcel 129749849Smarcel if (src && (src != ENTRY_BLOCK_PTR)) 129849849Smarcel { 129949849Smarcel if (!JUMP_P (BB_END (src))) 130049849Smarcel /* bb just falls through. */ 130149849Smarcel { 130249849Smarcel /* make sure there's only one successor */ 130349849Smarcel gcc_assert (single_succ_p (src)); 130449849Smarcel 130549849Smarcel /* Find label in dest block. */ 130649849Smarcel label = block_label (dest); 130749849Smarcel 130849849Smarcel new_jump = emit_jump_insn_after (gen_jump (label), 130949849Smarcel BB_END (src)); 131049849Smarcel barrier = emit_barrier_after (new_jump); 131149849Smarcel JUMP_LABEL (new_jump) = label; 131249849Smarcel LABEL_NUSES (label) += 1; 131349849Smarcel src->il.rtl->footer = unlink_insn_chain (barrier, barrier); 131449849Smarcel /* Mark edge as non-fallthru. */ 131549849Smarcel crossing_edges[i]->flags &= ~EDGE_FALLTHRU; 131649849Smarcel } /* end: 'if (GET_CODE ... ' */ 131749849Smarcel } /* end: 'if (src && src->index...' */ 131849849Smarcel } /* end: 'if (dest && dest->index...' */ 131949849Smarcel } /* end: 'if (crossing_edges[i]...' */ 132049849Smarcel } /* end for loop */ 132149849Smarcel} 132249849Smarcel 132349849Smarcel/* Find any bb's where the fall-through edge is a crossing edge (note that 132449849Smarcel these bb's must also contain a conditional jump; we've already 132549849Smarcel dealt with fall-through edges for blocks that didn't have a 132649849Smarcel conditional jump in the call to add_labels_and_missing_jumps). 132749849Smarcel Convert the fall-through edge to non-crossing edge by inserting a 132849849Smarcel new bb to fall-through into. The new bb will contain an 132949849Smarcel unconditional jump (crossing edge) to the original fall through 133049849Smarcel destination. */ 133149849Smarcel 133249849Smarcelstatic void 133349849Smarcelfix_up_fall_thru_edges (void) 133449849Smarcel{ 133549849Smarcel basic_block cur_bb; 133649849Smarcel basic_block new_bb; 133749849Smarcel edge succ1; 133849849Smarcel edge succ2; 133949849Smarcel edge fall_thru; 134049849Smarcel edge cond_jump = NULL; 134149849Smarcel edge e; 134249849Smarcel bool cond_jump_crosses; 134349849Smarcel int invert_worked; 134449849Smarcel rtx old_jump; 134549849Smarcel rtx fall_thru_label; 134649849Smarcel rtx barrier; 134749849Smarcel 134850818Smarcel FOR_EACH_BB (cur_bb) 134950818Smarcel { 135050818Smarcel fall_thru = NULL; 135150818Smarcel if (EDGE_COUNT (cur_bb->succs) > 0) 135250818Smarcel succ1 = EDGE_SUCC (cur_bb, 0); 135350818Smarcel else 135450818Smarcel succ1 = NULL; 135550818Smarcel 135650818Smarcel if (EDGE_COUNT (cur_bb->succs) > 1) 135750818Smarcel succ2 = EDGE_SUCC (cur_bb, 1); 135850818Smarcel else 135950818Smarcel succ2 = NULL; 136050818Smarcel 136150818Smarcel /* Find the fall-through edge. */ 136250818Smarcel 136350818Smarcel if (succ1 136450818Smarcel && (succ1->flags & EDGE_FALLTHRU)) 136550818Smarcel { 136650818Smarcel fall_thru = succ1; 136750818Smarcel cond_jump = succ2; 136850818Smarcel } 136950818Smarcel else if (succ2 137050818Smarcel && (succ2->flags & EDGE_FALLTHRU)) 137150818Smarcel { 137250818Smarcel fall_thru = succ2; 137350818Smarcel cond_jump = succ1; 137450818Smarcel } 137550818Smarcel 137650818Smarcel if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR)) 137750818Smarcel { 137850818Smarcel /* Check to see if the fall-thru edge is a crossing edge. */ 137950818Smarcel 138050818Smarcel if (fall_thru->flags & EDGE_CROSSING) 138150818Smarcel { 138250818Smarcel /* The fall_thru edge crosses; now check the cond jump edge, if 138350818Smarcel it exists. */ 138450818Smarcel 138550818Smarcel cond_jump_crosses = true; 138650818Smarcel invert_worked = 0; 138750818Smarcel old_jump = BB_END (cur_bb); 138850818Smarcel 138950818Smarcel /* Find the jump instruction, if there is one. */ 139050818Smarcel 139150818Smarcel if (cond_jump) 139250818Smarcel { 139350818Smarcel if (!(cond_jump->flags & EDGE_CROSSING)) 139450818Smarcel cond_jump_crosses = false; 139550818Smarcel 139650818Smarcel /* We know the fall-thru edge crosses; if the cond 139750818Smarcel jump edge does NOT cross, and its destination is the 139850818Smarcel next block in the bb order, invert the jump 139950818Smarcel (i.e. fix it so the fall thru does not cross and 140050818Smarcel the cond jump does). */ 140150818Smarcel 140250818Smarcel if (!cond_jump_crosses 140350818Smarcel && cur_bb->aux == cond_jump->dest) 140450818Smarcel { 140550818Smarcel /* Find label in fall_thru block. We've already added 140650818Smarcel any missing labels, so there must be one. */ 140750818Smarcel 140850818Smarcel fall_thru_label = block_label (fall_thru->dest); 140950818Smarcel 141050818Smarcel if (old_jump && fall_thru_label) 141150818Smarcel invert_worked = invert_jump (old_jump, 141250818Smarcel fall_thru_label,0); 141350818Smarcel if (invert_worked) 141450818Smarcel { 141550818Smarcel fall_thru->flags &= ~EDGE_FALLTHRU; 141650818Smarcel cond_jump->flags |= EDGE_FALLTHRU; 141750818Smarcel update_br_prob_note (cur_bb); 141850818Smarcel e = fall_thru; 141950818Smarcel fall_thru = cond_jump; 142050818Smarcel cond_jump = e; 142150818Smarcel cond_jump->flags |= EDGE_CROSSING; 142250818Smarcel fall_thru->flags &= ~EDGE_CROSSING; 142350818Smarcel } 142450818Smarcel } 142550818Smarcel } 142650818Smarcel 142750818Smarcel if (cond_jump_crosses || !invert_worked) 142850818Smarcel { 142950818Smarcel /* This is the case where both edges out of the basic 1430 block are crossing edges. Here we will fix up the 1431 fall through edge. The jump edge will be taken care 1432 of later. */ 1433 1434 new_bb = force_nonfallthru (fall_thru); 1435 1436 if (new_bb) 1437 { 1438 new_bb->aux = cur_bb->aux; 1439 cur_bb->aux = new_bb; 1440 1441 /* Make sure new fall-through bb is in same 1442 partition as bb it's falling through from. */ 1443 1444 BB_COPY_PARTITION (new_bb, cur_bb); 1445 single_succ_edge (new_bb)->flags |= EDGE_CROSSING; 1446 } 1447 1448 /* Add barrier after new jump */ 1449 1450 if (new_bb) 1451 { 1452 barrier = emit_barrier_after (BB_END (new_bb)); 1453 new_bb->il.rtl->footer = unlink_insn_chain (barrier, 1454 barrier); 1455 } 1456 else 1457 { 1458 barrier = emit_barrier_after (BB_END (cur_bb)); 1459 cur_bb->il.rtl->footer = unlink_insn_chain (barrier, 1460 barrier); 1461 } 1462 } 1463 } 1464 } 1465 } 1466} 1467 1468/* This function checks the destination blockof a "crossing jump" to 1469 see if it has any crossing predecessors that begin with a code label 1470 and end with an unconditional jump. If so, it returns that predecessor 1471 block. (This is to avoid creating lots of new basic blocks that all 1472 contain unconditional jumps to the same destination). */ 1473 1474static basic_block 1475find_jump_block (basic_block jump_dest) 1476{ 1477 basic_block source_bb = NULL; 1478 edge e; 1479 rtx insn; 1480 edge_iterator ei; 1481 1482 FOR_EACH_EDGE (e, ei, jump_dest->preds) 1483 if (e->flags & EDGE_CROSSING) 1484 { 1485 basic_block src = e->src; 1486 1487 /* Check each predecessor to see if it has a label, and contains 1488 only one executable instruction, which is an unconditional jump. 1489 If so, we can use it. */ 1490 1491 if (LABEL_P (BB_HEAD (src))) 1492 for (insn = BB_HEAD (src); 1493 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src)); 1494 insn = NEXT_INSN (insn)) 1495 { 1496 if (INSN_P (insn) 1497 && insn == BB_END (src) 1498 && JUMP_P (insn) 1499 && !any_condjump_p (insn)) 1500 { 1501 source_bb = src; 1502 break; 1503 } 1504 } 1505 1506 if (source_bb) 1507 break; 1508 } 1509 1510 return source_bb; 1511} 1512 1513/* Find all BB's with conditional jumps that are crossing edges; 1514 insert a new bb and make the conditional jump branch to the new 1515 bb instead (make the new bb same color so conditional branch won't 1516 be a 'crossing' edge). Insert an unconditional jump from the 1517 new bb to the original destination of the conditional jump. */ 1518 1519static void 1520fix_crossing_conditional_branches (void) 1521{ 1522 basic_block cur_bb; 1523 basic_block new_bb; 1524 basic_block last_bb; 1525 basic_block dest; 1526 basic_block prev_bb; 1527 edge succ1; 1528 edge succ2; 1529 edge crossing_edge; 1530 edge new_edge; 1531 rtx old_jump; 1532 rtx set_src; 1533 rtx old_label = NULL_RTX; 1534 rtx new_label; 1535 rtx new_jump; 1536 rtx barrier; 1537 1538 last_bb = EXIT_BLOCK_PTR->prev_bb; 1539 1540 FOR_EACH_BB (cur_bb) 1541 { 1542 crossing_edge = NULL; 1543 if (EDGE_COUNT (cur_bb->succs) > 0) 1544 succ1 = EDGE_SUCC (cur_bb, 0); 1545 else 1546 succ1 = NULL; 1547 1548 if (EDGE_COUNT (cur_bb->succs) > 1) 1549 succ2 = EDGE_SUCC (cur_bb, 1); 1550 else 1551 succ2 = NULL; 1552 1553 /* We already took care of fall-through edges, so only one successor 1554 can be a crossing edge. */ 1555 1556 if (succ1 && (succ1->flags & EDGE_CROSSING)) 1557 crossing_edge = succ1; 1558 else if (succ2 && (succ2->flags & EDGE_CROSSING)) 1559 crossing_edge = succ2; 1560 1561 if (crossing_edge) 1562 { 1563 old_jump = BB_END (cur_bb); 1564 1565 /* Check to make sure the jump instruction is a 1566 conditional jump. */ 1567 1568 set_src = NULL_RTX; 1569 1570 if (any_condjump_p (old_jump)) 1571 { 1572 if (GET_CODE (PATTERN (old_jump)) == SET) 1573 set_src = SET_SRC (PATTERN (old_jump)); 1574 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL) 1575 { 1576 set_src = XVECEXP (PATTERN (old_jump), 0,0); 1577 if (GET_CODE (set_src) == SET) 1578 set_src = SET_SRC (set_src); 1579 else 1580 set_src = NULL_RTX; 1581 } 1582 } 1583 1584 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE)) 1585 { 1586 if (GET_CODE (XEXP (set_src, 1)) == PC) 1587 old_label = XEXP (set_src, 2); 1588 else if (GET_CODE (XEXP (set_src, 2)) == PC) 1589 old_label = XEXP (set_src, 1); 1590 1591 /* Check to see if new bb for jumping to that dest has 1592 already been created; if so, use it; if not, create 1593 a new one. */ 1594 1595 new_bb = find_jump_block (crossing_edge->dest); 1596 1597 if (new_bb) 1598 new_label = block_label (new_bb); 1599 else 1600 { 1601 /* Create new basic block to be dest for 1602 conditional jump. */ 1603 1604 new_bb = create_basic_block (NULL, NULL, last_bb); 1605 new_bb->aux = last_bb->aux; 1606 last_bb->aux = new_bb; 1607 prev_bb = last_bb; 1608 last_bb = new_bb; 1609 1610 /* Update register liveness information. */ 1611 1612 new_bb->il.rtl->global_live_at_start = ALLOC_REG_SET (®_obstack); 1613 new_bb->il.rtl->global_live_at_end = ALLOC_REG_SET (®_obstack); 1614 COPY_REG_SET (new_bb->il.rtl->global_live_at_end, 1615 prev_bb->il.rtl->global_live_at_end); 1616 COPY_REG_SET (new_bb->il.rtl->global_live_at_start, 1617 prev_bb->il.rtl->global_live_at_end); 1618 1619 /* Put appropriate instructions in new bb. */ 1620 1621 new_label = gen_label_rtx (); 1622 emit_label_before (new_label, BB_HEAD (new_bb)); 1623 BB_HEAD (new_bb) = new_label; 1624 1625 if (GET_CODE (old_label) == LABEL_REF) 1626 { 1627 old_label = JUMP_LABEL (old_jump); 1628 new_jump = emit_jump_insn_after (gen_jump 1629 (old_label), 1630 BB_END (new_bb)); 1631 } 1632 else 1633 { 1634 gcc_assert (HAVE_return 1635 && GET_CODE (old_label) == RETURN); 1636 new_jump = emit_jump_insn_after (gen_return (), 1637 BB_END (new_bb)); 1638 } 1639 1640 barrier = emit_barrier_after (new_jump); 1641 JUMP_LABEL (new_jump) = old_label; 1642 new_bb->il.rtl->footer = unlink_insn_chain (barrier, 1643 barrier); 1644 1645 /* Make sure new bb is in same partition as source 1646 of conditional branch. */ 1647 BB_COPY_PARTITION (new_bb, cur_bb); 1648 } 1649 1650 /* Make old jump branch to new bb. */ 1651 1652 redirect_jump (old_jump, new_label, 0); 1653 1654 /* Remove crossing_edge as predecessor of 'dest'. */ 1655 1656 dest = crossing_edge->dest; 1657 1658 redirect_edge_succ (crossing_edge, new_bb); 1659 1660 /* Make a new edge from new_bb to old dest; new edge 1661 will be a successor for new_bb and a predecessor 1662 for 'dest'. */ 1663 1664 if (EDGE_COUNT (new_bb->succs) == 0) 1665 new_edge = make_edge (new_bb, dest, 0); 1666 else 1667 new_edge = EDGE_SUCC (new_bb, 0); 1668 1669 crossing_edge->flags &= ~EDGE_CROSSING; 1670 new_edge->flags |= EDGE_CROSSING; 1671 } 1672 } 1673 } 1674} 1675 1676/* Find any unconditional branches that cross between hot and cold 1677 sections. Convert them into indirect jumps instead. */ 1678 1679static void 1680fix_crossing_unconditional_branches (void) 1681{ 1682 basic_block cur_bb; 1683 rtx last_insn; 1684 rtx label; 1685 rtx label_addr; 1686 rtx indirect_jump_sequence; 1687 rtx jump_insn = NULL_RTX; 1688 rtx new_reg; 1689 rtx cur_insn; 1690 edge succ; 1691 1692 FOR_EACH_BB (cur_bb) 1693 { 1694 last_insn = BB_END (cur_bb); 1695 1696 if (EDGE_COUNT (cur_bb->succs) < 1) 1697 continue; 1698 1699 succ = EDGE_SUCC (cur_bb, 0); 1700 1701 /* Check to see if bb ends in a crossing (unconditional) jump. At 1702 this point, no crossing jumps should be conditional. */ 1703 1704 if (JUMP_P (last_insn) 1705 && (succ->flags & EDGE_CROSSING)) 1706 { 1707 rtx label2, table; 1708 1709 gcc_assert (!any_condjump_p (last_insn)); 1710 1711 /* Make sure the jump is not already an indirect or table jump. */ 1712 1713 if (!computed_jump_p (last_insn) 1714 && !tablejump_p (last_insn, &label2, &table)) 1715 { 1716 /* We have found a "crossing" unconditional branch. Now 1717 we must convert it to an indirect jump. First create 1718 reference of label, as target for jump. */ 1719 1720 label = JUMP_LABEL (last_insn); 1721 label_addr = gen_rtx_LABEL_REF (Pmode, label); 1722 LABEL_NUSES (label) += 1; 1723 1724 /* Get a register to use for the indirect jump. */ 1725 1726 new_reg = gen_reg_rtx (Pmode); 1727 1728 /* Generate indirect the jump sequence. */ 1729 1730 start_sequence (); 1731 emit_move_insn (new_reg, label_addr); 1732 emit_indirect_jump (new_reg); 1733 indirect_jump_sequence = get_insns (); 1734 end_sequence (); 1735 1736 /* Make sure every instruction in the new jump sequence has 1737 its basic block set to be cur_bb. */ 1738 1739 for (cur_insn = indirect_jump_sequence; cur_insn; 1740 cur_insn = NEXT_INSN (cur_insn)) 1741 { 1742 if (!BARRIER_P (cur_insn)) 1743 BLOCK_FOR_INSN (cur_insn) = cur_bb; 1744 if (JUMP_P (cur_insn)) 1745 jump_insn = cur_insn; 1746 } 1747 1748 /* Insert the new (indirect) jump sequence immediately before 1749 the unconditional jump, then delete the unconditional jump. */ 1750 1751 emit_insn_before (indirect_jump_sequence, last_insn); 1752 delete_insn (last_insn); 1753 1754 /* Make BB_END for cur_bb be the jump instruction (NOT the 1755 barrier instruction at the end of the sequence...). */ 1756 1757 BB_END (cur_bb) = jump_insn; 1758 } 1759 } 1760 } 1761} 1762 1763/* Add REG_CROSSING_JUMP note to all crossing jump insns. */ 1764 1765static void 1766add_reg_crossing_jump_notes (void) 1767{ 1768 basic_block bb; 1769 edge e; 1770 edge_iterator ei; 1771 1772 FOR_EACH_BB (bb) 1773 FOR_EACH_EDGE (e, ei, bb->succs) 1774 if ((e->flags & EDGE_CROSSING) 1775 && JUMP_P (BB_END (e->src))) 1776 REG_NOTES (BB_END (e->src)) = gen_rtx_EXPR_LIST (REG_CROSSING_JUMP, 1777 NULL_RTX, 1778 REG_NOTES (BB_END 1779 (e->src))); 1780} 1781 1782/* Hot and cold basic blocks are partitioned and put in separate 1783 sections of the .o file, to reduce paging and improve cache 1784 performance (hopefully). This can result in bits of code from the 1785 same function being widely separated in the .o file. However this 1786 is not obvious to the current bb structure. Therefore we must take 1787 care to ensure that: 1). There are no fall_thru edges that cross 1788 between sections; 2). For those architectures which have "short" 1789 conditional branches, all conditional branches that attempt to 1790 cross between sections are converted to unconditional branches; 1791 and, 3). For those architectures which have "short" unconditional 1792 branches, all unconditional branches that attempt to cross between 1793 sections are converted to indirect jumps. 1794 1795 The code for fixing up fall_thru edges that cross between hot and 1796 cold basic blocks does so by creating new basic blocks containing 1797 unconditional branches to the appropriate label in the "other" 1798 section. The new basic block is then put in the same (hot or cold) 1799 section as the original conditional branch, and the fall_thru edge 1800 is modified to fall into the new basic block instead. By adding 1801 this level of indirection we end up with only unconditional branches 1802 crossing between hot and cold sections. 1803 1804 Conditional branches are dealt with by adding a level of indirection. 1805 A new basic block is added in the same (hot/cold) section as the 1806 conditional branch, and the conditional branch is retargeted to the 1807 new basic block. The new basic block contains an unconditional branch 1808 to the original target of the conditional branch (in the other section). 1809 1810 Unconditional branches are dealt with by converting them into 1811 indirect jumps. */ 1812 1813static void 1814fix_edges_for_rarely_executed_code (edge *crossing_edges, 1815 int n_crossing_edges) 1816{ 1817 /* Make sure the source of any crossing edge ends in a jump and the 1818 destination of any crossing edge has a label. */ 1819 1820 add_labels_and_missing_jumps (crossing_edges, n_crossing_edges); 1821 1822 /* Convert all crossing fall_thru edges to non-crossing fall 1823 thrus to unconditional jumps (that jump to the original fall 1824 thru dest). */ 1825 1826 fix_up_fall_thru_edges (); 1827 1828 /* If the architecture does not have conditional branches that can 1829 span all of memory, convert crossing conditional branches into 1830 crossing unconditional branches. */ 1831 1832 if (!HAS_LONG_COND_BRANCH) 1833 fix_crossing_conditional_branches (); 1834 1835 /* If the architecture does not have unconditional branches that 1836 can span all of memory, convert crossing unconditional branches 1837 into indirect jumps. Since adding an indirect jump also adds 1838 a new register usage, update the register usage information as 1839 well. */ 1840 1841 if (!HAS_LONG_UNCOND_BRANCH) 1842 { 1843 fix_crossing_unconditional_branches (); 1844 reg_scan (get_insns(), max_reg_num ()); 1845 } 1846 1847 add_reg_crossing_jump_notes (); 1848} 1849 1850/* Verify, in the basic block chain, that there is at most one switch 1851 between hot/cold partitions. This is modelled on 1852 rtl_verify_flow_info_1, but it cannot go inside that function 1853 because this condition will not be true until after 1854 reorder_basic_blocks is called. */ 1855 1856static void 1857verify_hot_cold_block_grouping (void) 1858{ 1859 basic_block bb; 1860 int err = 0; 1861 bool switched_sections = false; 1862 int current_partition = 0; 1863 1864 FOR_EACH_BB (bb) 1865 { 1866 if (!current_partition) 1867 current_partition = BB_PARTITION (bb); 1868 if (BB_PARTITION (bb) != current_partition) 1869 { 1870 if (switched_sections) 1871 { 1872 error ("multiple hot/cold transitions found (bb %i)", 1873 bb->index); 1874 err = 1; 1875 } 1876 else 1877 { 1878 switched_sections = true; 1879 current_partition = BB_PARTITION (bb); 1880 } 1881 } 1882 } 1883 1884 gcc_assert(!err); 1885} 1886 1887/* Reorder basic blocks. The main entry point to this file. FLAGS is 1888 the set of flags to pass to cfg_layout_initialize(). */ 1889 1890void 1891reorder_basic_blocks (unsigned int flags) 1892{ 1893 int n_traces; 1894 int i; 1895 struct trace *traces; 1896 1897 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 1898 return; 1899 1900 if (targetm.cannot_modify_jumps_p ()) 1901 return; 1902 1903 cfg_layout_initialize (flags); 1904 1905 set_edge_can_fallthru_flag (); 1906 mark_dfs_back_edges (); 1907 1908 /* We are estimating the length of uncond jump insn only once since the code 1909 for getting the insn length always returns the minimal length now. */ 1910 if (uncond_jump_length == 0) 1911 uncond_jump_length = get_uncond_jump_length (); 1912 1913 /* We need to know some information for each basic block. */ 1914 array_size = GET_ARRAY_SIZE (last_basic_block); 1915 bbd = XNEWVEC (bbro_basic_block_data, array_size); 1916 for (i = 0; i < array_size; i++) 1917 { 1918 bbd[i].start_of_trace = -1; 1919 bbd[i].in_trace = -1; 1920 bbd[i].end_of_trace = -1; 1921 bbd[i].heap = NULL; 1922 bbd[i].node = NULL; 1923 } 1924 1925 traces = XNEWVEC (struct trace, n_basic_blocks); 1926 n_traces = 0; 1927 find_traces (&n_traces, traces); 1928 connect_traces (n_traces, traces); 1929 FREE (traces); 1930 FREE (bbd); 1931 1932 if (dump_file) 1933 dump_flow_info (dump_file, dump_flags); 1934 1935 cfg_layout_finalize (); 1936 if (flag_reorder_blocks_and_partition) 1937 verify_hot_cold_block_grouping (); 1938} 1939 1940/* Determine which partition the first basic block in the function 1941 belongs to, then find the first basic block in the current function 1942 that belongs to a different section, and insert a 1943 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the 1944 instruction stream. When writing out the assembly code, 1945 encountering this note will make the compiler switch between the 1946 hot and cold text sections. */ 1947 1948static void 1949insert_section_boundary_note (void) 1950{ 1951 basic_block bb; 1952 rtx new_note; 1953 int first_partition = 0; 1954 1955 if (flag_reorder_blocks_and_partition) 1956 FOR_EACH_BB (bb) 1957 { 1958 if (!first_partition) 1959 first_partition = BB_PARTITION (bb); 1960 if (BB_PARTITION (bb) != first_partition) 1961 { 1962 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, 1963 BB_HEAD (bb)); 1964 break; 1965 } 1966 } 1967} 1968 1969/* Duplicate the blocks containing computed gotos. This basically unfactors 1970 computed gotos that were factored early on in the compilation process to 1971 speed up edge based data flow. We used to not unfactoring them again, 1972 which can seriously pessimize code with many computed jumps in the source 1973 code, such as interpreters. See e.g. PR15242. */ 1974 1975static bool 1976gate_duplicate_computed_gotos (void) 1977{ 1978 return (optimize > 0 && flag_expensive_optimizations && !optimize_size); 1979} 1980 1981 1982static unsigned int 1983duplicate_computed_gotos (void) 1984{ 1985 basic_block bb, new_bb; 1986 bitmap candidates; 1987 int max_size; 1988 1989 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 1990 return 0; 1991 1992 if (targetm.cannot_modify_jumps_p ()) 1993 return 0; 1994 1995 cfg_layout_initialize (0); 1996 1997 /* We are estimating the length of uncond jump insn only once 1998 since the code for getting the insn length always returns 1999 the minimal length now. */ 2000 if (uncond_jump_length == 0) 2001 uncond_jump_length = get_uncond_jump_length (); 2002 2003 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS); 2004 candidates = BITMAP_ALLOC (NULL); 2005 2006 /* Look for blocks that end in a computed jump, and see if such blocks 2007 are suitable for unfactoring. If a block is a candidate for unfactoring, 2008 mark it in the candidates. */ 2009 FOR_EACH_BB (bb) 2010 { 2011 rtx insn; 2012 edge e; 2013 edge_iterator ei; 2014 int size, all_flags; 2015 2016 /* Build the reorder chain for the original order of blocks. */ 2017 if (bb->next_bb != EXIT_BLOCK_PTR) 2018 bb->aux = bb->next_bb; 2019 2020 /* Obviously the block has to end in a computed jump. */ 2021 if (!computed_jump_p (BB_END (bb))) 2022 continue; 2023 2024 /* Only consider blocks that can be duplicated. */ 2025 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX) 2026 || !can_duplicate_block_p (bb)) 2027 continue; 2028 2029 /* Make sure that the block is small enough. */ 2030 size = 0; 2031 FOR_BB_INSNS (bb, insn) 2032 if (INSN_P (insn)) 2033 { 2034 size += get_attr_min_length (insn); 2035 if (size > max_size) 2036 break; 2037 } 2038 if (size > max_size) 2039 continue; 2040 2041 /* Final check: there must not be any incoming abnormal edges. */ 2042 all_flags = 0; 2043 FOR_EACH_EDGE (e, ei, bb->preds) 2044 all_flags |= e->flags; 2045 if (all_flags & EDGE_COMPLEX) 2046 continue; 2047 2048 bitmap_set_bit (candidates, bb->index); 2049 } 2050 2051 /* Nothing to do if there is no computed jump here. */ 2052 if (bitmap_empty_p (candidates)) 2053 goto done; 2054 2055 /* Duplicate computed gotos. */ 2056 FOR_EACH_BB (bb) 2057 { 2058 if (bb->il.rtl->visited) 2059 continue; 2060 2061 bb->il.rtl->visited = 1; 2062 2063 /* BB must have one outgoing edge. That edge must not lead to 2064 the exit block or the next block. 2065 The destination must have more than one predecessor. */ 2066 if (!single_succ_p (bb) 2067 || single_succ (bb) == EXIT_BLOCK_PTR 2068 || single_succ (bb) == bb->next_bb 2069 || single_pred_p (single_succ (bb))) 2070 continue; 2071 2072 /* The successor block has to be a duplication candidate. */ 2073 if (!bitmap_bit_p (candidates, single_succ (bb)->index)) 2074 continue; 2075 2076 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb); 2077 new_bb->aux = bb->aux; 2078 bb->aux = new_bb; 2079 new_bb->il.rtl->visited = 1; 2080 } 2081 2082done: 2083 cfg_layout_finalize (); 2084 2085 BITMAP_FREE (candidates); 2086 return 0; 2087} 2088 2089struct tree_opt_pass pass_duplicate_computed_gotos = 2090{ 2091 "compgotos", /* name */ 2092 gate_duplicate_computed_gotos, /* gate */ 2093 duplicate_computed_gotos, /* execute */ 2094 NULL, /* sub */ 2095 NULL, /* next */ 2096 0, /* static_pass_number */ 2097 TV_REORDER_BLOCKS, /* tv_id */ 2098 0, /* properties_required */ 2099 0, /* properties_provided */ 2100 0, /* properties_destroyed */ 2101 0, /* todo_flags_start */ 2102 TODO_dump_func, /* todo_flags_finish */ 2103 0 /* letter */ 2104}; 2105 2106 2107/* This function is the main 'entrance' for the optimization that 2108 partitions hot and cold basic blocks into separate sections of the 2109 .o file (to improve performance and cache locality). Ideally it 2110 would be called after all optimizations that rearrange the CFG have 2111 been called. However part of this optimization may introduce new 2112 register usage, so it must be called before register allocation has 2113 occurred. This means that this optimization is actually called 2114 well before the optimization that reorders basic blocks (see 2115 function above). 2116 2117 This optimization checks the feedback information to determine 2118 which basic blocks are hot/cold, updates flags on the basic blocks 2119 to indicate which section they belong in. This information is 2120 later used for writing out sections in the .o file. Because hot 2121 and cold sections can be arbitrarily large (within the bounds of 2122 memory), far beyond the size of a single function, it is necessary 2123 to fix up all edges that cross section boundaries, to make sure the 2124 instructions used can actually span the required distance. The 2125 fixes are described below. 2126 2127 Fall-through edges must be changed into jumps; it is not safe or 2128 legal to fall through across a section boundary. Whenever a 2129 fall-through edge crossing a section boundary is encountered, a new 2130 basic block is inserted (in the same section as the fall-through 2131 source), and the fall through edge is redirected to the new basic 2132 block. The new basic block contains an unconditional jump to the 2133 original fall-through target. (If the unconditional jump is 2134 insufficient to cross section boundaries, that is dealt with a 2135 little later, see below). 2136 2137 In order to deal with architectures that have short conditional 2138 branches (which cannot span all of memory) we take any conditional 2139 jump that attempts to cross a section boundary and add a level of 2140 indirection: it becomes a conditional jump to a new basic block, in 2141 the same section. The new basic block contains an unconditional 2142 jump to the original target, in the other section. 2143 2144 For those architectures whose unconditional branch is also 2145 incapable of reaching all of memory, those unconditional jumps are 2146 converted into indirect jumps, through a register. 2147 2148 IMPORTANT NOTE: This optimization causes some messy interactions 2149 with the cfg cleanup optimizations; those optimizations want to 2150 merge blocks wherever possible, and to collapse indirect jump 2151 sequences (change "A jumps to B jumps to C" directly into "A jumps 2152 to C"). Those optimizations can undo the jump fixes that 2153 partitioning is required to make (see above), in order to ensure 2154 that jumps attempting to cross section boundaries are really able 2155 to cover whatever distance the jump requires (on many architectures 2156 conditional or unconditional jumps are not able to reach all of 2157 memory). Therefore tests have to be inserted into each such 2158 optimization to make sure that it does not undo stuff necessary to 2159 cross partition boundaries. This would be much less of a problem 2160 if we could perform this optimization later in the compilation, but 2161 unfortunately the fact that we may need to create indirect jumps 2162 (through registers) requires that this optimization be performed 2163 before register allocation. */ 2164 2165static void 2166partition_hot_cold_basic_blocks (void) 2167{ 2168 basic_block cur_bb; 2169 edge *crossing_edges; 2170 int n_crossing_edges; 2171 int max_edges = 2 * last_basic_block; 2172 2173 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 2174 return; 2175 2176 crossing_edges = XCNEWVEC (edge, max_edges); 2177 2178 cfg_layout_initialize (0); 2179 2180 FOR_EACH_BB (cur_bb) 2181 if (cur_bb->index >= NUM_FIXED_BLOCKS 2182 && cur_bb->next_bb->index >= NUM_FIXED_BLOCKS) 2183 cur_bb->aux = cur_bb->next_bb; 2184 2185 find_rarely_executed_basic_blocks_and_crossing_edges (crossing_edges, 2186 &n_crossing_edges, 2187 &max_edges); 2188 2189 if (n_crossing_edges > 0) 2190 fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges); 2191 2192 free (crossing_edges); 2193 2194 cfg_layout_finalize(); 2195} 2196 2197static bool 2198gate_handle_reorder_blocks (void) 2199{ 2200 return (optimize > 0); 2201} 2202 2203 2204/* Reorder basic blocks. */ 2205static unsigned int 2206rest_of_handle_reorder_blocks (void) 2207{ 2208 bool changed; 2209 unsigned int liveness_flags; 2210 2211 /* Last attempt to optimize CFG, as scheduling, peepholing and insn 2212 splitting possibly introduced more crossjumping opportunities. */ 2213 liveness_flags = (!HAVE_conditional_execution ? CLEANUP_UPDATE_LIFE : 0); 2214 changed = cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags); 2215 2216 if (flag_sched2_use_traces && flag_schedule_insns_after_reload) 2217 { 2218 timevar_push (TV_TRACER); 2219 tracer (liveness_flags); 2220 timevar_pop (TV_TRACER); 2221 } 2222 2223 if (flag_reorder_blocks || flag_reorder_blocks_and_partition) 2224 reorder_basic_blocks (liveness_flags); 2225 if (flag_reorder_blocks || flag_reorder_blocks_and_partition 2226 || (flag_sched2_use_traces && flag_schedule_insns_after_reload)) 2227 changed |= cleanup_cfg (CLEANUP_EXPENSIVE | liveness_flags); 2228 2229 /* On conditional execution targets we can not update the life cheaply, so 2230 we deffer the updating to after both cleanups. This may lose some cases 2231 but should not be terribly bad. */ 2232 if (changed && HAVE_conditional_execution) 2233 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, 2234 PROP_DEATH_NOTES); 2235 2236 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */ 2237 insert_section_boundary_note (); 2238 return 0; 2239} 2240 2241struct tree_opt_pass pass_reorder_blocks = 2242{ 2243 "bbro", /* name */ 2244 gate_handle_reorder_blocks, /* gate */ 2245 rest_of_handle_reorder_blocks, /* execute */ 2246 NULL, /* sub */ 2247 NULL, /* next */ 2248 0, /* static_pass_number */ 2249 TV_REORDER_BLOCKS, /* tv_id */ 2250 0, /* properties_required */ 2251 0, /* properties_provided */ 2252 0, /* properties_destroyed */ 2253 0, /* todo_flags_start */ 2254 TODO_dump_func, /* todo_flags_finish */ 2255 'B' /* letter */ 2256}; 2257 2258static bool 2259gate_handle_partition_blocks (void) 2260{ 2261 /* The optimization to partition hot/cold basic blocks into separate 2262 sections of the .o file does not work well with linkonce or with 2263 user defined section attributes. Don't call it if either case 2264 arises. */ 2265 2266 return (flag_reorder_blocks_and_partition 2267 && !DECL_ONE_ONLY (current_function_decl) 2268 && !user_defined_section_attribute); 2269} 2270 2271/* Partition hot and cold basic blocks. */ 2272static unsigned int 2273rest_of_handle_partition_blocks (void) 2274{ 2275 no_new_pseudos = 0; 2276 partition_hot_cold_basic_blocks (); 2277 allocate_reg_life_data (); 2278 update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, 2279 PROP_LOG_LINKS | PROP_REG_INFO | PROP_DEATH_NOTES); 2280 no_new_pseudos = 1; 2281 return 0; 2282} 2283 2284struct tree_opt_pass pass_partition_blocks = 2285{ 2286 "bbpart", /* name */ 2287 gate_handle_partition_blocks, /* gate */ 2288 rest_of_handle_partition_blocks, /* execute */ 2289 NULL, /* sub */ 2290 NULL, /* next */ 2291 0, /* static_pass_number */ 2292 TV_REORDER_BLOCKS, /* tv_id */ 2293 0, /* properties_required */ 2294 0, /* properties_provided */ 2295 0, /* properties_destroyed */ 2296 0, /* todo_flags_start */ 2297 TODO_dump_func, /* todo_flags_finish */ 2298 0 /* letter */ 2299}; 2300 2301 2302