1117395Skan/* The tracer pass for the GNU compiler. 2117395Skan Contributed by Jan Hubicka, SuSE Labs. 3169689Skan Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 4117395Skan 5117395Skan This file is part of GCC. 6117395Skan 7117395Skan GCC is free software; you can redistribute it and/or modify it 8117395Skan under the terms of the GNU General Public License as published by 9117395Skan the Free Software Foundation; either version 2, or (at your option) 10117395Skan any later version. 11117395Skan 12117395Skan GCC is distributed in the hope that it will be useful, but WITHOUT 13117395Skan ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14117395Skan or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15117395Skan License for more details. 16117395Skan 17117395Skan You should have received a copy of the GNU General Public License 18117395Skan along with GCC; see the file COPYING. If not, write to the Free 19169689Skan Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20169689Skan 02110-1301, USA. */ 21117395Skan 22117395Skan/* This pass performs the tail duplication needed for superblock formation. 23117395Skan For more information see: 24117395Skan 25117395Skan Design and Analysis of Profile-Based Optimization in Compaq's 26117395Skan Compilation Tools for Alpha; Journal of Instruction-Level 27117395Skan Parallelism 3 (2000) 1-25 28117395Skan 29117395Skan Unlike Compaq's implementation we don't do the loop peeling as most 30117395Skan probably a better job can be done by a special pass and we don't 31117395Skan need to worry too much about the code size implications as the tail 32117395Skan duplicates are crossjumped again if optimizations are not 33117395Skan performed. */ 34117395Skan 35117395Skan 36117395Skan#include "config.h" 37117395Skan#include "system.h" 38132718Skan#include "coretypes.h" 39132718Skan#include "tm.h" 40117395Skan#include "tree.h" 41117395Skan#include "rtl.h" 42117395Skan#include "hard-reg-set.h" 43117395Skan#include "basic-block.h" 44117395Skan#include "output.h" 45117395Skan#include "cfglayout.h" 46117395Skan#include "fibheap.h" 47117395Skan#include "flags.h" 48132718Skan#include "timevar.h" 49117395Skan#include "params.h" 50132718Skan#include "coverage.h" 51169689Skan#include "tree-pass.h" 52117395Skan 53132718Skanstatic int count_insns (basic_block); 54132718Skanstatic bool ignore_bb_p (basic_block); 55132718Skanstatic bool better_p (edge, edge); 56132718Skanstatic edge find_best_successor (basic_block); 57132718Skanstatic edge find_best_predecessor (basic_block); 58132718Skanstatic int find_trace (basic_block, basic_block *); 59132718Skanstatic void tail_duplicate (void); 60132718Skanstatic void layout_superblocks (void); 61117395Skan 62117395Skan/* Minimal outgoing edge probability considered for superblock formation. */ 63117395Skanstatic int probability_cutoff; 64117395Skanstatic int branch_ratio_cutoff; 65117395Skan 66117395Skan/* Return true if BB has been seen - it is connected to some trace 67117395Skan already. */ 68117395Skan 69169689Skan#define seen(bb) (bb->il.rtl->visited || bb->aux) 70117395Skan 71117395Skan/* Return true if we should ignore the basic block for purposes of tracing. */ 72117395Skanstatic bool 73132718Skanignore_bb_p (basic_block bb) 74117395Skan{ 75169689Skan if (bb->index < NUM_FIXED_BLOCKS) 76117395Skan return true; 77117395Skan if (!maybe_hot_bb_p (bb)) 78117395Skan return true; 79117395Skan return false; 80117395Skan} 81117395Skan 82117395Skan/* Return number of instructions in the block. */ 83117395Skan 84117395Skanstatic int 85132718Skancount_insns (basic_block bb) 86117395Skan{ 87117395Skan rtx insn; 88117395Skan int n = 0; 89117395Skan 90132718Skan for (insn = BB_HEAD (bb); 91132718Skan insn != NEXT_INSN (BB_END (bb)); 92132718Skan insn = NEXT_INSN (insn)) 93117395Skan if (active_insn_p (insn)) 94117395Skan n++; 95117395Skan return n; 96117395Skan} 97117395Skan 98117395Skan/* Return true if E1 is more frequent than E2. */ 99117395Skanstatic bool 100132718Skanbetter_p (edge e1, edge e2) 101117395Skan{ 102117395Skan if (e1->count != e2->count) 103117395Skan return e1->count > e2->count; 104117395Skan if (e1->src->frequency * e1->probability != 105117395Skan e2->src->frequency * e2->probability) 106117395Skan return (e1->src->frequency * e1->probability 107117395Skan > e2->src->frequency * e2->probability); 108117395Skan /* This is needed to avoid changes in the decision after 109117395Skan CFG is modified. */ 110117395Skan if (e1->src != e2->src) 111117395Skan return e1->src->index > e2->src->index; 112117395Skan return e1->dest->index > e2->dest->index; 113117395Skan} 114117395Skan 115117395Skan/* Return most frequent successor of basic block BB. */ 116117395Skan 117117395Skanstatic edge 118132718Skanfind_best_successor (basic_block bb) 119117395Skan{ 120117395Skan edge e; 121117395Skan edge best = NULL; 122169689Skan edge_iterator ei; 123117395Skan 124169689Skan FOR_EACH_EDGE (e, ei, bb->succs) 125117395Skan if (!best || better_p (e, best)) 126117395Skan best = e; 127117395Skan if (!best || ignore_bb_p (best->dest)) 128117395Skan return NULL; 129117395Skan if (best->probability <= probability_cutoff) 130117395Skan return NULL; 131117395Skan return best; 132117395Skan} 133117395Skan 134117395Skan/* Return most frequent predecessor of basic block BB. */ 135117395Skan 136117395Skanstatic edge 137132718Skanfind_best_predecessor (basic_block bb) 138117395Skan{ 139117395Skan edge e; 140117395Skan edge best = NULL; 141169689Skan edge_iterator ei; 142117395Skan 143169689Skan FOR_EACH_EDGE (e, ei, bb->preds) 144117395Skan if (!best || better_p (e, best)) 145117395Skan best = e; 146117395Skan if (!best || ignore_bb_p (best->src)) 147117395Skan return NULL; 148117395Skan if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE 149117395Skan < bb->frequency * branch_ratio_cutoff) 150117395Skan return NULL; 151117395Skan return best; 152117395Skan} 153117395Skan 154117395Skan/* Find the trace using bb and record it in the TRACE array. 155117395Skan Return number of basic blocks recorded. */ 156117395Skan 157117395Skanstatic int 158132718Skanfind_trace (basic_block bb, basic_block *trace) 159117395Skan{ 160117395Skan int i = 0; 161117395Skan edge e; 162117395Skan 163169689Skan if (dump_file) 164169689Skan fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); 165117395Skan 166117395Skan while ((e = find_best_predecessor (bb)) != NULL) 167117395Skan { 168117395Skan basic_block bb2 = e->src; 169117395Skan if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 170117395Skan || find_best_successor (bb2) != e) 171117395Skan break; 172169689Skan if (dump_file) 173169689Skan fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 174117395Skan bb = bb2; 175117395Skan } 176169689Skan if (dump_file) 177169689Skan fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); 178117395Skan trace[i++] = bb; 179117395Skan 180117395Skan /* Follow the trace in forward direction. */ 181117395Skan while ((e = find_best_successor (bb)) != NULL) 182117395Skan { 183117395Skan bb = e->dest; 184117395Skan if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 185117395Skan || find_best_predecessor (bb) != e) 186117395Skan break; 187169689Skan if (dump_file) 188169689Skan fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 189117395Skan trace[i++] = bb; 190117395Skan } 191169689Skan if (dump_file) 192169689Skan fprintf (dump_file, "\n"); 193117395Skan return i; 194117395Skan} 195117395Skan 196117395Skan/* Look for basic blocks in frequency order, construct traces and tail duplicate 197117395Skan if profitable. */ 198117395Skan 199117395Skanstatic void 200132718Skantail_duplicate (void) 201117395Skan{ 202169689Skan fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); 203169689Skan basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); 204169689Skan int *counts = XNEWVEC (int, last_basic_block); 205117395Skan int ninsns = 0, nduplicated = 0; 206117395Skan gcov_type weighted_insns = 0, traced_insns = 0; 207117395Skan fibheap_t heap = fibheap_new (); 208117395Skan gcov_type cover_insns; 209117395Skan int max_dup_insns; 210117395Skan basic_block bb; 211117395Skan 212132718Skan if (profile_info && flag_branch_probabilities) 213117395Skan probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); 214117395Skan else 215117395Skan probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); 216117395Skan probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 217117395Skan 218117395Skan branch_ratio_cutoff = 219117395Skan (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); 220117395Skan 221117395Skan FOR_EACH_BB (bb) 222117395Skan { 223117395Skan int n = count_insns (bb); 224117395Skan if (!ignore_bb_p (bb)) 225117395Skan blocks[bb->index] = fibheap_insert (heap, -bb->frequency, 226117395Skan bb); 227117395Skan 228117395Skan counts [bb->index] = n; 229117395Skan ninsns += n; 230117395Skan weighted_insns += n * bb->frequency; 231117395Skan } 232117395Skan 233132718Skan if (profile_info && flag_branch_probabilities) 234117395Skan cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); 235117395Skan else 236117395Skan cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); 237117395Skan cover_insns = (weighted_insns * cover_insns + 50) / 100; 238117395Skan max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; 239117395Skan 240117395Skan while (traced_insns < cover_insns && nduplicated < max_dup_insns 241117395Skan && !fibheap_empty (heap)) 242117395Skan { 243117395Skan basic_block bb = fibheap_extract_min (heap); 244117395Skan int n, pos; 245117395Skan 246117395Skan if (!bb) 247117395Skan break; 248117395Skan 249117395Skan blocks[bb->index] = NULL; 250117395Skan 251117395Skan if (ignore_bb_p (bb)) 252117395Skan continue; 253169689Skan gcc_assert (!seen (bb)); 254117395Skan 255117395Skan n = find_trace (bb, trace); 256117395Skan 257117395Skan bb = trace[0]; 258117395Skan traced_insns += bb->frequency * counts [bb->index]; 259117395Skan if (blocks[bb->index]) 260117395Skan { 261117395Skan fibheap_delete_node (heap, blocks[bb->index]); 262117395Skan blocks[bb->index] = NULL; 263117395Skan } 264117395Skan 265117395Skan for (pos = 1; pos < n; pos++) 266117395Skan { 267117395Skan basic_block bb2 = trace[pos]; 268117395Skan 269117395Skan if (blocks[bb2->index]) 270117395Skan { 271117395Skan fibheap_delete_node (heap, blocks[bb2->index]); 272117395Skan blocks[bb2->index] = NULL; 273117395Skan } 274117395Skan traced_insns += bb2->frequency * counts [bb2->index]; 275169689Skan if (EDGE_COUNT (bb2->preds) > 1 276169689Skan && can_duplicate_block_p (bb2)) 277117395Skan { 278169689Skan edge e; 279117395Skan basic_block old = bb2; 280117395Skan 281169689Skan e = find_edge (bb, bb2); 282169689Skan 283117395Skan nduplicated += counts [bb2->index]; 284169689Skan bb2 = duplicate_block (bb2, e, bb); 285117395Skan 286117395Skan /* Reconsider the original copy of block we've duplicated. 287132718Skan Removing the most common predecessor may make it to be 288117395Skan head. */ 289117395Skan blocks[old->index] = 290117395Skan fibheap_insert (heap, -old->frequency, old); 291117395Skan 292169689Skan if (dump_file) 293169689Skan fprintf (dump_file, "Duplicated %i as %i [%i]\n", 294117395Skan old->index, bb2->index, bb2->frequency); 295117395Skan } 296169689Skan bb->aux = bb2; 297169689Skan bb2->il.rtl->visited = 1; 298117395Skan bb = bb2; 299117395Skan /* In case the trace became infrequent, stop duplicating. */ 300117395Skan if (ignore_bb_p (bb)) 301117395Skan break; 302117395Skan } 303169689Skan if (dump_file) 304169689Skan fprintf (dump_file, " covered now %.1f\n\n", 305117395Skan traced_insns * 100.0 / weighted_insns); 306117395Skan } 307169689Skan if (dump_file) 308169689Skan fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 309117395Skan nduplicated * 100 / ninsns); 310117395Skan 311117395Skan free (blocks); 312117395Skan free (trace); 313117395Skan free (counts); 314117395Skan fibheap_delete (heap); 315117395Skan} 316117395Skan 317132718Skan/* Connect the superblocks into linear sequence. At the moment we attempt to keep 318117395Skan the original order as much as possible, but the algorithm may be made smarter 319117395Skan later if needed. BB reordering pass should void most of the benefits of such 320117395Skan change though. */ 321117395Skan 322117395Skanstatic void 323132718Skanlayout_superblocks (void) 324117395Skan{ 325169689Skan basic_block end = single_succ (ENTRY_BLOCK_PTR); 326169689Skan basic_block bb = end->next_bb; 327117395Skan 328117395Skan while (bb != EXIT_BLOCK_PTR) 329117395Skan { 330169689Skan edge_iterator ei; 331117395Skan edge e, best = NULL; 332169689Skan while (end->aux) 333169689Skan end = end->aux; 334117395Skan 335169689Skan FOR_EACH_EDGE (e, ei, end->succs) 336117395Skan if (e->dest != EXIT_BLOCK_PTR 337169689Skan && e->dest != single_succ (ENTRY_BLOCK_PTR) 338169689Skan && !e->dest->il.rtl->visited 339117395Skan && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) 340117395Skan best = e; 341117395Skan 342117395Skan if (best) 343117395Skan { 344169689Skan end->aux = best->dest; 345169689Skan best->dest->il.rtl->visited = 1; 346117395Skan } 347117395Skan else 348132718Skan for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb) 349117395Skan { 350169689Skan if (!bb->il.rtl->visited) 351117395Skan { 352169689Skan end->aux = bb; 353169689Skan bb->il.rtl->visited = 1; 354117395Skan break; 355117395Skan } 356117395Skan } 357117395Skan } 358117395Skan} 359117395Skan 360132718Skan/* Main entry point to this file. FLAGS is the set of flags to pass 361132718Skan to cfg_layout_initialize(). */ 362117395Skan 363117395Skanvoid 364132718Skantracer (unsigned int flags) 365117395Skan{ 366169689Skan if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 367117395Skan return; 368132718Skan 369132718Skan cfg_layout_initialize (flags); 370117395Skan mark_dfs_back_edges (); 371169689Skan if (dump_file) 372169689Skan dump_flow_info (dump_file, dump_flags); 373117395Skan tail_duplicate (); 374117395Skan layout_superblocks (); 375169689Skan if (dump_file) 376169689Skan dump_flow_info (dump_file, dump_flags); 377117395Skan cfg_layout_finalize (); 378132718Skan 379117395Skan /* Merge basic blocks in duplicated traces. */ 380117395Skan cleanup_cfg (CLEANUP_EXPENSIVE); 381169689Skan} 382169689Skan 383169689Skanstatic bool 384169689Skangate_handle_tracer (void) 385169689Skan{ 386169689Skan return (optimize > 0 && flag_tracer); 387169689Skan} 388132718Skan 389169689Skan/* Run tracer. */ 390169689Skanstatic unsigned int 391169689Skanrest_of_handle_tracer (void) 392169689Skan{ 393169689Skan if (dump_file) 394169689Skan dump_flow_info (dump_file, dump_flags); 395169689Skan tracer (0); 396169689Skan cleanup_cfg (CLEANUP_EXPENSIVE); 397169689Skan reg_scan (get_insns (), max_reg_num ()); 398169689Skan return 0; 399117395Skan} 400169689Skan 401169689Skanstruct tree_opt_pass pass_tracer = 402169689Skan{ 403169689Skan "tracer", /* name */ 404169689Skan gate_handle_tracer, /* gate */ 405169689Skan rest_of_handle_tracer, /* execute */ 406169689Skan NULL, /* sub */ 407169689Skan NULL, /* next */ 408169689Skan 0, /* static_pass_number */ 409169689Skan TV_TRACER, /* tv_id */ 410169689Skan 0, /* properties_required */ 411169689Skan 0, /* properties_provided */ 412169689Skan 0, /* properties_destroyed */ 413169689Skan 0, /* todo_flags_start */ 414169689Skan TODO_dump_func, /* todo_flags_finish */ 415169689Skan 'T' /* letter */ 416169689Skan}; 417169689Skan 418