tracer.c revision 117395
1/* The tracer pass for the GNU compiler. 2 Contributed by Jan Hubicka, SuSE Labs. 3 Copyright (C) 2001, 2002 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING. If not, write to the Free 19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 20 02111-1307, USA. */ 21 22/* This pass performs the tail duplication needed for superblock formation. 23 For more information see: 24 25 Design and Analysis of Profile-Based Optimization in Compaq's 26 Compilation Tools for Alpha; Journal of Instruction-Level 27 Parallelism 3 (2000) 1-25 28 29 Unlike Compaq's implementation we don't do the loop peeling as most 30 probably a better job can be done by a special pass and we don't 31 need to worry too much about the code size implications as the tail 32 duplicates are crossjumped again if optimizations are not 33 performed. */ 34 35 36#include "config.h" 37#include "system.h" 38#include "tree.h" 39#include "rtl.h" 40#include "hard-reg-set.h" 41#include "basic-block.h" 42#include "output.h" 43#include "cfglayout.h" 44#include "fibheap.h" 45#include "flags.h" 46#include "params.h" 47#include "profile.h" 48 49static int count_insns PARAMS ((basic_block)); 50static bool ignore_bb_p PARAMS ((basic_block)); 51static bool better_p PARAMS ((edge, edge)); 52static edge find_best_successor PARAMS ((basic_block)); 53static edge find_best_predecessor PARAMS ((basic_block)); 54static int find_trace PARAMS ((basic_block, basic_block *)); 55static void tail_duplicate PARAMS ((void)); 56static void layout_superblocks PARAMS ((void)); 57static bool ignore_bb_p PARAMS ((basic_block)); 58 59/* Minimal outgoing edge probability considered for superblock formation. */ 60static int probability_cutoff; 61static int branch_ratio_cutoff; 62 63/* Return true if BB has been seen - it is connected to some trace 64 already. */ 65 66#define seen(bb) (RBI (bb)->visited || RBI (bb)->next) 67 68/* Return true if we should ignore the basic block for purposes of tracing. */ 69static bool 70ignore_bb_p (bb) 71 basic_block bb; 72{ 73 if (bb->index < 0) 74 return true; 75 if (!maybe_hot_bb_p (bb)) 76 return true; 77 return false; 78} 79 80/* Return number of instructions in the block. */ 81 82static int 83count_insns (bb) 84 basic_block bb; 85{ 86 rtx insn; 87 int n = 0; 88 89 for (insn = bb->head; insn != NEXT_INSN (bb->end); insn = NEXT_INSN (insn)) 90 if (active_insn_p (insn)) 91 n++; 92 return n; 93} 94 95/* Return true if E1 is more frequent than E2. */ 96static bool 97better_p (e1, e2) 98 edge e1, e2; 99{ 100 if (e1->count != e2->count) 101 return e1->count > e2->count; 102 if (e1->src->frequency * e1->probability != 103 e2->src->frequency * e2->probability) 104 return (e1->src->frequency * e1->probability 105 > e2->src->frequency * e2->probability); 106 /* This is needed to avoid changes in the decision after 107 CFG is modified. */ 108 if (e1->src != e2->src) 109 return e1->src->index > e2->src->index; 110 return e1->dest->index > e2->dest->index; 111} 112 113/* Return most frequent successor of basic block BB. */ 114 115static edge 116find_best_successor (bb) 117 basic_block bb; 118{ 119 edge e; 120 edge best = NULL; 121 122 for (e = bb->succ; e; e = e->succ_next) 123 if (!best || better_p (e, best)) 124 best = e; 125 if (!best || ignore_bb_p (best->dest)) 126 return NULL; 127 if (best->probability <= probability_cutoff) 128 return NULL; 129 return best; 130} 131 132/* Return most frequent predecessor of basic block BB. */ 133 134static edge 135find_best_predecessor (bb) 136 basic_block bb; 137{ 138 edge e; 139 edge best = NULL; 140 141 for (e = bb->pred; e; e = e->pred_next) 142 if (!best || better_p (e, best)) 143 best = e; 144 if (!best || ignore_bb_p (best->src)) 145 return NULL; 146 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE 147 < bb->frequency * branch_ratio_cutoff) 148 return NULL; 149 return best; 150} 151 152/* Find the trace using bb and record it in the TRACE array. 153 Return number of basic blocks recorded. */ 154 155static int 156find_trace (bb, trace) 157 basic_block bb; 158 basic_block *trace; 159{ 160 int i = 0; 161 edge e; 162 163 if (rtl_dump_file) 164 fprintf (rtl_dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); 165 166 while ((e = find_best_predecessor (bb)) != NULL) 167 { 168 basic_block bb2 = e->src; 169 if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 170 || find_best_successor (bb2) != e) 171 break; 172 if (rtl_dump_file) 173 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); 174 bb = bb2; 175 } 176 if (rtl_dump_file) 177 fprintf (rtl_dump_file, " forward %i [%i]", bb->index, bb->frequency); 178 trace[i++] = bb; 179 180 /* Follow the trace in forward direction. */ 181 while ((e = find_best_successor (bb)) != NULL) 182 { 183 bb = e->dest; 184 if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 185 || find_best_predecessor (bb) != e) 186 break; 187 if (rtl_dump_file) 188 fprintf (rtl_dump_file, ",%i [%i]", bb->index, bb->frequency); 189 trace[i++] = bb; 190 } 191 if (rtl_dump_file) 192 fprintf (rtl_dump_file, "\n"); 193 return i; 194} 195 196/* Look for basic blocks in frequency order, construct traces and tail duplicate 197 if profitable. */ 198 199static void 200tail_duplicate () 201{ 202 fibnode_t *blocks = xcalloc (last_basic_block, sizeof (fibnode_t)); 203 basic_block *trace = xmalloc (sizeof (basic_block) * n_basic_blocks); 204 int *counts = xmalloc (sizeof (int) * last_basic_block); 205 int ninsns = 0, nduplicated = 0; 206 gcov_type weighted_insns = 0, traced_insns = 0; 207 fibheap_t heap = fibheap_new (); 208 gcov_type cover_insns; 209 int max_dup_insns; 210 basic_block bb; 211 212 if (profile_info.count_profiles_merged && flag_branch_probabilities) 213 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); 214 else 215 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); 216 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 217 218 branch_ratio_cutoff = 219 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); 220 221 FOR_EACH_BB (bb) 222 { 223 int n = count_insns (bb); 224 if (!ignore_bb_p (bb)) 225 blocks[bb->index] = fibheap_insert (heap, -bb->frequency, 226 bb); 227 228 counts [bb->index] = n; 229 ninsns += n; 230 weighted_insns += n * bb->frequency; 231 } 232 233 if (profile_info.count_profiles_merged && flag_branch_probabilities) 234 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); 235 else 236 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); 237 cover_insns = (weighted_insns * cover_insns + 50) / 100; 238 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; 239 240 while (traced_insns < cover_insns && nduplicated < max_dup_insns 241 && !fibheap_empty (heap)) 242 { 243 basic_block bb = fibheap_extract_min (heap); 244 int n, pos; 245 246 if (!bb) 247 break; 248 249 blocks[bb->index] = NULL; 250 251 if (ignore_bb_p (bb)) 252 continue; 253 if (seen (bb)) 254 abort (); 255 256 n = find_trace (bb, trace); 257 258 bb = trace[0]; 259 traced_insns += bb->frequency * counts [bb->index]; 260 if (blocks[bb->index]) 261 { 262 fibheap_delete_node (heap, blocks[bb->index]); 263 blocks[bb->index] = NULL; 264 } 265 266 for (pos = 1; pos < n; pos++) 267 { 268 basic_block bb2 = trace[pos]; 269 270 if (blocks[bb2->index]) 271 { 272 fibheap_delete_node (heap, blocks[bb2->index]); 273 blocks[bb2->index] = NULL; 274 } 275 traced_insns += bb2->frequency * counts [bb2->index]; 276 if (bb2->pred && bb2->pred->pred_next 277 && cfg_layout_can_duplicate_bb_p (bb2)) 278 { 279 edge e = bb2->pred; 280 basic_block old = bb2; 281 282 while (e->src != bb) 283 e = e->pred_next; 284 nduplicated += counts [bb2->index]; 285 bb2 = cfg_layout_duplicate_bb (bb2, e); 286 287 /* Reconsider the original copy of block we've duplicated. 288 Removing the most common predecesor may make it to be 289 head. */ 290 blocks[old->index] = 291 fibheap_insert (heap, -old->frequency, old); 292 293 if (rtl_dump_file) 294 fprintf (rtl_dump_file, "Duplicated %i as %i [%i]\n", 295 old->index, bb2->index, bb2->frequency); 296 } 297 RBI (bb)->next = bb2; 298 RBI (bb2)->visited = 1; 299 bb = bb2; 300 /* In case the trace became infrequent, stop duplicating. */ 301 if (ignore_bb_p (bb)) 302 break; 303 } 304 if (rtl_dump_file) 305 fprintf (rtl_dump_file, " covered now %.1f\n\n", 306 traced_insns * 100.0 / weighted_insns); 307 } 308 if (rtl_dump_file) 309 fprintf (rtl_dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 310 nduplicated * 100 / ninsns); 311 312 free (blocks); 313 free (trace); 314 free (counts); 315 fibheap_delete (heap); 316} 317 318/* Connect the superblocks into linear seuqence. At the moment we attempt to keep 319 the original order as much as possible, but the algorithm may be made smarter 320 later if needed. BB reordering pass should void most of the benefits of such 321 change though. */ 322 323static void 324layout_superblocks () 325{ 326 basic_block end = ENTRY_BLOCK_PTR->succ->dest; 327 basic_block bb = ENTRY_BLOCK_PTR->succ->dest->next_bb; 328 329 while (bb != EXIT_BLOCK_PTR) 330 { 331 edge e, best = NULL; 332 while (RBI (end)->next) 333 end = RBI (end)->next; 334 335 for (e = end->succ; e; e = e->succ_next) 336 if (e->dest != EXIT_BLOCK_PTR 337 && e->dest != ENTRY_BLOCK_PTR->succ->dest 338 && !RBI (e->dest)->visited 339 && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) 340 best = e; 341 342 if (best) 343 { 344 RBI (end)->next = best->dest; 345 RBI (best->dest)->visited = 1; 346 } 347 else 348 for (; bb != EXIT_BLOCK_PTR; bb=bb->next_bb) 349 { 350 if (!RBI (bb)->visited) 351 { 352 RBI (end)->next = bb; 353 RBI (bb)->visited = 1; 354 break; 355 } 356 } 357 } 358} 359 360/* Main entry point to this file. */ 361 362void 363tracer () 364{ 365 if (n_basic_blocks <= 1) 366 return; 367 cfg_layout_initialize (); 368 mark_dfs_back_edges (); 369 if (rtl_dump_file) 370 dump_flow_info (rtl_dump_file); 371 tail_duplicate (); 372 layout_superblocks (); 373 if (rtl_dump_file) 374 dump_flow_info (rtl_dump_file); 375 cfg_layout_finalize (); 376 /* Merge basic blocks in duplicated traces. */ 377 cleanup_cfg (CLEANUP_EXPENSIVE); 378} 379