tracer.c revision 267654
1114702Smurray/* The tracer pass for the GNU compiler. 2114702Smurray Contributed by Jan Hubicka, SuSE Labs. 3114702Smurray Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. 4114702Smurray 5114702Smurray This file is part of GCC. 6114702Smurray 7114702Smurray GCC is free software; you can redistribute it and/or modify it 8114702Smurray under the terms of the GNU General Public License as published by 9114702Smurray the Free Software Foundation; either version 2, or (at your option) 10114702Smurray any later version. 11114702Smurray 12114702Smurray GCC is distributed in the hope that it will be useful, but WITHOUT 13114702Smurray ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14115049Smurray or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15115049Smurray License for more details. 16115049Smurray 17115049Smurray You should have received a copy of the GNU General Public License 18114702Smurray along with GCC; see the file COPYING. If not, write to the Free 19114702Smurray Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 20114702Smurray 02110-1301, USA. */ 21114702Smurray 22114702Smurray/* This pass performs the tail duplication needed for superblock formation. 23114702Smurray For more information see: 24114702Smurray 25114702Smurray Design and Analysis of Profile-Based Optimization in Compaq's 26114702Smurray Compilation Tools for Alpha; Journal of Instruction-Level 27114702Smurray Parallelism 3 (2000) 1-25 28114702Smurray 29114702Smurray Unlike Compaq's implementation we don't do the loop peeling as most 30114702Smurray probably a better job can be done by a special pass and we don't 31114702Smurray need to worry too much about the code size implications as the tail 32114702Smurray duplicates are crossjumped again if optimizations are not 33114702Smurray performed. */ 34114702Smurray 35114702Smurray 36114702Smurray#include "config.h" 37114702Smurray#include "system.h" 38114702Smurray#include "coretypes.h" 39114702Smurray#include "tm.h" 40114702Smurray#include "tree.h" 41114702Smurray#include "rtl.h" 42114702Smurray#include "hard-reg-set.h" 43114702Smurray#include "basic-block.h" 44114702Smurray#include "output.h" 45114702Smurray#include "cfglayout.h" 46114702Smurray#include "fibheap.h" 47114702Smurray#include "flags.h" 48114702Smurray#include "timevar.h" 49115049Smurray#include "params.h" 50114702Smurray#include "coverage.h" 51114702Smurray#include "tree-pass.h" 52114702Smurray 53114702Smurraystatic int count_insns (basic_block); 54114702Smurraystatic bool ignore_bb_p (basic_block); 55114702Smurraystatic bool better_p (edge, edge); 56114702Smurraystatic edge find_best_successor (basic_block); 57114702Smurraystatic edge find_best_predecessor (basic_block); 58114702Smurraystatic int find_trace (basic_block, basic_block *); 59114702Smurraystatic void tail_duplicate (void); 60114702Smurraystatic void layout_superblocks (void); 61114702Smurray 62114702Smurray/* Minimal outgoing edge probability considered for superblock formation. */ 63114702Smurraystatic int probability_cutoff; 64114702Smurraystatic int branch_ratio_cutoff; 65114702Smurray 66114702Smurray/* Return true if BB has been seen - it is connected to some trace 67114702Smurray already. */ 68114702Smurray 69114702Smurray#define seen(bb) (bb->il.rtl->visited || bb->aux) 70114702Smurray 71114702Smurray/* Return true if we should ignore the basic block for purposes of tracing. */ 72114702Smurraystatic bool 73114702Smurrayignore_bb_p (basic_block bb) 74114702Smurray{ 75114702Smurray if (bb->index < NUM_FIXED_BLOCKS) 76114702Smurray return true; 77114702Smurray if (!maybe_hot_bb_p (bb)) 78114702Smurray return true; 79114702Smurray return false; 80114702Smurray} 81114702Smurray 82114702Smurray/* Return number of instructions in the block. */ 83114702Smurray 84114702Smurraystatic int 85114702Smurraycount_insns (basic_block bb) 86114702Smurray{ 87114702Smurray rtx insn; 88114702Smurray int n = 0; 89114702Smurray 90114702Smurray for (insn = BB_HEAD (bb); 91114702Smurray insn != NEXT_INSN (BB_END (bb)); 92114702Smurray insn = NEXT_INSN (insn)) 93114702Smurray if (active_insn_p (insn)) 94114702Smurray n++; 95114702Smurray return n; 96114702Smurray} 97114702Smurray 98114702Smurray/* Return true if E1 is more frequent than E2. */ 99114702Smurraystatic bool 100114702Smurraybetter_p (edge e1, edge e2) 101114702Smurray{ 102114702Smurray if (e1->count != e2->count) 103114702Smurray return e1->count > e2->count; 104114702Smurray if (e1->src->frequency * e1->probability != 105114702Smurray e2->src->frequency * e2->probability) 106114702Smurray return (e1->src->frequency * e1->probability 107114702Smurray > e2->src->frequency * e2->probability); 108114702Smurray /* This is needed to avoid changes in the decision after 109114702Smurray CFG is modified. */ 110114702Smurray if (e1->src != e2->src) 111114702Smurray return e1->src->index > e2->src->index; 112114702Smurray return e1->dest->index > e2->dest->index; 113114702Smurray} 114114702Smurray 115/* Return most frequent successor of basic block BB. */ 116 117static edge 118find_best_successor (basic_block bb) 119{ 120 edge e; 121 edge best = NULL; 122 edge_iterator ei; 123 124 FOR_EACH_EDGE (e, ei, bb->succs) 125 if (!best || better_p (e, best)) 126 best = e; 127 if (!best || ignore_bb_p (best->dest)) 128 return NULL; 129 if (best->probability <= probability_cutoff) 130 return NULL; 131 return best; 132} 133 134/* Return most frequent predecessor of basic block BB. */ 135 136static edge 137find_best_predecessor (basic_block bb) 138{ 139 edge e; 140 edge best = NULL; 141 edge_iterator ei; 142 143 FOR_EACH_EDGE (e, ei, bb->preds) 144 if (!best || better_p (e, best)) 145 best = e; 146 if (!best || ignore_bb_p (best->src)) 147 return NULL; 148 if (EDGE_FREQUENCY (best) * REG_BR_PROB_BASE 149 < bb->frequency * branch_ratio_cutoff) 150 return NULL; 151 return best; 152} 153 154/* Find the trace using bb and record it in the TRACE array. 155 Return number of basic blocks recorded. */ 156 157static int 158find_trace (basic_block bb, basic_block *trace) 159{ 160 int i = 0; 161 edge e; 162 163 if (dump_file) 164 fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->frequency); 165 166 while ((e = find_best_predecessor (bb)) != NULL) 167 { 168 basic_block bb2 = e->src; 169 if (seen (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 170 || find_best_successor (bb2) != e) 171 break; 172 if (dump_file) 173 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 174 bb = bb2; 175 } 176 if (dump_file) 177 fprintf (dump_file, " forward %i [%i]", bb->index, bb->frequency); 178 trace[i++] = bb; 179 180 /* Follow the trace in forward direction. */ 181 while ((e = find_best_successor (bb)) != NULL) 182 { 183 bb = e->dest; 184 if (seen (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 185 || find_best_predecessor (bb) != e) 186 break; 187 if (dump_file) 188 fprintf (dump_file, ",%i [%i]", bb->index, bb->frequency); 189 trace[i++] = bb; 190 } 191 if (dump_file) 192 fprintf (dump_file, "\n"); 193 return i; 194} 195 196/* Look for basic blocks in frequency order, construct traces and tail duplicate 197 if profitable. */ 198 199static void 200tail_duplicate (void) 201{ 202 fibnode_t *blocks = XCNEWVEC (fibnode_t, last_basic_block); 203 basic_block *trace = XNEWVEC (basic_block, n_basic_blocks); 204 int *counts = XNEWVEC (int, last_basic_block); 205 int ninsns = 0, nduplicated = 0; 206 gcov_type weighted_insns = 0, traced_insns = 0; 207 fibheap_t heap = fibheap_new (); 208 gcov_type cover_insns; 209 int max_dup_insns; 210 basic_block bb; 211 212 if (profile_info && flag_branch_probabilities) 213 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); 214 else 215 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); 216 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 217 218 branch_ratio_cutoff = 219 (REG_BR_PROB_BASE / 100 * PARAM_VALUE (TRACER_MIN_BRANCH_RATIO)); 220 221 FOR_EACH_BB (bb) 222 { 223 int n = count_insns (bb); 224 if (!ignore_bb_p (bb)) 225 blocks[bb->index] = fibheap_insert (heap, -bb->frequency, 226 bb); 227 228 counts [bb->index] = n; 229 ninsns += n; 230 weighted_insns += n * bb->frequency; 231 } 232 233 if (profile_info && flag_branch_probabilities) 234 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE_FEEDBACK); 235 else 236 cover_insns = PARAM_VALUE (TRACER_DYNAMIC_COVERAGE); 237 cover_insns = (weighted_insns * cover_insns + 50) / 100; 238 max_dup_insns = (ninsns * PARAM_VALUE (TRACER_MAX_CODE_GROWTH) + 50) / 100; 239 240 while (traced_insns < cover_insns && nduplicated < max_dup_insns 241 && !fibheap_empty (heap)) 242 { 243 basic_block bb = fibheap_extract_min (heap); 244 int n, pos; 245 246 if (!bb) 247 break; 248 249 blocks[bb->index] = NULL; 250 251 if (ignore_bb_p (bb)) 252 continue; 253 gcc_assert (!seen (bb)); 254 255 n = find_trace (bb, trace); 256 257 bb = trace[0]; 258 traced_insns += bb->frequency * counts [bb->index]; 259 if (blocks[bb->index]) 260 { 261 fibheap_delete_node (heap, blocks[bb->index]); 262 blocks[bb->index] = NULL; 263 } 264 265 for (pos = 1; pos < n; pos++) 266 { 267 basic_block bb2 = trace[pos]; 268 269 if (blocks[bb2->index]) 270 { 271 fibheap_delete_node (heap, blocks[bb2->index]); 272 blocks[bb2->index] = NULL; 273 } 274 traced_insns += bb2->frequency * counts [bb2->index]; 275 if (EDGE_COUNT (bb2->preds) > 1 276 && can_duplicate_block_p (bb2)) 277 { 278 edge e; 279 basic_block old = bb2; 280 281 e = find_edge (bb, bb2); 282 283 nduplicated += counts [bb2->index]; 284 bb2 = duplicate_block (bb2, e, bb); 285 286 /* Reconsider the original copy of block we've duplicated. 287 Removing the most common predecessor may make it to be 288 head. */ 289 blocks[old->index] = 290 fibheap_insert (heap, -old->frequency, old); 291 292 if (dump_file) 293 fprintf (dump_file, "Duplicated %i as %i [%i]\n", 294 old->index, bb2->index, bb2->frequency); 295 } 296 bb->aux = bb2; 297 bb2->il.rtl->visited = 1; 298 bb = bb2; 299 /* In case the trace became infrequent, stop duplicating. */ 300 if (ignore_bb_p (bb)) 301 break; 302 } 303 if (dump_file) 304 fprintf (dump_file, " covered now %.1f\n\n", 305 traced_insns * 100.0 / weighted_insns); 306 } 307 if (dump_file) 308 fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 309 nduplicated * 100 / ninsns); 310 311 free (blocks); 312 free (trace); 313 free (counts); 314 fibheap_delete (heap); 315} 316 317/* Connect the superblocks into linear sequence. At the moment we attempt to keep 318 the original order as much as possible, but the algorithm may be made smarter 319 later if needed. BB reordering pass should void most of the benefits of such 320 change though. */ 321 322static void 323layout_superblocks (void) 324{ 325 basic_block end = single_succ (ENTRY_BLOCK_PTR); 326 basic_block bb = end->next_bb; 327 328 while (bb != EXIT_BLOCK_PTR) 329 { 330 edge_iterator ei; 331 edge e, best = NULL; 332 while (end->aux) 333 end = end->aux; 334 335 FOR_EACH_EDGE (e, ei, end->succs) 336 if (e->dest != EXIT_BLOCK_PTR 337 && e->dest != single_succ (ENTRY_BLOCK_PTR) 338 && !e->dest->il.rtl->visited 339 && (!best || EDGE_FREQUENCY (e) > EDGE_FREQUENCY (best))) 340 best = e; 341 342 if (best) 343 { 344 end->aux = best->dest; 345 best->dest->il.rtl->visited = 1; 346 } 347 else 348 for (; bb != EXIT_BLOCK_PTR; bb = bb->next_bb) 349 { 350 if (!bb->il.rtl->visited) 351 { 352 end->aux = bb; 353 bb->il.rtl->visited = 1; 354 break; 355 } 356 } 357 } 358} 359 360/* Main entry point to this file. FLAGS is the set of flags to pass 361 to cfg_layout_initialize(). */ 362 363void 364tracer (unsigned int flags) 365{ 366 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 367 return; 368 369 cfg_layout_initialize (flags); 370 mark_dfs_back_edges (); 371 if (dump_file) 372 dump_flow_info (dump_file, dump_flags); 373 tail_duplicate (); 374 layout_superblocks (); 375 if (dump_file) 376 dump_flow_info (dump_file, dump_flags); 377 cfg_layout_finalize (); 378 379 /* Merge basic blocks in duplicated traces. */ 380 cleanup_cfg (CLEANUP_EXPENSIVE); 381} 382 383static bool 384gate_handle_tracer (void) 385{ 386 return (optimize > 0 && flag_tracer); 387} 388 389/* Run tracer. */ 390static unsigned int 391rest_of_handle_tracer (void) 392{ 393 if (dump_file) 394 dump_flow_info (dump_file, dump_flags); 395 tracer (0); 396 cleanup_cfg (CLEANUP_EXPENSIVE); 397 reg_scan (get_insns (), max_reg_num ()); 398 return 0; 399} 400 401struct tree_opt_pass pass_tracer = 402{ 403 "tracer", /* name */ 404 gate_handle_tracer, /* gate */ 405 rest_of_handle_tracer, /* execute */ 406 NULL, /* sub */ 407 NULL, /* next */ 408 0, /* static_pass_number */ 409 TV_TRACER, /* tv_id */ 410 0, /* properties_required */ 411 0, /* properties_provided */ 412 0, /* properties_destroyed */ 413 0, /* todo_flags_start */ 414 TODO_dump_func, /* todo_flags_finish */ 415 'T' /* letter */ 416}; 417 418