bb-reorder.c revision 132718
1161709Sdanger/* Basic block reordering routines for the GNU compiler. 2161709Sdanger Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc. 3161709Sdanger 4161709Sdanger This file is part of GCC. 5161709Sdanger 6161709Sdanger GCC is free software; you can redistribute it and/or modify it 7161709Sdanger under the terms of the GNU General Public License as published by 8161709Sdanger the Free Software Foundation; either version 2, or (at your option) 9161709Sdanger any later version. 10161709Sdanger 11161709Sdanger GCC is distributed in the hope that it will be useful, but WITHOUT 12161709Sdanger ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13161709Sdanger or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 14161709Sdanger License for more details. 15161709Sdanger 16161709Sdanger You should have received a copy of the GNU General Public License 17161709Sdanger along with GCC; see the file COPYING. If not, write to the Free 18161709Sdanger Software Foundation, 59 Temple Place - Suite 330, Boston, MA 19161709Sdanger 02111-1307, USA. */ 20161709Sdanger 21161709Sdanger/* This (greedy) algorithm constructs traces in several rounds. 22161709Sdanger The construction starts from "seeds". The seed for the first round 23161709Sdanger is the entry point of function. When there are more than one seed 24161709Sdanger that one is selected first that has the lowest key in the heap 25161709Sdanger (see function bb_to_key). Then the algorithm repeatedly adds the most 26161709Sdanger probable successor to the end of a trace. Finally it connects the traces. 27300199Smaxim 28161709Sdanger There are two parameters: Branch Threshold and Exec Threshold. 29161709Sdanger If the edge to a successor of the actual basic block is lower than 30161709Sdanger Branch Threshold or the frequency of the successor is lower than 31161709Sdanger Exec Threshold the successor will be the seed in one of the next rounds. 32161709Sdanger Each round has these parameters lower than the previous one. 33161709Sdanger The last round has to have these parameters set to zero 34161709Sdanger so that the remaining blocks are picked up. 35161709Sdanger 36300199Smaxim The algorithm selects the most probable successor from all unvisited 37161709Sdanger successors and successors that have been added to this trace. 38161709Sdanger The other successors (that has not been "sent" to the next round) will be 39161709Sdanger other seeds for this round and the secondary traces will start in them. 40161709Sdanger If the successor has not been visited in this trace it is added to the trace 41161709Sdanger (however, there is some heuristic for simple branches). 42161709Sdanger If the successor has been visited in this trace the loop has been found. 43161709Sdanger If the loop has many iterations the loop is rotated so that the 44161709Sdanger source block of the most probable edge going out from the loop 45161709Sdanger is the last block of the trace. 46161709Sdanger If the loop has few iterations and there is no edge from the last block of 47161709Sdanger the loop going out from loop the loop header is duplicated. 48161709Sdanger Finally, the construction of the trace is terminated. 49161709Sdanger 50161709Sdanger When connecting traces it first checks whether there is an edge from the 51161709Sdanger last block of one trace to the first block of another trace. 52161709Sdanger When there are still some unconnected traces it checks whether there exists 53161709Sdanger a basic block BB such that BB is a successor of the last bb of one trace 54161709Sdanger and BB is a predecessor of the first block of another trace. In this case, 55161709Sdanger BB is duplicated and the traces are connected through this duplicate. 56300199Smaxim The rest of traces are simply connected so there will be a jump to the 57300199Smaxim beginning of the rest of trace. 58161709Sdanger 59161709Sdanger 60300199Smaxim References: 61300199Smaxim 62300199Smaxim "Software Trace Cache" 63300199Smaxim A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999 64300199Smaxim http://citeseer.nj.nec.com/15361.html 65300199Smaxim 66161709Sdanger*/ 67161709Sdanger 68161709Sdanger#include "config.h" 69161709Sdanger#include "system.h" 70189880Ssam#include "coretypes.h" 71189880Ssam#include "tm.h" 72189880Ssam#include "rtl.h" 73189880Ssam#include "basic-block.h" 74189880Ssam#include "flags.h" 75189880Ssam#include "timevar.h" 76300199Smaxim#include "output.h" 77300199Smaxim#include "cfglayout.h" 78300199Smaxim#include "fibheap.h" 79300199Smaxim#include "target.h" 80161709Sdanger 81161709Sdanger/* The number of rounds. */ 82161709Sdanger#define N_ROUNDS 4 83161709Sdanger 84300199Smaxim/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */ 85300199Smaximstatic int branch_threshold[N_ROUNDS] = {400, 200, 100, 0}; 86300199Smaxim 87161709Sdanger/* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */ 88161709Sdangerstatic int exec_threshold[N_ROUNDS] = {500, 200, 50, 0}; 89161709Sdanger 90161709Sdanger/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry 91161709Sdanger block the edge destination is not duplicated while connecting traces. */ 92161709Sdanger#define DUPLICATION_THRESHOLD 100 93161709Sdanger 94161709Sdanger/* Length of unconditional jump instruction. */ 95161709Sdangerstatic int uncond_jump_length; 96161709Sdanger 97161709Sdanger/* Structure to hold needed information for each basic block. */ 98161709Sdangertypedef struct bbro_basic_block_data_def 99161709Sdanger{ 100161709Sdanger /* Which trace is the bb start of (-1 means it is not a start of a trace). */ 101161709Sdanger int start_of_trace; 102161709Sdanger 103161709Sdanger /* Which trace is the bb end of (-1 means it is not an end of a trace). */ 104161709Sdanger int end_of_trace; 105161709Sdanger 106161709Sdanger /* Which heap is BB in (if any)? */ 107161709Sdanger fibheap_t heap; 108161709Sdanger 109161709Sdanger /* Which heap node is BB in (if any)? */ 110161709Sdanger fibnode_t node; 111161709Sdanger} bbro_basic_block_data; 112161709Sdanger 113161709Sdanger/* The current size of the following dynamic array. */ 114161709Sdangerstatic int array_size; 115161709Sdanger 116161709Sdanger/* The array which holds needed information for basic blocks. */ 117161709Sdangerstatic bbro_basic_block_data *bbd; 118161709Sdanger 119161709Sdanger/* To avoid frequent reallocation the size of arrays is greater than needed, 120161709Sdanger the number of elements is (not less than) 1.25 * size_wanted. */ 121161709Sdanger#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5) 122286663Sbrueffer 123161709Sdanger/* Free the memory and set the pointer to NULL. */ 124161709Sdanger#define FREE(P) \ 125161709Sdanger do { if (P) { free (P); P = 0; } else { abort (); } } while (0) 126161709Sdanger 127161709Sdanger/* Structure for holding information about a trace. */ 128161709Sdangerstruct trace 129161709Sdanger{ 130161709Sdanger /* First and last basic block of the trace. */ 131161709Sdanger basic_block first, last; 132161709Sdanger 133161709Sdanger /* The round of the STC creation which this trace was found in. */ 134161709Sdanger int round; 135161709Sdanger 136161709Sdanger /* The length (i.e. the number of basic blocks) of the trace. */ 137161709Sdanger int length; 138161709Sdanger}; 139161709Sdanger 140161709Sdanger/* Maximum frequency and count of one of the entry blocks. */ 141161709Sdangerint max_entry_frequency; 142161709Sdangergcov_type max_entry_count; 143161709Sdanger 144161709Sdanger/* Local function prototypes. */ 145161709Sdangerstatic void find_traces (int *, struct trace *); 146161709Sdangerstatic basic_block rotate_loop (edge, struct trace *, int); 147161709Sdangerstatic void mark_bb_visited (basic_block, int); 148161709Sdangerstatic void find_traces_1_round (int, int, gcov_type, struct trace *, int *, 149161709Sdanger int, fibheap_t *); 150161709Sdangerstatic basic_block copy_bb (basic_block, edge, basic_block, int); 151161709Sdangerstatic fibheapkey_t bb_to_key (basic_block); 152161709Sdangerstatic bool better_edge_p (basic_block, edge, int, int, int, int); 153161709Sdangerstatic void connect_traces (int, struct trace *); 154161709Sdangerstatic bool copy_bb_p (basic_block, int); 155161709Sdangerstatic int get_uncond_jump_length (void); 156161709Sdanger 157161709Sdanger/* Find the traces for Software Trace Cache. Chain each trace through 158161709Sdanger RBI()->next. Store the number of traces to N_TRACES and description of 159161709Sdanger traces to TRACES. */ 160161709Sdanger 161161709Sdangerstatic void 162161709Sdangerfind_traces (int *n_traces, struct trace *traces) 163161709Sdanger{ 164161709Sdanger int i; 165161709Sdanger edge e; 166161709Sdanger fibheap_t heap; 167161709Sdanger 168161709Sdanger /* Insert entry points of function into heap. */ 169161709Sdanger heap = fibheap_new (); 170161709Sdanger max_entry_frequency = 0; 171161709Sdanger max_entry_count = 0; 172161709Sdanger for (e = ENTRY_BLOCK_PTR->succ; e; e = e->succ_next) 173161709Sdanger { 174161709Sdanger bbd[e->dest->index].heap = heap; 175161709Sdanger bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest), 176161709Sdanger e->dest); 177161709Sdanger if (e->dest->frequency > max_entry_frequency) 178161709Sdanger max_entry_frequency = e->dest->frequency; 179161709Sdanger if (e->dest->count > max_entry_count) 180161709Sdanger max_entry_count = e->dest->count; 181161709Sdanger } 182161709Sdanger 183161709Sdanger /* Find the traces. */ 184161709Sdanger for (i = 0; i < N_ROUNDS; i++) 185161709Sdanger { 186161709Sdanger gcov_type count_threshold; 187161709Sdanger 188161709Sdanger if (rtl_dump_file) 189161709Sdanger fprintf (rtl_dump_file, "STC - round %d\n", i + 1); 190161709Sdanger 191161709Sdanger if (max_entry_count < INT_MAX / 1000) 192161709Sdanger count_threshold = max_entry_count * exec_threshold[i] / 1000; 193161709Sdanger else 194161709Sdanger count_threshold = max_entry_count / 1000 * exec_threshold[i]; 195161709Sdanger 196161709Sdanger find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000, 197161709Sdanger max_entry_frequency * exec_threshold[i] / 1000, 198161709Sdanger count_threshold, traces, n_traces, i, &heap); 199161709Sdanger } 200161709Sdanger fibheap_delete (heap); 201161709Sdanger 202161709Sdanger if (rtl_dump_file) 203161709Sdanger { 204161709Sdanger for (i = 0; i < *n_traces; i++) 205161709Sdanger { 206161709Sdanger basic_block bb; 207161709Sdanger fprintf (rtl_dump_file, "Trace %d (round %d): ", i + 1, 208161709Sdanger traces[i].round + 1); 209161709Sdanger for (bb = traces[i].first; bb != traces[i].last; bb = bb->rbi->next) 210161709Sdanger fprintf (rtl_dump_file, "%d [%d] ", bb->index, bb->frequency); 211161709Sdanger fprintf (rtl_dump_file, "%d [%d]\n", bb->index, bb->frequency); 212161709Sdanger } 213161709Sdanger fflush (rtl_dump_file); 214161709Sdanger } 215161709Sdanger} 216161709Sdanger 217161709Sdanger/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE 218161709Sdanger (with sequential number TRACE_N). */ 219161709Sdanger 220161709Sdangerstatic basic_block 221208357Swxsrotate_loop (edge back_edge, struct trace *trace, int trace_n) 222301591Strasz{ 223301591Strasz basic_block bb; 224208357Swxs 225168894Sadrian /* Information about the best end (end after rotation) of the loop. */ 226168894Sadrian basic_block best_bb = NULL; 227169834Sru edge best_edge = NULL; 228169834Sru int best_freq = -1; 229169834Sru gcov_type best_count = -1; 230169834Sru /* The best edge is preferred when its destination is not visited yet 231168894Sadrian or is a start block of some trace. */ 232169834Sru bool is_preferred = false; 233169834Sru 234169834Sru /* Find the most frequent edge that goes out from current trace. */ 235169834Sru bb = back_edge->dest; 236169834Sru do 237168894Sadrian { 238168894Sadrian edge e; 239168894Sadrian for (e = bb->succ; e; e = e->succ_next) 240168894Sadrian if (e->dest != EXIT_BLOCK_PTR 241168894Sadrian && e->dest->rbi->visited != trace_n 242168894Sadrian && (e->flags & EDGE_CAN_FALLTHRU) 243161709Sdanger && !(e->flags & EDGE_COMPLEX)) 244161709Sdanger { 245161709Sdanger if (is_preferred) 246161709Sdanger { 247161709Sdanger /* The best edge is preferred. */ 248161709Sdanger if (!e->dest->rbi->visited 249161709Sdanger || bbd[e->dest->index].start_of_trace >= 0) 250161709Sdanger { 251161709Sdanger /* The current edge E is also preferred. */ 252161709Sdanger int freq = EDGE_FREQUENCY (e); 253161709Sdanger if (freq > best_freq || e->count > best_count) 254161709Sdanger { 255161709Sdanger best_freq = freq; 256161709Sdanger best_count = e->count; 257161709Sdanger best_edge = e; 258161709Sdanger best_bb = bb; 259161709Sdanger } 260161709Sdanger } 261161709Sdanger } 262161709Sdanger else 263161709Sdanger { 264161709Sdanger if (!e->dest->rbi->visited 265161709Sdanger || bbd[e->dest->index].start_of_trace >= 0) 266161709Sdanger { 267161709Sdanger /* The current edge E is preferred. */ 268161709Sdanger is_preferred = true; 269161709Sdanger best_freq = EDGE_FREQUENCY (e); 270161709Sdanger best_count = e->count; 271161709Sdanger best_edge = e; 272161709Sdanger best_bb = bb; 273161709Sdanger } 274161709Sdanger else 275161709Sdanger { 276161709Sdanger int freq = EDGE_FREQUENCY (e); 277161709Sdanger if (!best_edge || freq > best_freq || e->count > best_count) 278161709Sdanger { 279161709Sdanger best_freq = freq; 280161709Sdanger best_count = e->count; 281161709Sdanger best_edge = e; 282161709Sdanger best_bb = bb; 283161709Sdanger } 284161709Sdanger } 285161709Sdanger } 286161709Sdanger } 287161709Sdanger bb = bb->rbi->next; 288161709Sdanger } 289161709Sdanger while (bb != back_edge->dest); 290161709Sdanger 291161709Sdanger if (best_bb) 292161709Sdanger { 293161709Sdanger /* Rotate the loop so that the BEST_EDGE goes out from the last block of 294161709Sdanger the trace. */ 295161709Sdanger if (back_edge->dest == trace->first) 296270647Sse { 297270647Sse trace->first = best_bb->rbi->next; 298161709Sdanger } 299161709Sdanger else 300161709Sdanger { 301161709Sdanger basic_block prev_bb; 302161709Sdanger 303161709Sdanger for (prev_bb = trace->first; 304161709Sdanger prev_bb->rbi->next != back_edge->dest; 305161709Sdanger prev_bb = prev_bb->rbi->next) 306161709Sdanger ; 307161709Sdanger prev_bb->rbi->next = best_bb->rbi->next; 308161709Sdanger 309161709Sdanger /* Try to get rid of uncond jump to cond jump. */ 310161709Sdanger if (prev_bb->succ && !prev_bb->succ->succ_next) 311161709Sdanger { 312161709Sdanger basic_block header = prev_bb->succ->dest; 313164007Sdanger 314161709Sdanger /* Duplicate HEADER if it is a small block containing cond jump 315161709Sdanger in the end. */ 316161709Sdanger if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)) 317161709Sdanger { 318161709Sdanger copy_bb (header, prev_bb->succ, prev_bb, trace_n); 319161709Sdanger } 320161709Sdanger } 321161709Sdanger } 322161709Sdanger } 323161709Sdanger else 324161709Sdanger { 325161709Sdanger /* We have not found suitable loop tail so do no rotation. */ 326161709Sdanger best_bb = back_edge->src; 327161709Sdanger } 328161709Sdanger best_bb->rbi->next = NULL; 329161709Sdanger return best_bb; 330161709Sdanger} 331161709Sdanger 332161709Sdanger/* This function marks BB that it was visited in trace number TRACE. */ 333161709Sdanger 334161709Sdangerstatic void 335161709Sdangermark_bb_visited (basic_block bb, int trace) 336161709Sdanger{ 337161709Sdanger bb->rbi->visited = trace; 338161709Sdanger if (bbd[bb->index].heap) 339161709Sdanger { 340161709Sdanger fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node); 341168894Sadrian bbd[bb->index].heap = NULL; 342168894Sadrian bbd[bb->index].node = NULL; 343161709Sdanger } 344161709Sdanger} 345161709Sdanger 346161709Sdanger/* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do 347161709Sdanger not include basic blocks their probability is lower than BRANCH_TH or their 348161709Sdanger frequency is lower than EXEC_TH into traces (or count is lower than 349161709Sdanger COUNT_TH). It stores the new traces into TRACES and modifies the number of 350161709Sdanger traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It 351161709Sdanger expects that starting basic blocks are in *HEAP and at the end it deletes 352161709Sdanger *HEAP and stores starting points for the next round into new *HEAP. */ 353267776Sbapt 354161709Sdangerstatic void 355267776Sbaptfind_traces_1_round (int branch_th, int exec_th, gcov_type count_th, 356 struct trace *traces, int *n_traces, int round, 357 fibheap_t *heap) 358{ 359 /* Heap for discarded basic blocks which are possible starting points for 360 the next round. */ 361 fibheap_t new_heap = fibheap_new (); 362 363 while (!fibheap_empty (*heap)) 364 { 365 basic_block bb; 366 struct trace *trace; 367 edge best_edge, e; 368 fibheapkey_t key; 369 370 bb = fibheap_extract_min (*heap); 371 bbd[bb->index].heap = NULL; 372 bbd[bb->index].node = NULL; 373 374 if (rtl_dump_file) 375 fprintf (rtl_dump_file, "Getting bb %d\n", bb->index); 376 377 /* If the BB's frequency is too low send BB to the next round. */ 378 if (round < N_ROUNDS - 1 379 && (bb->frequency < exec_th || bb->count < count_th 380 || probably_never_executed_bb_p (bb))) 381 { 382 int key = bb_to_key (bb); 383 bbd[bb->index].heap = new_heap; 384 bbd[bb->index].node = fibheap_insert (new_heap, key, bb); 385 386 if (rtl_dump_file) 387 fprintf (rtl_dump_file, 388 " Possible start point of next round: %d (key: %d)\n", 389 bb->index, key); 390 continue; 391 } 392 393 trace = traces + *n_traces; 394 trace->first = bb; 395 trace->round = round; 396 trace->length = 0; 397 (*n_traces)++; 398 399 do 400 { 401 int prob, freq; 402 403 /* The probability and frequency of the best edge. */ 404 int best_prob = INT_MIN / 2; 405 int best_freq = INT_MIN / 2; 406 407 best_edge = NULL; 408 mark_bb_visited (bb, *n_traces); 409 trace->length++; 410 411 if (rtl_dump_file) 412 fprintf (rtl_dump_file, "Basic block %d was visited in trace %d\n", 413 bb->index, *n_traces - 1); 414 415 /* Select the successor that will be placed after BB. */ 416 for (e = bb->succ; e; e = e->succ_next) 417 { 418#ifdef ENABLE_CHECKING 419 if (e->flags & EDGE_FAKE) 420 abort (); 421#endif 422 423 if (e->dest == EXIT_BLOCK_PTR) 424 continue; 425 426 if (e->dest->rbi->visited 427 && e->dest->rbi->visited != *n_traces) 428 continue; 429 430 prob = e->probability; 431 freq = EDGE_FREQUENCY (e); 432 433 /* Edge that cannot be fallthru or improbable or infrequent 434 successor (ie. it is unsuitable successor). */ 435 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX) 436 || prob < branch_th || freq < exec_th || e->count < count_th) 437 continue; 438 439 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq)) 440 { 441 best_edge = e; 442 best_prob = prob; 443 best_freq = freq; 444 } 445 } 446 447 /* If the best destination has multiple predecessors, and can be 448 duplicated cheaper than a jump, don't allow it to be added 449 to a trace. We'll duplicate it when connecting traces. */ 450 if (best_edge && best_edge->dest->pred->pred_next 451 && copy_bb_p (best_edge->dest, 0)) 452 best_edge = NULL; 453 454 /* Add all non-selected successors to the heaps. */ 455 for (e = bb->succ; e; e = e->succ_next) 456 { 457 if (e == best_edge 458 || e->dest == EXIT_BLOCK_PTR 459 || e->dest->rbi->visited) 460 continue; 461 462 key = bb_to_key (e->dest); 463 464 if (bbd[e->dest->index].heap) 465 { 466 /* E->DEST is already in some heap. */ 467 if (key != bbd[e->dest->index].node->key) 468 { 469 if (rtl_dump_file) 470 { 471 fprintf (rtl_dump_file, 472 "Changing key for bb %d from %ld to %ld.\n", 473 e->dest->index, 474 (long) bbd[e->dest->index].node->key, 475 key); 476 } 477 fibheap_replace_key (bbd[e->dest->index].heap, 478 bbd[e->dest->index].node, key); 479 } 480 } 481 else 482 { 483 fibheap_t which_heap = *heap; 484 485 prob = e->probability; 486 freq = EDGE_FREQUENCY (e); 487 488 if (!(e->flags & EDGE_CAN_FALLTHRU) 489 || (e->flags & EDGE_COMPLEX) 490 || prob < branch_th || freq < exec_th 491 || e->count < count_th) 492 { 493 if (round < N_ROUNDS - 1) 494 which_heap = new_heap; 495 } 496 497 bbd[e->dest->index].heap = which_heap; 498 bbd[e->dest->index].node = fibheap_insert (which_heap, 499 key, e->dest); 500 501 if (rtl_dump_file) 502 { 503 fprintf (rtl_dump_file, 504 " Possible start of %s round: %d (key: %ld)\n", 505 (which_heap == new_heap) ? "next" : "this", 506 e->dest->index, (long) key); 507 } 508 509 } 510 } 511 512 if (best_edge) /* Suitable successor was found. */ 513 { 514 if (best_edge->dest->rbi->visited == *n_traces) 515 { 516 /* We do nothing with one basic block loops. */ 517 if (best_edge->dest != bb) 518 { 519 if (EDGE_FREQUENCY (best_edge) 520 > 4 * best_edge->dest->frequency / 5) 521 { 522 /* The loop has at least 4 iterations. If the loop 523 header is not the first block of the function 524 we can rotate the loop. */ 525 526 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb) 527 { 528 if (rtl_dump_file) 529 { 530 fprintf (rtl_dump_file, 531 "Rotating loop %d - %d\n", 532 best_edge->dest->index, bb->index); 533 } 534 bb->rbi->next = best_edge->dest; 535 bb = rotate_loop (best_edge, trace, *n_traces); 536 } 537 } 538 else 539 { 540 /* The loop has less than 4 iterations. */ 541 542 /* Check whether there is another edge from BB. */ 543 edge another_edge; 544 for (another_edge = bb->succ; 545 another_edge; 546 another_edge = another_edge->succ_next) 547 if (another_edge != best_edge) 548 break; 549 550 if (!another_edge && copy_bb_p (best_edge->dest, 551 !optimize_size)) 552 { 553 bb = copy_bb (best_edge->dest, best_edge, bb, 554 *n_traces); 555 } 556 } 557 } 558 559 /* Terminate the trace. */ 560 break; 561 } 562 else 563 { 564 /* Check for a situation 565 566 A 567 /| 568 B | 569 \| 570 C 571 572 where 573 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC) 574 >= EDGE_FREQUENCY (AC). 575 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) ) 576 Best ordering is then A B C. 577 578 This situation is created for example by: 579 580 if (A) B; 581 C; 582 583 */ 584 585 for (e = bb->succ; e; e = e->succ_next) 586 if (e != best_edge 587 && (e->flags & EDGE_CAN_FALLTHRU) 588 && !(e->flags & EDGE_COMPLEX) 589 && !e->dest->rbi->visited 590 && !e->dest->pred->pred_next 591 && e->dest->succ 592 && (e->dest->succ->flags & EDGE_CAN_FALLTHRU) 593 && !(e->dest->succ->flags & EDGE_COMPLEX) 594 && !e->dest->succ->succ_next 595 && e->dest->succ->dest == best_edge->dest 596 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)) 597 { 598 best_edge = e; 599 if (rtl_dump_file) 600 fprintf (rtl_dump_file, "Selecting BB %d\n", 601 best_edge->dest->index); 602 break; 603 } 604 605 bb->rbi->next = best_edge->dest; 606 bb = best_edge->dest; 607 } 608 } 609 } 610 while (best_edge); 611 trace->last = bb; 612 bbd[trace->first->index].start_of_trace = *n_traces - 1; 613 bbd[trace->last->index].end_of_trace = *n_traces - 1; 614 615 /* The trace is terminated so we have to recount the keys in heap 616 (some block can have a lower key because now one of its predecessors 617 is an end of the trace). */ 618 for (e = bb->succ; e; e = e->succ_next) 619 { 620 if (e->dest == EXIT_BLOCK_PTR 621 || e->dest->rbi->visited) 622 continue; 623 624 if (bbd[e->dest->index].heap) 625 { 626 key = bb_to_key (e->dest); 627 if (key != bbd[e->dest->index].node->key) 628 { 629 if (rtl_dump_file) 630 { 631 fprintf (rtl_dump_file, 632 "Changing key for bb %d from %ld to %ld.\n", 633 e->dest->index, 634 (long) bbd[e->dest->index].node->key, key); 635 } 636 fibheap_replace_key (bbd[e->dest->index].heap, 637 bbd[e->dest->index].node, 638 key); 639 } 640 } 641 } 642 } 643 644 fibheap_delete (*heap); 645 646 /* "Return" the new heap. */ 647 *heap = new_heap; 648} 649 650/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add 651 it to trace after BB, mark OLD_BB visited and update pass' data structures 652 (TRACE is a number of trace which OLD_BB is duplicated to). */ 653 654static basic_block 655copy_bb (basic_block old_bb, edge e, basic_block bb, int trace) 656{ 657 basic_block new_bb; 658 659 new_bb = cfg_layout_duplicate_bb (old_bb, e); 660 if (e->dest != new_bb) 661 abort (); 662 if (e->dest->rbi->visited) 663 abort (); 664 if (rtl_dump_file) 665 fprintf (rtl_dump_file, 666 "Duplicated bb %d (created bb %d)\n", 667 old_bb->index, new_bb->index); 668 new_bb->rbi->visited = trace; 669 new_bb->rbi->next = bb->rbi->next; 670 bb->rbi->next = new_bb; 671 672 if (new_bb->index >= array_size || last_basic_block > array_size) 673 { 674 int i; 675 int new_size; 676 677 new_size = MAX (last_basic_block, new_bb->index + 1); 678 new_size = GET_ARRAY_SIZE (new_size); 679 bbd = xrealloc (bbd, new_size * sizeof (bbro_basic_block_data)); 680 for (i = array_size; i < new_size; i++) 681 { 682 bbd[i].start_of_trace = -1; 683 bbd[i].end_of_trace = -1; 684 bbd[i].heap = NULL; 685 bbd[i].node = NULL; 686 } 687 array_size = new_size; 688 689 if (rtl_dump_file) 690 { 691 fprintf (rtl_dump_file, 692 "Growing the dynamic array to %d elements.\n", 693 array_size); 694 } 695 } 696 697 return new_bb; 698} 699 700/* Compute and return the key (for the heap) of the basic block BB. */ 701 702static fibheapkey_t 703bb_to_key (basic_block bb) 704{ 705 edge e; 706 707 int priority = 0; 708 709 /* Do not start in probably never executed blocks. */ 710 if (probably_never_executed_bb_p (bb)) 711 return BB_FREQ_MAX; 712 713 /* Prefer blocks whose predecessor is an end of some trace 714 or whose predecessor edge is EDGE_DFS_BACK. */ 715 for (e = bb->pred; e; e = e->pred_next) 716 { 717 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0) 718 || (e->flags & EDGE_DFS_BACK)) 719 { 720 int edge_freq = EDGE_FREQUENCY (e); 721 722 if (edge_freq > priority) 723 priority = edge_freq; 724 } 725 } 726 727 if (priority) 728 /* The block with priority should have significantly lower key. */ 729 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); 730 return -bb->frequency; 731} 732 733/* Return true when the edge E from basic block BB is better than the temporary 734 best edge (details are in function). The probability of edge E is PROB. The 735 frequency of the successor is FREQ. The current best probability is 736 BEST_PROB, the best frequency is BEST_FREQ. 737 The edge is considered to be equivalent when PROB does not differ much from 738 BEST_PROB; similarly for frequency. */ 739 740static bool 741better_edge_p (basic_block bb, edge e, int prob, int freq, int best_prob, 742 int best_freq) 743{ 744 bool is_better_edge; 745 746 /* The BEST_* values do not have to be best, but can be a bit smaller than 747 maximum values. */ 748 int diff_prob = best_prob / 10; 749 int diff_freq = best_freq / 10; 750 751 if (prob > best_prob + diff_prob) 752 /* The edge has higher probability than the temporary best edge. */ 753 is_better_edge = true; 754 else if (prob < best_prob - diff_prob) 755 /* The edge has lower probability than the temporary best edge. */ 756 is_better_edge = false; 757 else if (freq < best_freq - diff_freq) 758 /* The edge and the temporary best edge have almost equivalent 759 probabilities. The higher frequency of a successor now means 760 that there is another edge going into that successor. 761 This successor has lower frequency so it is better. */ 762 is_better_edge = true; 763 else if (freq > best_freq + diff_freq) 764 /* This successor has higher frequency so it is worse. */ 765 is_better_edge = false; 766 else if (e->dest->prev_bb == bb) 767 /* The edges have equivalent probabilities and the successors 768 have equivalent frequencies. Select the previous successor. */ 769 is_better_edge = true; 770 else 771 is_better_edge = false; 772 773 return is_better_edge; 774} 775 776/* Connect traces in array TRACES, N_TRACES is the count of traces. */ 777 778static void 779connect_traces (int n_traces, struct trace *traces) 780{ 781 int i; 782 bool *connected; 783 int last_trace; 784 int freq_threshold; 785 gcov_type count_threshold; 786 787 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000; 788 if (max_entry_count < INT_MAX / 1000) 789 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000; 790 else 791 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD; 792 793 connected = xcalloc (n_traces, sizeof (bool)); 794 last_trace = -1; 795 for (i = 0; i < n_traces; i++) 796 { 797 int t = i; 798 int t2; 799 edge e, best; 800 int best_len; 801 802 if (connected[t]) 803 continue; 804 805 connected[t] = true; 806 807 /* Find the predecessor traces. */ 808 for (t2 = t; t2 > 0;) 809 { 810 best = NULL; 811 best_len = 0; 812 for (e = traces[t2].first->pred; e; e = e->pred_next) 813 { 814 int si = e->src->index; 815 816 if (e->src != ENTRY_BLOCK_PTR 817 && (e->flags & EDGE_CAN_FALLTHRU) 818 && !(e->flags & EDGE_COMPLEX) 819 && bbd[si].end_of_trace >= 0 820 && !connected[bbd[si].end_of_trace] 821 && (!best 822 || e->probability > best->probability 823 || (e->probability == best->probability 824 && traces[bbd[si].end_of_trace].length > best_len))) 825 { 826 best = e; 827 best_len = traces[bbd[si].end_of_trace].length; 828 } 829 } 830 if (best) 831 { 832 best->src->rbi->next = best->dest; 833 t2 = bbd[best->src->index].end_of_trace; 834 connected[t2] = true; 835 if (rtl_dump_file) 836 { 837 fprintf (rtl_dump_file, "Connection: %d %d\n", 838 best->src->index, best->dest->index); 839 } 840 } 841 else 842 break; 843 } 844 845 if (last_trace >= 0) 846 traces[last_trace].last->rbi->next = traces[t2].first; 847 last_trace = t; 848 849 /* Find the successor traces. */ 850 while (1) 851 { 852 /* Find the continuation of the chain. */ 853 best = NULL; 854 best_len = 0; 855 for (e = traces[t].last->succ; e; e = e->succ_next) 856 { 857 int di = e->dest->index; 858 859 if (e->dest != EXIT_BLOCK_PTR 860 && (e->flags & EDGE_CAN_FALLTHRU) 861 && !(e->flags & EDGE_COMPLEX) 862 && bbd[di].start_of_trace >= 0 863 && !connected[bbd[di].start_of_trace] 864 && (!best 865 || e->probability > best->probability 866 || (e->probability == best->probability 867 && traces[bbd[di].start_of_trace].length > best_len))) 868 { 869 best = e; 870 best_len = traces[bbd[di].start_of_trace].length; 871 } 872 } 873 874 if (best) 875 { 876 if (rtl_dump_file) 877 { 878 fprintf (rtl_dump_file, "Connection: %d %d\n", 879 best->src->index, best->dest->index); 880 } 881 t = bbd[best->dest->index].start_of_trace; 882 traces[last_trace].last->rbi->next = traces[t].first; 883 connected[t] = true; 884 last_trace = t; 885 } 886 else 887 { 888 /* Try to connect the traces by duplication of 1 block. */ 889 edge e2; 890 basic_block next_bb = NULL; 891 bool try_copy = false; 892 893 for (e = traces[t].last->succ; e; e = e->succ_next) 894 if (e->dest != EXIT_BLOCK_PTR 895 && (e->flags & EDGE_CAN_FALLTHRU) 896 && !(e->flags & EDGE_COMPLEX) 897 && (!best || e->probability > best->probability)) 898 { 899 edge best2 = NULL; 900 int best2_len = 0; 901 902 /* If the destination is a start of a trace which is only 903 one block long, then no need to search the successor 904 blocks of the trace. Accept it. */ 905 if (bbd[e->dest->index].start_of_trace >= 0 906 && traces[bbd[e->dest->index].start_of_trace].length 907 == 1) 908 { 909 best = e; 910 try_copy = true; 911 continue; 912 } 913 914 for (e2 = e->dest->succ; e2; e2 = e2->succ_next) 915 { 916 int di = e2->dest->index; 917 918 if (e2->dest == EXIT_BLOCK_PTR 919 || ((e2->flags & EDGE_CAN_FALLTHRU) 920 && !(e2->flags & EDGE_COMPLEX) 921 && bbd[di].start_of_trace >= 0 922 && !connected[bbd[di].start_of_trace] 923 && (EDGE_FREQUENCY (e2) >= freq_threshold) 924 && (e2->count >= count_threshold) 925 && (!best2 926 || e2->probability > best2->probability 927 || (e2->probability == best2->probability 928 && traces[bbd[di].start_of_trace].length 929 > best2_len)))) 930 { 931 best = e; 932 best2 = e2; 933 if (e2->dest != EXIT_BLOCK_PTR) 934 best2_len = traces[bbd[di].start_of_trace].length; 935 else 936 best2_len = INT_MAX; 937 next_bb = e2->dest; 938 try_copy = true; 939 } 940 } 941 } 942 943 /* Copy tiny blocks always; copy larger blocks only when the 944 edge is traversed frequently enough. */ 945 if (try_copy 946 && copy_bb_p (best->dest, 947 !optimize_size 948 && EDGE_FREQUENCY (best) >= freq_threshold 949 && best->count >= count_threshold)) 950 { 951 basic_block new_bb; 952 953 if (rtl_dump_file) 954 { 955 fprintf (rtl_dump_file, "Connection: %d %d ", 956 traces[t].last->index, best->dest->index); 957 if (!next_bb) 958 fputc ('\n', rtl_dump_file); 959 else if (next_bb == EXIT_BLOCK_PTR) 960 fprintf (rtl_dump_file, "exit\n"); 961 else 962 fprintf (rtl_dump_file, "%d\n", next_bb->index); 963 } 964 965 new_bb = copy_bb (best->dest, best, traces[t].last, t); 966 traces[t].last = new_bb; 967 if (next_bb && next_bb != EXIT_BLOCK_PTR) 968 { 969 t = bbd[next_bb->index].start_of_trace; 970 traces[last_trace].last->rbi->next = traces[t].first; 971 connected[t] = true; 972 last_trace = t; 973 } 974 else 975 break; /* Stop finding the successor traces. */ 976 } 977 else 978 break; /* Stop finding the successor traces. */ 979 } 980 } 981 } 982 983 if (rtl_dump_file) 984 { 985 basic_block bb; 986 987 fprintf (rtl_dump_file, "Final order:\n"); 988 for (bb = traces[0].first; bb; bb = bb->rbi->next) 989 fprintf (rtl_dump_file, "%d ", bb->index); 990 fprintf (rtl_dump_file, "\n"); 991 fflush (rtl_dump_file); 992 } 993 994 FREE (connected); 995} 996 997/* Return true when BB can and should be copied. CODE_MAY_GROW is true 998 when code size is allowed to grow by duplication. */ 999 1000static bool 1001copy_bb_p (basic_block bb, int code_may_grow) 1002{ 1003 int size = 0; 1004 int max_size = uncond_jump_length; 1005 rtx insn; 1006 int n_succ; 1007 edge e; 1008 1009 if (!bb->frequency) 1010 return false; 1011 if (!bb->pred || !bb->pred->pred_next) 1012 return false; 1013 if (!cfg_layout_can_duplicate_bb_p (bb)) 1014 return false; 1015 1016 /* Avoid duplicating blocks which have many successors (PR/13430). */ 1017 n_succ = 0; 1018 for (e = bb->succ; e; e = e->succ_next) 1019 { 1020 n_succ++; 1021 if (n_succ > 8) 1022 return false; 1023 } 1024 1025 if (code_may_grow && maybe_hot_bb_p (bb)) 1026 max_size *= 8; 1027 1028 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); 1029 insn = NEXT_INSN (insn)) 1030 { 1031 if (INSN_P (insn)) 1032 size += get_attr_length (insn); 1033 } 1034 1035 if (size <= max_size) 1036 return true; 1037 1038 if (rtl_dump_file) 1039 { 1040 fprintf (rtl_dump_file, 1041 "Block %d can't be copied because its size = %d.\n", 1042 bb->index, size); 1043 } 1044 1045 return false; 1046} 1047 1048/* Return the length of unconditional jump instruction. */ 1049 1050static int 1051get_uncond_jump_length (void) 1052{ 1053 rtx label, jump; 1054 int length; 1055 1056 label = emit_label_before (gen_label_rtx (), get_insns ()); 1057 jump = emit_jump_insn (gen_jump (label)); 1058 1059 length = get_attr_length (jump); 1060 1061 delete_insn (jump); 1062 delete_insn (label); 1063 return length; 1064} 1065 1066/* Reorder basic blocks. The main entry point to this file. FLAGS is 1067 the set of flags to pass to cfg_layout_initialize(). */ 1068 1069void 1070reorder_basic_blocks (unsigned int flags) 1071{ 1072 int n_traces; 1073 int i; 1074 struct trace *traces; 1075 1076 if (n_basic_blocks <= 1) 1077 return; 1078 1079 if ((* targetm.cannot_modify_jumps_p) ()) 1080 return; 1081 1082 timevar_push (TV_REORDER_BLOCKS); 1083 1084 cfg_layout_initialize (flags); 1085 1086 set_edge_can_fallthru_flag (); 1087 mark_dfs_back_edges (); 1088 1089 /* We are estimating the length of uncond jump insn only once since the code 1090 for getting the insn length always returns the minimal length now. */ 1091 if (uncond_jump_length == 0) 1092 uncond_jump_length = get_uncond_jump_length (); 1093 1094 /* We need to know some information for each basic block. */ 1095 array_size = GET_ARRAY_SIZE (last_basic_block); 1096 bbd = xmalloc (array_size * sizeof (bbro_basic_block_data)); 1097 for (i = 0; i < array_size; i++) 1098 { 1099 bbd[i].start_of_trace = -1; 1100 bbd[i].end_of_trace = -1; 1101 bbd[i].heap = NULL; 1102 bbd[i].node = NULL; 1103 } 1104 1105 traces = xmalloc (n_basic_blocks * sizeof (struct trace)); 1106 n_traces = 0; 1107 find_traces (&n_traces, traces); 1108 connect_traces (n_traces, traces); 1109 FREE (traces); 1110 FREE (bbd); 1111 1112 if (rtl_dump_file) 1113 dump_flow_info (rtl_dump_file); 1114 1115 cfg_layout_finalize (); 1116 1117 timevar_pop (TV_REORDER_BLOCKS); 1118} 1119