1/* Basic block reordering routines for the GNU compiler. 2 Copyright (C) 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2010 3 Free Software Foundation, Inc. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3, or (at your option) 10 any later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21/* This (greedy) algorithm constructs traces in several rounds. 22 The construction starts from "seeds". The seed for the first round 23 is the entry point of function. When there are more than one seed 24 that one is selected first that has the lowest key in the heap 25 (see function bb_to_key). Then the algorithm repeatedly adds the most 26 probable successor to the end of a trace. Finally it connects the traces. 27 28 There are two parameters: Branch Threshold and Exec Threshold. 29 If the edge to a successor of the actual basic block is lower than 30 Branch Threshold or the frequency of the successor is lower than 31 Exec Threshold the successor will be the seed in one of the next rounds. 32 Each round has these parameters lower than the previous one. 33 The last round has to have these parameters set to zero 34 so that the remaining blocks are picked up. 35 36 The algorithm selects the most probable successor from all unvisited 37 successors and successors that have been added to this trace. 38 The other successors (that has not been "sent" to the next round) will be 39 other seeds for this round and the secondary traces will start in them. 40 If the successor has not been visited in this trace it is added to the trace 41 (however, there is some heuristic for simple branches). 42 If the successor has been visited in this trace the loop has been found. 43 If the loop has many iterations the loop is rotated so that the 44 source block of the most probable edge going out from the loop 45 is the last block of the trace. 46 If the loop has few iterations and there is no edge from the last block of 47 the loop going out from loop the loop header is duplicated. 48 Finally, the construction of the trace is terminated. 49 50 When connecting traces it first checks whether there is an edge from the 51 last block of one trace to the first block of another trace. 52 When there are still some unconnected traces it checks whether there exists 53 a basic block BB such that BB is a successor of the last bb of one trace 54 and BB is a predecessor of the first block of another trace. In this case, 55 BB is duplicated and the traces are connected through this duplicate. 56 The rest of traces are simply connected so there will be a jump to the 57 beginning of the rest of trace. 58 59 60 References: 61 62 "Software Trace Cache" 63 A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999 64 http://citeseer.nj.nec.com/15361.html 65 66*/ 67 68#include "config.h" 69#include "system.h" 70#include "coretypes.h" 71#include "tm.h" 72#include "rtl.h" 73#include "regs.h" 74#include "flags.h" 75#include "timevar.h" 76#include "output.h" 77#include "cfglayout.h" 78#include "fibheap.h" 79#include "target.h" 80#include "function.h" 81#include "tm_p.h" 82#include "obstack.h" 83#include "expr.h" 84#include "params.h" 85#include "toplev.h" 86#include "tree-pass.h" 87#include "df.h" 88 89/* The number of rounds. In most cases there will only be 4 rounds, but 90 when partitioning hot and cold basic blocks into separate sections of 91 the .o file there will be an extra round.*/ 92#define N_ROUNDS 5 93 94/* Stubs in case we don't have a return insn. 95 We have to check at runtime too, not only compiletime. */ 96 97#ifndef HAVE_return 98#define HAVE_return 0 99#define gen_return() NULL_RTX 100#endif 101 102 103/* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE. */ 104static int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0}; 105 106/* Exec thresholds in thousandths (per mille) of the frequency of bb 0. */ 107static int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0}; 108 109/* If edge frequency is lower than DUPLICATION_THRESHOLD per mille of entry 110 block the edge destination is not duplicated while connecting traces. */ 111#define DUPLICATION_THRESHOLD 100 112 113/* Length of unconditional jump instruction. */ 114static int uncond_jump_length; 115 116/* Structure to hold needed information for each basic block. */ 117typedef struct bbro_basic_block_data_def 118{ 119 /* Which trace is the bb start of (-1 means it is not a start of a trace). */ 120 int start_of_trace; 121 122 /* Which trace is the bb end of (-1 means it is not an end of a trace). */ 123 int end_of_trace; 124 125 /* Which trace is the bb in? */ 126 int in_trace; 127 128 /* Which heap is BB in (if any)? */ 129 fibheap_t heap; 130 131 /* Which heap node is BB in (if any)? */ 132 fibnode_t node; 133} bbro_basic_block_data; 134 135/* The current size of the following dynamic array. */ 136static int array_size; 137 138/* The array which holds needed information for basic blocks. */ 139static bbro_basic_block_data *bbd; 140 141/* To avoid frequent reallocation the size of arrays is greater than needed, 142 the number of elements is (not less than) 1.25 * size_wanted. */ 143#define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5) 144 145/* Free the memory and set the pointer to NULL. */ 146#define FREE(P) (gcc_assert (P), free (P), P = 0) 147 148/* Structure for holding information about a trace. */ 149struct trace 150{ 151 /* First and last basic block of the trace. */ 152 basic_block first, last; 153 154 /* The round of the STC creation which this trace was found in. */ 155 int round; 156 157 /* The length (i.e. the number of basic blocks) of the trace. */ 158 int length; 159}; 160 161/* Maximum frequency and count of one of the entry blocks. */ 162static int max_entry_frequency; 163static gcov_type max_entry_count; 164 165/* Local function prototypes. */ 166static void find_traces (int *, struct trace *); 167static basic_block rotate_loop (edge, struct trace *, int); 168static void mark_bb_visited (basic_block, int); 169static void find_traces_1_round (int, int, gcov_type, struct trace *, int *, 170 int, fibheap_t *, int); 171static basic_block copy_bb (basic_block, edge, basic_block, int); 172static fibheapkey_t bb_to_key (basic_block); 173static bool better_edge_p (const_basic_block, const_edge, int, int, int, int, const_edge); 174static void connect_traces (int, struct trace *); 175static bool copy_bb_p (const_basic_block, int); 176static int get_uncond_jump_length (void); 177static bool push_to_next_round_p (const_basic_block, int, int, int, gcov_type); 178static void find_rarely_executed_basic_blocks_and_crossing_edges (edge **, 179 int *, 180 int *); 181static void add_labels_and_missing_jumps (edge *, int); 182static void add_reg_crossing_jump_notes (void); 183static void fix_up_fall_thru_edges (void); 184static void fix_edges_for_rarely_executed_code (edge *, int); 185static void fix_crossing_conditional_branches (void); 186static void fix_crossing_unconditional_branches (void); 187 188/* Check to see if bb should be pushed into the next round of trace 189 collections or not. Reasons for pushing the block forward are 1). 190 If the block is cold, we are doing partitioning, and there will be 191 another round (cold partition blocks are not supposed to be 192 collected into traces until the very last round); or 2). There will 193 be another round, and the basic block is not "hot enough" for the 194 current round of trace collection. */ 195 196static bool 197push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds, 198 int exec_th, gcov_type count_th) 199{ 200 bool there_exists_another_round; 201 bool block_not_hot_enough; 202 203 there_exists_another_round = round < number_of_rounds - 1; 204 205 block_not_hot_enough = (bb->frequency < exec_th 206 || bb->count < count_th 207 || probably_never_executed_bb_p (bb)); 208 209 if (there_exists_another_round 210 && block_not_hot_enough) 211 return true; 212 else 213 return false; 214} 215 216/* Find the traces for Software Trace Cache. Chain each trace through 217 RBI()->next. Store the number of traces to N_TRACES and description of 218 traces to TRACES. */ 219 220static void 221find_traces (int *n_traces, struct trace *traces) 222{ 223 int i; 224 int number_of_rounds; 225 edge e; 226 edge_iterator ei; 227 fibheap_t heap; 228 229 /* Add one extra round of trace collection when partitioning hot/cold 230 basic blocks into separate sections. The last round is for all the 231 cold blocks (and ONLY the cold blocks). */ 232 233 number_of_rounds = N_ROUNDS - 1; 234 235 /* Insert entry points of function into heap. */ 236 heap = fibheap_new (); 237 max_entry_frequency = 0; 238 max_entry_count = 0; 239 FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR->succs) 240 { 241 bbd[e->dest->index].heap = heap; 242 bbd[e->dest->index].node = fibheap_insert (heap, bb_to_key (e->dest), 243 e->dest); 244 if (e->dest->frequency > max_entry_frequency) 245 max_entry_frequency = e->dest->frequency; 246 if (e->dest->count > max_entry_count) 247 max_entry_count = e->dest->count; 248 } 249 250 /* Find the traces. */ 251 for (i = 0; i < number_of_rounds; i++) 252 { 253 gcov_type count_threshold; 254 255 if (dump_file) 256 fprintf (dump_file, "STC - round %d\n", i + 1); 257 258 if (max_entry_count < INT_MAX / 1000) 259 count_threshold = max_entry_count * exec_threshold[i] / 1000; 260 else 261 count_threshold = max_entry_count / 1000 * exec_threshold[i]; 262 263 find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000, 264 max_entry_frequency * exec_threshold[i] / 1000, 265 count_threshold, traces, n_traces, i, &heap, 266 number_of_rounds); 267 } 268 fibheap_delete (heap); 269 270 if (dump_file) 271 { 272 for (i = 0; i < *n_traces; i++) 273 { 274 basic_block bb; 275 fprintf (dump_file, "Trace %d (round %d): ", i + 1, 276 traces[i].round + 1); 277 for (bb = traces[i].first; bb != traces[i].last; bb = (basic_block) bb->aux) 278 fprintf (dump_file, "%d [%d] ", bb->index, bb->frequency); 279 fprintf (dump_file, "%d [%d]\n", bb->index, bb->frequency); 280 } 281 fflush (dump_file); 282 } 283} 284 285/* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE 286 (with sequential number TRACE_N). */ 287 288static basic_block 289rotate_loop (edge back_edge, struct trace *trace, int trace_n) 290{ 291 basic_block bb; 292 293 /* Information about the best end (end after rotation) of the loop. */ 294 basic_block best_bb = NULL; 295 edge best_edge = NULL; 296 int best_freq = -1; 297 gcov_type best_count = -1; 298 /* The best edge is preferred when its destination is not visited yet 299 or is a start block of some trace. */ 300 bool is_preferred = false; 301 302 /* Find the most frequent edge that goes out from current trace. */ 303 bb = back_edge->dest; 304 do 305 { 306 edge e; 307 edge_iterator ei; 308 309 FOR_EACH_EDGE (e, ei, bb->succs) 310 if (e->dest != EXIT_BLOCK_PTR 311 && e->dest->il.rtl->visited != trace_n 312 && (e->flags & EDGE_CAN_FALLTHRU) 313 && !(e->flags & EDGE_COMPLEX)) 314 { 315 if (is_preferred) 316 { 317 /* The best edge is preferred. */ 318 if (!e->dest->il.rtl->visited 319 || bbd[e->dest->index].start_of_trace >= 0) 320 { 321 /* The current edge E is also preferred. */ 322 int freq = EDGE_FREQUENCY (e); 323 if (freq > best_freq || e->count > best_count) 324 { 325 best_freq = freq; 326 best_count = e->count; 327 best_edge = e; 328 best_bb = bb; 329 } 330 } 331 } 332 else 333 { 334 if (!e->dest->il.rtl->visited 335 || bbd[e->dest->index].start_of_trace >= 0) 336 { 337 /* The current edge E is preferred. */ 338 is_preferred = true; 339 best_freq = EDGE_FREQUENCY (e); 340 best_count = e->count; 341 best_edge = e; 342 best_bb = bb; 343 } 344 else 345 { 346 int freq = EDGE_FREQUENCY (e); 347 if (!best_edge || freq > best_freq || e->count > best_count) 348 { 349 best_freq = freq; 350 best_count = e->count; 351 best_edge = e; 352 best_bb = bb; 353 } 354 } 355 } 356 } 357 bb = (basic_block) bb->aux; 358 } 359 while (bb != back_edge->dest); 360 361 if (best_bb) 362 { 363 /* Rotate the loop so that the BEST_EDGE goes out from the last block of 364 the trace. */ 365 if (back_edge->dest == trace->first) 366 { 367 trace->first = (basic_block) best_bb->aux; 368 } 369 else 370 { 371 basic_block prev_bb; 372 373 for (prev_bb = trace->first; 374 prev_bb->aux != back_edge->dest; 375 prev_bb = (basic_block) prev_bb->aux) 376 ; 377 prev_bb->aux = best_bb->aux; 378 379 /* Try to get rid of uncond jump to cond jump. */ 380 if (single_succ_p (prev_bb)) 381 { 382 basic_block header = single_succ (prev_bb); 383 384 /* Duplicate HEADER if it is a small block containing cond jump 385 in the end. */ 386 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0) 387 && !find_reg_note (BB_END (header), REG_CROSSING_JUMP, 388 NULL_RTX)) 389 copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n); 390 } 391 } 392 } 393 else 394 { 395 /* We have not found suitable loop tail so do no rotation. */ 396 best_bb = back_edge->src; 397 } 398 best_bb->aux = NULL; 399 return best_bb; 400} 401 402/* This function marks BB that it was visited in trace number TRACE. */ 403 404static void 405mark_bb_visited (basic_block bb, int trace) 406{ 407 bb->il.rtl->visited = trace; 408 if (bbd[bb->index].heap) 409 { 410 fibheap_delete_node (bbd[bb->index].heap, bbd[bb->index].node); 411 bbd[bb->index].heap = NULL; 412 bbd[bb->index].node = NULL; 413 } 414} 415 416/* One round of finding traces. Find traces for BRANCH_TH and EXEC_TH i.e. do 417 not include basic blocks their probability is lower than BRANCH_TH or their 418 frequency is lower than EXEC_TH into traces (or count is lower than 419 COUNT_TH). It stores the new traces into TRACES and modifies the number of 420 traces *N_TRACES. Sets the round (which the trace belongs to) to ROUND. It 421 expects that starting basic blocks are in *HEAP and at the end it deletes 422 *HEAP and stores starting points for the next round into new *HEAP. */ 423 424static void 425find_traces_1_round (int branch_th, int exec_th, gcov_type count_th, 426 struct trace *traces, int *n_traces, int round, 427 fibheap_t *heap, int number_of_rounds) 428{ 429 /* Heap for discarded basic blocks which are possible starting points for 430 the next round. */ 431 fibheap_t new_heap = fibheap_new (); 432 433 while (!fibheap_empty (*heap)) 434 { 435 basic_block bb; 436 struct trace *trace; 437 edge best_edge, e; 438 fibheapkey_t key; 439 edge_iterator ei; 440 441 bb = (basic_block) fibheap_extract_min (*heap); 442 bbd[bb->index].heap = NULL; 443 bbd[bb->index].node = NULL; 444 445 if (dump_file) 446 fprintf (dump_file, "Getting bb %d\n", bb->index); 447 448 /* If the BB's frequency is too low send BB to the next round. When 449 partitioning hot/cold blocks into separate sections, make sure all 450 the cold blocks (and ONLY the cold blocks) go into the (extra) final 451 round. */ 452 453 if (push_to_next_round_p (bb, round, number_of_rounds, exec_th, 454 count_th)) 455 { 456 int key = bb_to_key (bb); 457 bbd[bb->index].heap = new_heap; 458 bbd[bb->index].node = fibheap_insert (new_heap, key, bb); 459 460 if (dump_file) 461 fprintf (dump_file, 462 " Possible start point of next round: %d (key: %d)\n", 463 bb->index, key); 464 continue; 465 } 466 467 trace = traces + *n_traces; 468 trace->first = bb; 469 trace->round = round; 470 trace->length = 0; 471 bbd[bb->index].in_trace = *n_traces; 472 (*n_traces)++; 473 474 do 475 { 476 int prob, freq; 477 bool ends_in_call; 478 479 /* The probability and frequency of the best edge. */ 480 int best_prob = INT_MIN / 2; 481 int best_freq = INT_MIN / 2; 482 483 best_edge = NULL; 484 mark_bb_visited (bb, *n_traces); 485 trace->length++; 486 487 if (dump_file) 488 fprintf (dump_file, "Basic block %d was visited in trace %d\n", 489 bb->index, *n_traces - 1); 490 491 ends_in_call = block_ends_with_call_p (bb); 492 493 /* Select the successor that will be placed after BB. */ 494 FOR_EACH_EDGE (e, ei, bb->succs) 495 { 496 gcc_assert (!(e->flags & EDGE_FAKE)); 497 498 if (e->dest == EXIT_BLOCK_PTR) 499 continue; 500 501 if (e->dest->il.rtl->visited 502 && e->dest->il.rtl->visited != *n_traces) 503 continue; 504 505 if (BB_PARTITION (e->dest) != BB_PARTITION (bb)) 506 continue; 507 508 prob = e->probability; 509 freq = e->dest->frequency; 510 511 /* The only sensible preference for a call instruction is the 512 fallthru edge. Don't bother selecting anything else. */ 513 if (ends_in_call) 514 { 515 if (e->flags & EDGE_CAN_FALLTHRU) 516 { 517 best_edge = e; 518 best_prob = prob; 519 best_freq = freq; 520 } 521 continue; 522 } 523 524 /* Edge that cannot be fallthru or improbable or infrequent 525 successor (i.e. it is unsuitable successor). */ 526 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX) 527 || prob < branch_th || EDGE_FREQUENCY (e) < exec_th 528 || e->count < count_th) 529 continue; 530 531 /* If partitioning hot/cold basic blocks, don't consider edges 532 that cross section boundaries. */ 533 534 if (better_edge_p (bb, e, prob, freq, best_prob, best_freq, 535 best_edge)) 536 { 537 best_edge = e; 538 best_prob = prob; 539 best_freq = freq; 540 } 541 } 542 543 /* If the best destination has multiple predecessors, and can be 544 duplicated cheaper than a jump, don't allow it to be added 545 to a trace. We'll duplicate it when connecting traces. */ 546 if (best_edge && EDGE_COUNT (best_edge->dest->preds) >= 2 547 && copy_bb_p (best_edge->dest, 0)) 548 best_edge = NULL; 549 550 /* Add all non-selected successors to the heaps. */ 551 FOR_EACH_EDGE (e, ei, bb->succs) 552 { 553 if (e == best_edge 554 || e->dest == EXIT_BLOCK_PTR 555 || e->dest->il.rtl->visited) 556 continue; 557 558 key = bb_to_key (e->dest); 559 560 if (bbd[e->dest->index].heap) 561 { 562 /* E->DEST is already in some heap. */ 563 if (key != bbd[e->dest->index].node->key) 564 { 565 if (dump_file) 566 { 567 fprintf (dump_file, 568 "Changing key for bb %d from %ld to %ld.\n", 569 e->dest->index, 570 (long) bbd[e->dest->index].node->key, 571 key); 572 } 573 fibheap_replace_key (bbd[e->dest->index].heap, 574 bbd[e->dest->index].node, key); 575 } 576 } 577 else 578 { 579 fibheap_t which_heap = *heap; 580 581 prob = e->probability; 582 freq = EDGE_FREQUENCY (e); 583 584 if (!(e->flags & EDGE_CAN_FALLTHRU) 585 || (e->flags & EDGE_COMPLEX) 586 || prob < branch_th || freq < exec_th 587 || e->count < count_th) 588 { 589 /* When partitioning hot/cold basic blocks, make sure 590 the cold blocks (and only the cold blocks) all get 591 pushed to the last round of trace collection. */ 592 593 if (push_to_next_round_p (e->dest, round, 594 number_of_rounds, 595 exec_th, count_th)) 596 which_heap = new_heap; 597 } 598 599 bbd[e->dest->index].heap = which_heap; 600 bbd[e->dest->index].node = fibheap_insert (which_heap, 601 key, e->dest); 602 603 if (dump_file) 604 { 605 fprintf (dump_file, 606 " Possible start of %s round: %d (key: %ld)\n", 607 (which_heap == new_heap) ? "next" : "this", 608 e->dest->index, (long) key); 609 } 610 611 } 612 } 613 614 if (best_edge) /* Suitable successor was found. */ 615 { 616 if (best_edge->dest->il.rtl->visited == *n_traces) 617 { 618 /* We do nothing with one basic block loops. */ 619 if (best_edge->dest != bb) 620 { 621 if (EDGE_FREQUENCY (best_edge) 622 > 4 * best_edge->dest->frequency / 5) 623 { 624 /* The loop has at least 4 iterations. If the loop 625 header is not the first block of the function 626 we can rotate the loop. */ 627 628 if (best_edge->dest != ENTRY_BLOCK_PTR->next_bb) 629 { 630 if (dump_file) 631 { 632 fprintf (dump_file, 633 "Rotating loop %d - %d\n", 634 best_edge->dest->index, bb->index); 635 } 636 bb->aux = best_edge->dest; 637 bbd[best_edge->dest->index].in_trace = 638 (*n_traces) - 1; 639 bb = rotate_loop (best_edge, trace, *n_traces); 640 } 641 } 642 else 643 { 644 /* The loop has less than 4 iterations. */ 645 646 if (single_succ_p (bb) 647 && copy_bb_p (best_edge->dest, 648 optimize_edge_for_speed_p (best_edge))) 649 { 650 bb = copy_bb (best_edge->dest, best_edge, bb, 651 *n_traces); 652 trace->length++; 653 } 654 } 655 } 656 657 /* Terminate the trace. */ 658 break; 659 } 660 else 661 { 662 /* Check for a situation 663 664 A 665 /| 666 B | 667 \| 668 C 669 670 where 671 EDGE_FREQUENCY (AB) + EDGE_FREQUENCY (BC) 672 >= EDGE_FREQUENCY (AC). 673 (i.e. 2 * B->frequency >= EDGE_FREQUENCY (AC) ) 674 Best ordering is then A B C. 675 676 This situation is created for example by: 677 678 if (A) B; 679 C; 680 681 */ 682 683 FOR_EACH_EDGE (e, ei, bb->succs) 684 if (e != best_edge 685 && (e->flags & EDGE_CAN_FALLTHRU) 686 && !(e->flags & EDGE_COMPLEX) 687 && !e->dest->il.rtl->visited 688 && single_pred_p (e->dest) 689 && !(e->flags & EDGE_CROSSING) 690 && single_succ_p (e->dest) 691 && (single_succ_edge (e->dest)->flags 692 & EDGE_CAN_FALLTHRU) 693 && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX) 694 && single_succ (e->dest) == best_edge->dest 695 && 2 * e->dest->frequency >= EDGE_FREQUENCY (best_edge)) 696 { 697 best_edge = e; 698 if (dump_file) 699 fprintf (dump_file, "Selecting BB %d\n", 700 best_edge->dest->index); 701 break; 702 } 703 704 bb->aux = best_edge->dest; 705 bbd[best_edge->dest->index].in_trace = (*n_traces) - 1; 706 bb = best_edge->dest; 707 } 708 } 709 } 710 while (best_edge); 711 trace->last = bb; 712 bbd[trace->first->index].start_of_trace = *n_traces - 1; 713 bbd[trace->last->index].end_of_trace = *n_traces - 1; 714 715 /* The trace is terminated so we have to recount the keys in heap 716 (some block can have a lower key because now one of its predecessors 717 is an end of the trace). */ 718 FOR_EACH_EDGE (e, ei, bb->succs) 719 { 720 if (e->dest == EXIT_BLOCK_PTR 721 || e->dest->il.rtl->visited) 722 continue; 723 724 if (bbd[e->dest->index].heap) 725 { 726 key = bb_to_key (e->dest); 727 if (key != bbd[e->dest->index].node->key) 728 { 729 if (dump_file) 730 { 731 fprintf (dump_file, 732 "Changing key for bb %d from %ld to %ld.\n", 733 e->dest->index, 734 (long) bbd[e->dest->index].node->key, key); 735 } 736 fibheap_replace_key (bbd[e->dest->index].heap, 737 bbd[e->dest->index].node, 738 key); 739 } 740 } 741 } 742 } 743 744 fibheap_delete (*heap); 745 746 /* "Return" the new heap. */ 747 *heap = new_heap; 748} 749 750/* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add 751 it to trace after BB, mark OLD_BB visited and update pass' data structures 752 (TRACE is a number of trace which OLD_BB is duplicated to). */ 753 754static basic_block 755copy_bb (basic_block old_bb, edge e, basic_block bb, int trace) 756{ 757 basic_block new_bb; 758 759 new_bb = duplicate_block (old_bb, e, bb); 760 BB_COPY_PARTITION (new_bb, old_bb); 761 762 gcc_assert (e->dest == new_bb); 763 gcc_assert (!e->dest->il.rtl->visited); 764 765 if (dump_file) 766 fprintf (dump_file, 767 "Duplicated bb %d (created bb %d)\n", 768 old_bb->index, new_bb->index); 769 new_bb->il.rtl->visited = trace; 770 new_bb->aux = bb->aux; 771 bb->aux = new_bb; 772 773 if (new_bb->index >= array_size || last_basic_block > array_size) 774 { 775 int i; 776 int new_size; 777 778 new_size = MAX (last_basic_block, new_bb->index + 1); 779 new_size = GET_ARRAY_SIZE (new_size); 780 bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size); 781 for (i = array_size; i < new_size; i++) 782 { 783 bbd[i].start_of_trace = -1; 784 bbd[i].in_trace = -1; 785 bbd[i].end_of_trace = -1; 786 bbd[i].heap = NULL; 787 bbd[i].node = NULL; 788 } 789 array_size = new_size; 790 791 if (dump_file) 792 { 793 fprintf (dump_file, 794 "Growing the dynamic array to %d elements.\n", 795 array_size); 796 } 797 } 798 799 bbd[new_bb->index].in_trace = trace; 800 801 return new_bb; 802} 803 804/* Compute and return the key (for the heap) of the basic block BB. */ 805 806static fibheapkey_t 807bb_to_key (basic_block bb) 808{ 809 edge e; 810 edge_iterator ei; 811 int priority = 0; 812 813 /* Do not start in probably never executed blocks. */ 814 815 if (BB_PARTITION (bb) == BB_COLD_PARTITION 816 || probably_never_executed_bb_p (bb)) 817 return BB_FREQ_MAX; 818 819 /* Prefer blocks whose predecessor is an end of some trace 820 or whose predecessor edge is EDGE_DFS_BACK. */ 821 FOR_EACH_EDGE (e, ei, bb->preds) 822 { 823 if ((e->src != ENTRY_BLOCK_PTR && bbd[e->src->index].end_of_trace >= 0) 824 || (e->flags & EDGE_DFS_BACK)) 825 { 826 int edge_freq = EDGE_FREQUENCY (e); 827 828 if (edge_freq > priority) 829 priority = edge_freq; 830 } 831 } 832 833 if (priority) 834 /* The block with priority should have significantly lower key. */ 835 return -(100 * BB_FREQ_MAX + 100 * priority + bb->frequency); 836 return -bb->frequency; 837} 838 839/* Return true when the edge E from basic block BB is better than the temporary 840 best edge (details are in function). The probability of edge E is PROB. The 841 frequency of the successor is FREQ. The current best probability is 842 BEST_PROB, the best frequency is BEST_FREQ. 843 The edge is considered to be equivalent when PROB does not differ much from 844 BEST_PROB; similarly for frequency. */ 845 846static bool 847better_edge_p (const_basic_block bb, const_edge e, int prob, int freq, int best_prob, 848 int best_freq, const_edge cur_best_edge) 849{ 850 bool is_better_edge; 851 852 /* The BEST_* values do not have to be best, but can be a bit smaller than 853 maximum values. */ 854 int diff_prob = best_prob / 10; 855 int diff_freq = best_freq / 10; 856 857 if (prob > best_prob + diff_prob) 858 /* The edge has higher probability than the temporary best edge. */ 859 is_better_edge = true; 860 else if (prob < best_prob - diff_prob) 861 /* The edge has lower probability than the temporary best edge. */ 862 is_better_edge = false; 863 else if (freq < best_freq - diff_freq) 864 /* The edge and the temporary best edge have almost equivalent 865 probabilities. The higher frequency of a successor now means 866 that there is another edge going into that successor. 867 This successor has lower frequency so it is better. */ 868 is_better_edge = true; 869 else if (freq > best_freq + diff_freq) 870 /* This successor has higher frequency so it is worse. */ 871 is_better_edge = false; 872 else if (e->dest->prev_bb == bb) 873 /* The edges have equivalent probabilities and the successors 874 have equivalent frequencies. Select the previous successor. */ 875 is_better_edge = true; 876 else 877 is_better_edge = false; 878 879 /* If we are doing hot/cold partitioning, make sure that we always favor 880 non-crossing edges over crossing edges. */ 881 882 if (!is_better_edge 883 && flag_reorder_blocks_and_partition 884 && cur_best_edge 885 && (cur_best_edge->flags & EDGE_CROSSING) 886 && !(e->flags & EDGE_CROSSING)) 887 is_better_edge = true; 888 889 return is_better_edge; 890} 891 892/* Connect traces in array TRACES, N_TRACES is the count of traces. */ 893 894static void 895connect_traces (int n_traces, struct trace *traces) 896{ 897 int i; 898 bool *connected; 899 bool two_passes; 900 int last_trace; 901 int current_pass; 902 int current_partition; 903 int freq_threshold; 904 gcov_type count_threshold; 905 906 freq_threshold = max_entry_frequency * DUPLICATION_THRESHOLD / 1000; 907 if (max_entry_count < INT_MAX / 1000) 908 count_threshold = max_entry_count * DUPLICATION_THRESHOLD / 1000; 909 else 910 count_threshold = max_entry_count / 1000 * DUPLICATION_THRESHOLD; 911 912 connected = XCNEWVEC (bool, n_traces); 913 last_trace = -1; 914 current_pass = 1; 915 current_partition = BB_PARTITION (traces[0].first); 916 two_passes = false; 917 918 if (flag_reorder_blocks_and_partition) 919 for (i = 0; i < n_traces && !two_passes; i++) 920 if (BB_PARTITION (traces[0].first) 921 != BB_PARTITION (traces[i].first)) 922 two_passes = true; 923 924 for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++) 925 { 926 int t = i; 927 int t2; 928 edge e, best; 929 int best_len; 930 931 if (i >= n_traces) 932 { 933 gcc_assert (two_passes && current_pass == 1); 934 i = 0; 935 t = i; 936 current_pass = 2; 937 if (current_partition == BB_HOT_PARTITION) 938 current_partition = BB_COLD_PARTITION; 939 else 940 current_partition = BB_HOT_PARTITION; 941 } 942 943 if (connected[t]) 944 continue; 945 946 if (two_passes 947 && BB_PARTITION (traces[t].first) != current_partition) 948 continue; 949 950 connected[t] = true; 951 952 /* Find the predecessor traces. */ 953 for (t2 = t; t2 > 0;) 954 { 955 edge_iterator ei; 956 best = NULL; 957 best_len = 0; 958 FOR_EACH_EDGE (e, ei, traces[t2].first->preds) 959 { 960 int si = e->src->index; 961 962 if (e->src != ENTRY_BLOCK_PTR 963 && (e->flags & EDGE_CAN_FALLTHRU) 964 && !(e->flags & EDGE_COMPLEX) 965 && bbd[si].end_of_trace >= 0 966 && !connected[bbd[si].end_of_trace] 967 && (BB_PARTITION (e->src) == current_partition) 968 && (!best 969 || e->probability > best->probability 970 || (e->probability == best->probability 971 && traces[bbd[si].end_of_trace].length > best_len))) 972 { 973 best = e; 974 best_len = traces[bbd[si].end_of_trace].length; 975 } 976 } 977 if (best) 978 { 979 best->src->aux = best->dest; 980 t2 = bbd[best->src->index].end_of_trace; 981 connected[t2] = true; 982 983 if (dump_file) 984 { 985 fprintf (dump_file, "Connection: %d %d\n", 986 best->src->index, best->dest->index); 987 } 988 } 989 else 990 break; 991 } 992 993 if (last_trace >= 0) 994 traces[last_trace].last->aux = traces[t2].first; 995 last_trace = t; 996 997 /* Find the successor traces. */ 998 while (1) 999 { 1000 /* Find the continuation of the chain. */ 1001 edge_iterator ei; 1002 best = NULL; 1003 best_len = 0; 1004 FOR_EACH_EDGE (e, ei, traces[t].last->succs) 1005 { 1006 int di = e->dest->index; 1007 1008 if (e->dest != EXIT_BLOCK_PTR 1009 && (e->flags & EDGE_CAN_FALLTHRU) 1010 && !(e->flags & EDGE_COMPLEX) 1011 && bbd[di].start_of_trace >= 0 1012 && !connected[bbd[di].start_of_trace] 1013 && (BB_PARTITION (e->dest) == current_partition) 1014 && (!best 1015 || e->probability > best->probability 1016 || (e->probability == best->probability 1017 && traces[bbd[di].start_of_trace].length > best_len))) 1018 { 1019 best = e; 1020 best_len = traces[bbd[di].start_of_trace].length; 1021 } 1022 } 1023 1024 if (best) 1025 { 1026 if (dump_file) 1027 { 1028 fprintf (dump_file, "Connection: %d %d\n", 1029 best->src->index, best->dest->index); 1030 } 1031 t = bbd[best->dest->index].start_of_trace; 1032 traces[last_trace].last->aux = traces[t].first; 1033 connected[t] = true; 1034 last_trace = t; 1035 } 1036 else 1037 { 1038 /* Try to connect the traces by duplication of 1 block. */ 1039 edge e2; 1040 basic_block next_bb = NULL; 1041 bool try_copy = false; 1042 1043 FOR_EACH_EDGE (e, ei, traces[t].last->succs) 1044 if (e->dest != EXIT_BLOCK_PTR 1045 && (e->flags & EDGE_CAN_FALLTHRU) 1046 && !(e->flags & EDGE_COMPLEX) 1047 && (!best || e->probability > best->probability)) 1048 { 1049 edge_iterator ei; 1050 edge best2 = NULL; 1051 int best2_len = 0; 1052 1053 /* If the destination is a start of a trace which is only 1054 one block long, then no need to search the successor 1055 blocks of the trace. Accept it. */ 1056 if (bbd[e->dest->index].start_of_trace >= 0 1057 && traces[bbd[e->dest->index].start_of_trace].length 1058 == 1) 1059 { 1060 best = e; 1061 try_copy = true; 1062 continue; 1063 } 1064 1065 FOR_EACH_EDGE (e2, ei, e->dest->succs) 1066 { 1067 int di = e2->dest->index; 1068 1069 if (e2->dest == EXIT_BLOCK_PTR 1070 || ((e2->flags & EDGE_CAN_FALLTHRU) 1071 && !(e2->flags & EDGE_COMPLEX) 1072 && bbd[di].start_of_trace >= 0 1073 && !connected[bbd[di].start_of_trace] 1074 && (BB_PARTITION (e2->dest) == current_partition) 1075 && (EDGE_FREQUENCY (e2) >= freq_threshold) 1076 && (e2->count >= count_threshold) 1077 && (!best2 1078 || e2->probability > best2->probability 1079 || (e2->probability == best2->probability 1080 && traces[bbd[di].start_of_trace].length 1081 > best2_len)))) 1082 { 1083 best = e; 1084 best2 = e2; 1085 if (e2->dest != EXIT_BLOCK_PTR) 1086 best2_len = traces[bbd[di].start_of_trace].length; 1087 else 1088 best2_len = INT_MAX; 1089 next_bb = e2->dest; 1090 try_copy = true; 1091 } 1092 } 1093 } 1094 1095 if (flag_reorder_blocks_and_partition) 1096 try_copy = false; 1097 1098 /* Copy tiny blocks always; copy larger blocks only when the 1099 edge is traversed frequently enough. */ 1100 if (try_copy 1101 && copy_bb_p (best->dest, 1102 optimize_edge_for_speed_p (best) 1103 && EDGE_FREQUENCY (best) >= freq_threshold 1104 && best->count >= count_threshold)) 1105 { 1106 basic_block new_bb; 1107 1108 if (dump_file) 1109 { 1110 fprintf (dump_file, "Connection: %d %d ", 1111 traces[t].last->index, best->dest->index); 1112 if (!next_bb) 1113 fputc ('\n', dump_file); 1114 else if (next_bb == EXIT_BLOCK_PTR) 1115 fprintf (dump_file, "exit\n"); 1116 else 1117 fprintf (dump_file, "%d\n", next_bb->index); 1118 } 1119 1120 new_bb = copy_bb (best->dest, best, traces[t].last, t); 1121 traces[t].last = new_bb; 1122 if (next_bb && next_bb != EXIT_BLOCK_PTR) 1123 { 1124 t = bbd[next_bb->index].start_of_trace; 1125 traces[last_trace].last->aux = traces[t].first; 1126 connected[t] = true; 1127 last_trace = t; 1128 } 1129 else 1130 break; /* Stop finding the successor traces. */ 1131 } 1132 else 1133 break; /* Stop finding the successor traces. */ 1134 } 1135 } 1136 } 1137 1138 if (dump_file) 1139 { 1140 basic_block bb; 1141 1142 fprintf (dump_file, "Final order:\n"); 1143 for (bb = traces[0].first; bb; bb = (basic_block) bb->aux) 1144 fprintf (dump_file, "%d ", bb->index); 1145 fprintf (dump_file, "\n"); 1146 fflush (dump_file); 1147 } 1148 1149 FREE (connected); 1150} 1151 1152/* Return true when BB can and should be copied. CODE_MAY_GROW is true 1153 when code size is allowed to grow by duplication. */ 1154 1155static bool 1156copy_bb_p (const_basic_block bb, int code_may_grow) 1157{ 1158 int size = 0; 1159 int max_size = uncond_jump_length; 1160 rtx insn; 1161 1162 if (!bb->frequency) 1163 return false; 1164 if (EDGE_COUNT (bb->preds) < 2) 1165 return false; 1166 if (!can_duplicate_block_p (bb)) 1167 return false; 1168 1169 /* Avoid duplicating blocks which have many successors (PR/13430). */ 1170 if (EDGE_COUNT (bb->succs) > 8) 1171 return false; 1172 1173 if (code_may_grow && optimize_bb_for_speed_p (bb)) 1174 max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); 1175 1176 FOR_BB_INSNS (bb, insn) 1177 { 1178 if (INSN_P (insn)) 1179 size += get_attr_min_length (insn); 1180 } 1181 1182 if (size <= max_size) 1183 return true; 1184 1185 if (dump_file) 1186 { 1187 fprintf (dump_file, 1188 "Block %d can't be copied because its size = %d.\n", 1189 bb->index, size); 1190 } 1191 1192 return false; 1193} 1194 1195/* Return the length of unconditional jump instruction. */ 1196 1197static int 1198get_uncond_jump_length (void) 1199{ 1200 rtx label, jump; 1201 int length; 1202 1203 label = emit_label_before (gen_label_rtx (), get_insns ()); 1204 jump = emit_jump_insn (gen_jump (label)); 1205 1206 length = get_attr_min_length (jump); 1207 1208 delete_insn (jump); 1209 delete_insn (label); 1210 return length; 1211} 1212 1213/* Find the basic blocks that are rarely executed and need to be moved to 1214 a separate section of the .o file (to cut down on paging and improve 1215 cache locality). */ 1216 1217static void 1218find_rarely_executed_basic_blocks_and_crossing_edges (edge **crossing_edges, 1219 int *n_crossing_edges, 1220 int *max_idx) 1221{ 1222 basic_block bb; 1223 edge e; 1224 int i; 1225 edge_iterator ei; 1226 1227 /* Mark which partition (hot/cold) each basic block belongs in. */ 1228 1229 FOR_EACH_BB (bb) 1230 { 1231 if (probably_never_executed_bb_p (bb)) 1232 BB_SET_PARTITION (bb, BB_COLD_PARTITION); 1233 else 1234 BB_SET_PARTITION (bb, BB_HOT_PARTITION); 1235 } 1236 1237 /* Mark every edge that crosses between sections. */ 1238 1239 i = 0; 1240 FOR_EACH_BB (bb) 1241 FOR_EACH_EDGE (e, ei, bb->succs) 1242 { 1243 if (e->src != ENTRY_BLOCK_PTR 1244 && e->dest != EXIT_BLOCK_PTR 1245 && BB_PARTITION (e->src) != BB_PARTITION (e->dest)) 1246 { 1247 e->flags |= EDGE_CROSSING; 1248 if (i == *max_idx) 1249 { 1250 *max_idx *= 2; 1251 *crossing_edges = XRESIZEVEC (edge, *crossing_edges, *max_idx); 1252 } 1253 (*crossing_edges)[i++] = e; 1254 } 1255 else 1256 e->flags &= ~EDGE_CROSSING; 1257 } 1258 *n_crossing_edges = i; 1259} 1260 1261/* If any destination of a crossing edge does not have a label, add label; 1262 Convert any fall-through crossing edges (for blocks that do not contain 1263 a jump) to unconditional jumps. */ 1264 1265static void 1266add_labels_and_missing_jumps (edge *crossing_edges, int n_crossing_edges) 1267{ 1268 int i; 1269 basic_block src; 1270 basic_block dest; 1271 rtx label; 1272 rtx barrier; 1273 rtx new_jump; 1274 1275 for (i=0; i < n_crossing_edges; i++) 1276 { 1277 if (crossing_edges[i]) 1278 { 1279 src = crossing_edges[i]->src; 1280 dest = crossing_edges[i]->dest; 1281 1282 /* Make sure dest has a label. */ 1283 1284 if (dest && (dest != EXIT_BLOCK_PTR)) 1285 { 1286 label = block_label (dest); 1287 1288 /* Make sure source block ends with a jump. If the 1289 source block does not end with a jump it might end 1290 with a call_insn; this case will be handled in 1291 fix_up_fall_thru_edges function. */ 1292 1293 if (src && (src != ENTRY_BLOCK_PTR)) 1294 { 1295 if (!JUMP_P (BB_END (src)) 1296 && !block_ends_with_call_p (src) 1297 && !can_throw_internal (BB_END (src))) 1298 /* bb just falls through. */ 1299 { 1300 /* make sure there's only one successor */ 1301 gcc_assert (single_succ_p (src)); 1302 1303 /* Find label in dest block. */ 1304 label = block_label (dest); 1305 1306 new_jump = emit_jump_insn_after (gen_jump (label), 1307 BB_END (src)); 1308 barrier = emit_barrier_after (new_jump); 1309 JUMP_LABEL (new_jump) = label; 1310 LABEL_NUSES (label) += 1; 1311 src->il.rtl->footer = unlink_insn_chain (barrier, barrier); 1312 /* Mark edge as non-fallthru. */ 1313 crossing_edges[i]->flags &= ~EDGE_FALLTHRU; 1314 } /* end: 'if (!JUMP_P ... ' */ 1315 } /* end: 'if (src && src !=...' */ 1316 } /* end: 'if (dest && dest !=...' */ 1317 } /* end: 'if (crossing_edges[i]...' */ 1318 } /* end for loop */ 1319} 1320 1321/* Find any bb's where the fall-through edge is a crossing edge (note that 1322 these bb's must also contain a conditional jump or end with a call 1323 instruction; we've already dealt with fall-through edges for blocks 1324 that didn't have a conditional jump or didn't end with call instruction 1325 in the call to add_labels_and_missing_jumps). Convert the fall-through 1326 edge to non-crossing edge by inserting a new bb to fall-through into. 1327 The new bb will contain an unconditional jump (crossing edge) to the 1328 original fall through destination. */ 1329 1330static void 1331fix_up_fall_thru_edges (void) 1332{ 1333 basic_block cur_bb; 1334 basic_block new_bb; 1335 edge succ1; 1336 edge succ2; 1337 edge fall_thru; 1338 edge cond_jump = NULL; 1339 edge e; 1340 bool cond_jump_crosses; 1341 int invert_worked; 1342 rtx old_jump; 1343 rtx fall_thru_label; 1344 rtx barrier; 1345 1346 FOR_EACH_BB (cur_bb) 1347 { 1348 fall_thru = NULL; 1349 if (EDGE_COUNT (cur_bb->succs) > 0) 1350 succ1 = EDGE_SUCC (cur_bb, 0); 1351 else 1352 succ1 = NULL; 1353 1354 if (EDGE_COUNT (cur_bb->succs) > 1) 1355 succ2 = EDGE_SUCC (cur_bb, 1); 1356 else 1357 succ2 = NULL; 1358 1359 /* Find the fall-through edge. */ 1360 1361 if (succ1 1362 && (succ1->flags & EDGE_FALLTHRU)) 1363 { 1364 fall_thru = succ1; 1365 cond_jump = succ2; 1366 } 1367 else if (succ2 1368 && (succ2->flags & EDGE_FALLTHRU)) 1369 { 1370 fall_thru = succ2; 1371 cond_jump = succ1; 1372 } 1373 else if (succ1 1374 && (block_ends_with_call_p (cur_bb) 1375 || can_throw_internal (BB_END (cur_bb)))) 1376 { 1377 edge e; 1378 edge_iterator ei; 1379 1380 /* Find EDGE_CAN_FALLTHRU edge. */ 1381 FOR_EACH_EDGE (e, ei, cur_bb->succs) 1382 if (e->flags & EDGE_CAN_FALLTHRU) 1383 { 1384 fall_thru = e; 1385 break; 1386 } 1387 } 1388 1389 if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR)) 1390 { 1391 /* Check to see if the fall-thru edge is a crossing edge. */ 1392 1393 if (fall_thru->flags & EDGE_CROSSING) 1394 { 1395 /* The fall_thru edge crosses; now check the cond jump edge, if 1396 it exists. */ 1397 1398 cond_jump_crosses = true; 1399 invert_worked = 0; 1400 old_jump = BB_END (cur_bb); 1401 1402 /* Find the jump instruction, if there is one. */ 1403 1404 if (cond_jump) 1405 { 1406 if (!(cond_jump->flags & EDGE_CROSSING)) 1407 cond_jump_crosses = false; 1408 1409 /* We know the fall-thru edge crosses; if the cond 1410 jump edge does NOT cross, and its destination is the 1411 next block in the bb order, invert the jump 1412 (i.e. fix it so the fall thru does not cross and 1413 the cond jump does). */ 1414 1415 if (!cond_jump_crosses 1416 && cur_bb->aux == cond_jump->dest) 1417 { 1418 /* Find label in fall_thru block. We've already added 1419 any missing labels, so there must be one. */ 1420 1421 fall_thru_label = block_label (fall_thru->dest); 1422 1423 if (old_jump && JUMP_P (old_jump) && fall_thru_label) 1424 invert_worked = invert_jump (old_jump, 1425 fall_thru_label,0); 1426 if (invert_worked) 1427 { 1428 fall_thru->flags &= ~EDGE_FALLTHRU; 1429 cond_jump->flags |= EDGE_FALLTHRU; 1430 update_br_prob_note (cur_bb); 1431 e = fall_thru; 1432 fall_thru = cond_jump; 1433 cond_jump = e; 1434 cond_jump->flags |= EDGE_CROSSING; 1435 fall_thru->flags &= ~EDGE_CROSSING; 1436 } 1437 } 1438 } 1439 1440 if (cond_jump_crosses || !invert_worked) 1441 { 1442 /* This is the case where both edges out of the basic 1443 block are crossing edges. Here we will fix up the 1444 fall through edge. The jump edge will be taken care 1445 of later. The EDGE_CROSSING flag of fall_thru edge 1446 is unset before the call to force_nonfallthru 1447 function because if a new basic-block is created 1448 this edge remains in the current section boundary 1449 while the edge between new_bb and the fall_thru->dest 1450 becomes EDGE_CROSSING. */ 1451 1452 fall_thru->flags &= ~EDGE_CROSSING; 1453 new_bb = force_nonfallthru (fall_thru); 1454 1455 if (new_bb) 1456 { 1457 new_bb->aux = cur_bb->aux; 1458 cur_bb->aux = new_bb; 1459 1460 /* Make sure new fall-through bb is in same 1461 partition as bb it's falling through from. */ 1462 1463 BB_COPY_PARTITION (new_bb, cur_bb); 1464 single_succ_edge (new_bb)->flags |= EDGE_CROSSING; 1465 } 1466 else 1467 { 1468 /* If a new basic-block was not created; restore 1469 the EDGE_CROSSING flag. */ 1470 fall_thru->flags |= EDGE_CROSSING; 1471 } 1472 1473 /* Add barrier after new jump */ 1474 1475 if (new_bb) 1476 { 1477 barrier = emit_barrier_after (BB_END (new_bb)); 1478 new_bb->il.rtl->footer = unlink_insn_chain (barrier, 1479 barrier); 1480 } 1481 else 1482 { 1483 barrier = emit_barrier_after (BB_END (cur_bb)); 1484 cur_bb->il.rtl->footer = unlink_insn_chain (barrier, 1485 barrier); 1486 } 1487 } 1488 } 1489 } 1490 } 1491} 1492 1493/* This function checks the destination block of a "crossing jump" to 1494 see if it has any crossing predecessors that begin with a code label 1495 and end with an unconditional jump. If so, it returns that predecessor 1496 block. (This is to avoid creating lots of new basic blocks that all 1497 contain unconditional jumps to the same destination). */ 1498 1499static basic_block 1500find_jump_block (basic_block jump_dest) 1501{ 1502 basic_block source_bb = NULL; 1503 edge e; 1504 rtx insn; 1505 edge_iterator ei; 1506 1507 FOR_EACH_EDGE (e, ei, jump_dest->preds) 1508 if (e->flags & EDGE_CROSSING) 1509 { 1510 basic_block src = e->src; 1511 1512 /* Check each predecessor to see if it has a label, and contains 1513 only one executable instruction, which is an unconditional jump. 1514 If so, we can use it. */ 1515 1516 if (LABEL_P (BB_HEAD (src))) 1517 for (insn = BB_HEAD (src); 1518 !INSN_P (insn) && insn != NEXT_INSN (BB_END (src)); 1519 insn = NEXT_INSN (insn)) 1520 { 1521 if (INSN_P (insn) 1522 && insn == BB_END (src) 1523 && JUMP_P (insn) 1524 && !any_condjump_p (insn)) 1525 { 1526 source_bb = src; 1527 break; 1528 } 1529 } 1530 1531 if (source_bb) 1532 break; 1533 } 1534 1535 return source_bb; 1536} 1537 1538/* Find all BB's with conditional jumps that are crossing edges; 1539 insert a new bb and make the conditional jump branch to the new 1540 bb instead (make the new bb same color so conditional branch won't 1541 be a 'crossing' edge). Insert an unconditional jump from the 1542 new bb to the original destination of the conditional jump. */ 1543 1544static void 1545fix_crossing_conditional_branches (void) 1546{ 1547 basic_block cur_bb; 1548 basic_block new_bb; 1549 basic_block last_bb; 1550 basic_block dest; 1551 edge succ1; 1552 edge succ2; 1553 edge crossing_edge; 1554 edge new_edge; 1555 rtx old_jump; 1556 rtx set_src; 1557 rtx old_label = NULL_RTX; 1558 rtx new_label; 1559 rtx new_jump; 1560 rtx barrier; 1561 1562 last_bb = EXIT_BLOCK_PTR->prev_bb; 1563 1564 FOR_EACH_BB (cur_bb) 1565 { 1566 crossing_edge = NULL; 1567 if (EDGE_COUNT (cur_bb->succs) > 0) 1568 succ1 = EDGE_SUCC (cur_bb, 0); 1569 else 1570 succ1 = NULL; 1571 1572 if (EDGE_COUNT (cur_bb->succs) > 1) 1573 succ2 = EDGE_SUCC (cur_bb, 1); 1574 else 1575 succ2 = NULL; 1576 1577 /* We already took care of fall-through edges, so only one successor 1578 can be a crossing edge. */ 1579 1580 if (succ1 && (succ1->flags & EDGE_CROSSING)) 1581 crossing_edge = succ1; 1582 else if (succ2 && (succ2->flags & EDGE_CROSSING)) 1583 crossing_edge = succ2; 1584 1585 if (crossing_edge) 1586 { 1587 old_jump = BB_END (cur_bb); 1588 1589 /* Check to make sure the jump instruction is a 1590 conditional jump. */ 1591 1592 set_src = NULL_RTX; 1593 1594 if (any_condjump_p (old_jump)) 1595 { 1596 if (GET_CODE (PATTERN (old_jump)) == SET) 1597 set_src = SET_SRC (PATTERN (old_jump)); 1598 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL) 1599 { 1600 set_src = XVECEXP (PATTERN (old_jump), 0,0); 1601 if (GET_CODE (set_src) == SET) 1602 set_src = SET_SRC (set_src); 1603 else 1604 set_src = NULL_RTX; 1605 } 1606 } 1607 1608 if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE)) 1609 { 1610 if (GET_CODE (XEXP (set_src, 1)) == PC) 1611 old_label = XEXP (set_src, 2); 1612 else if (GET_CODE (XEXP (set_src, 2)) == PC) 1613 old_label = XEXP (set_src, 1); 1614 1615 /* Check to see if new bb for jumping to that dest has 1616 already been created; if so, use it; if not, create 1617 a new one. */ 1618 1619 new_bb = find_jump_block (crossing_edge->dest); 1620 1621 if (new_bb) 1622 new_label = block_label (new_bb); 1623 else 1624 { 1625 /* Create new basic block to be dest for 1626 conditional jump. */ 1627 1628 new_bb = create_basic_block (NULL, NULL, last_bb); 1629 new_bb->aux = last_bb->aux; 1630 last_bb->aux = new_bb; 1631 last_bb = new_bb; 1632 /* Put appropriate instructions in new bb. */ 1633 1634 new_label = gen_label_rtx (); 1635 emit_label_before (new_label, BB_HEAD (new_bb)); 1636 BB_HEAD (new_bb) = new_label; 1637 1638 if (GET_CODE (old_label) == LABEL_REF) 1639 { 1640 old_label = JUMP_LABEL (old_jump); 1641 new_jump = emit_jump_insn_after (gen_jump 1642 (old_label), 1643 BB_END (new_bb)); 1644 } 1645 else 1646 { 1647 gcc_assert (HAVE_return 1648 && GET_CODE (old_label) == RETURN); 1649 new_jump = emit_jump_insn_after (gen_return (), 1650 BB_END (new_bb)); 1651 } 1652 1653 barrier = emit_barrier_after (new_jump); 1654 JUMP_LABEL (new_jump) = old_label; 1655 new_bb->il.rtl->footer = unlink_insn_chain (barrier, 1656 barrier); 1657 1658 /* Make sure new bb is in same partition as source 1659 of conditional branch. */ 1660 BB_COPY_PARTITION (new_bb, cur_bb); 1661 } 1662 1663 /* Make old jump branch to new bb. */ 1664 1665 redirect_jump (old_jump, new_label, 0); 1666 1667 /* Remove crossing_edge as predecessor of 'dest'. */ 1668 1669 dest = crossing_edge->dest; 1670 1671 redirect_edge_succ (crossing_edge, new_bb); 1672 1673 /* Make a new edge from new_bb to old dest; new edge 1674 will be a successor for new_bb and a predecessor 1675 for 'dest'. */ 1676 1677 if (EDGE_COUNT (new_bb->succs) == 0) 1678 new_edge = make_edge (new_bb, dest, 0); 1679 else 1680 new_edge = EDGE_SUCC (new_bb, 0); 1681 1682 crossing_edge->flags &= ~EDGE_CROSSING; 1683 new_edge->flags |= EDGE_CROSSING; 1684 } 1685 } 1686 } 1687} 1688 1689/* Find any unconditional branches that cross between hot and cold 1690 sections. Convert them into indirect jumps instead. */ 1691 1692static void 1693fix_crossing_unconditional_branches (void) 1694{ 1695 basic_block cur_bb; 1696 rtx last_insn; 1697 rtx label; 1698 rtx label_addr; 1699 rtx indirect_jump_sequence; 1700 rtx jump_insn = NULL_RTX; 1701 rtx new_reg; 1702 rtx cur_insn; 1703 edge succ; 1704 1705 FOR_EACH_BB (cur_bb) 1706 { 1707 last_insn = BB_END (cur_bb); 1708 1709 if (EDGE_COUNT (cur_bb->succs) < 1) 1710 continue; 1711 1712 succ = EDGE_SUCC (cur_bb, 0); 1713 1714 /* Check to see if bb ends in a crossing (unconditional) jump. At 1715 this point, no crossing jumps should be conditional. */ 1716 1717 if (JUMP_P (last_insn) 1718 && (succ->flags & EDGE_CROSSING)) 1719 { 1720 rtx label2, table; 1721 1722 gcc_assert (!any_condjump_p (last_insn)); 1723 1724 /* Make sure the jump is not already an indirect or table jump. */ 1725 1726 if (!computed_jump_p (last_insn) 1727 && !tablejump_p (last_insn, &label2, &table)) 1728 { 1729 /* We have found a "crossing" unconditional branch. Now 1730 we must convert it to an indirect jump. First create 1731 reference of label, as target for jump. */ 1732 1733 label = JUMP_LABEL (last_insn); 1734 label_addr = gen_rtx_LABEL_REF (Pmode, label); 1735 LABEL_NUSES (label) += 1; 1736 1737 /* Get a register to use for the indirect jump. */ 1738 1739 new_reg = gen_reg_rtx (Pmode); 1740 1741 /* Generate indirect the jump sequence. */ 1742 1743 start_sequence (); 1744 emit_move_insn (new_reg, label_addr); 1745 emit_indirect_jump (new_reg); 1746 indirect_jump_sequence = get_insns (); 1747 end_sequence (); 1748 1749 /* Make sure every instruction in the new jump sequence has 1750 its basic block set to be cur_bb. */ 1751 1752 for (cur_insn = indirect_jump_sequence; cur_insn; 1753 cur_insn = NEXT_INSN (cur_insn)) 1754 { 1755 if (!BARRIER_P (cur_insn)) 1756 BLOCK_FOR_INSN (cur_insn) = cur_bb; 1757 if (JUMP_P (cur_insn)) 1758 jump_insn = cur_insn; 1759 } 1760 1761 /* Insert the new (indirect) jump sequence immediately before 1762 the unconditional jump, then delete the unconditional jump. */ 1763 1764 emit_insn_before (indirect_jump_sequence, last_insn); 1765 delete_insn (last_insn); 1766 1767 /* Make BB_END for cur_bb be the jump instruction (NOT the 1768 barrier instruction at the end of the sequence...). */ 1769 1770 BB_END (cur_bb) = jump_insn; 1771 } 1772 } 1773 } 1774} 1775 1776/* Add REG_CROSSING_JUMP note to all crossing jump insns. */ 1777 1778static void 1779add_reg_crossing_jump_notes (void) 1780{ 1781 basic_block bb; 1782 edge e; 1783 edge_iterator ei; 1784 1785 FOR_EACH_BB (bb) 1786 FOR_EACH_EDGE (e, ei, bb->succs) 1787 if ((e->flags & EDGE_CROSSING) 1788 && JUMP_P (BB_END (e->src))) 1789 add_reg_note (BB_END (e->src), REG_CROSSING_JUMP, NULL_RTX); 1790} 1791 1792/* Hot and cold basic blocks are partitioned and put in separate 1793 sections of the .o file, to reduce paging and improve cache 1794 performance (hopefully). This can result in bits of code from the 1795 same function being widely separated in the .o file. However this 1796 is not obvious to the current bb structure. Therefore we must take 1797 care to ensure that: 1). There are no fall_thru edges that cross 1798 between sections; 2). For those architectures which have "short" 1799 conditional branches, all conditional branches that attempt to 1800 cross between sections are converted to unconditional branches; 1801 and, 3). For those architectures which have "short" unconditional 1802 branches, all unconditional branches that attempt to cross between 1803 sections are converted to indirect jumps. 1804 1805 The code for fixing up fall_thru edges that cross between hot and 1806 cold basic blocks does so by creating new basic blocks containing 1807 unconditional branches to the appropriate label in the "other" 1808 section. The new basic block is then put in the same (hot or cold) 1809 section as the original conditional branch, and the fall_thru edge 1810 is modified to fall into the new basic block instead. By adding 1811 this level of indirection we end up with only unconditional branches 1812 crossing between hot and cold sections. 1813 1814 Conditional branches are dealt with by adding a level of indirection. 1815 A new basic block is added in the same (hot/cold) section as the 1816 conditional branch, and the conditional branch is retargeted to the 1817 new basic block. The new basic block contains an unconditional branch 1818 to the original target of the conditional branch (in the other section). 1819 1820 Unconditional branches are dealt with by converting them into 1821 indirect jumps. */ 1822 1823static void 1824fix_edges_for_rarely_executed_code (edge *crossing_edges, 1825 int n_crossing_edges) 1826{ 1827 /* Make sure the source of any crossing edge ends in a jump and the 1828 destination of any crossing edge has a label. */ 1829 1830 add_labels_and_missing_jumps (crossing_edges, n_crossing_edges); 1831 1832 /* Convert all crossing fall_thru edges to non-crossing fall 1833 thrus to unconditional jumps (that jump to the original fall 1834 thru dest). */ 1835 1836 fix_up_fall_thru_edges (); 1837 1838 /* If the architecture does not have conditional branches that can 1839 span all of memory, convert crossing conditional branches into 1840 crossing unconditional branches. */ 1841 1842 if (!HAS_LONG_COND_BRANCH) 1843 fix_crossing_conditional_branches (); 1844 1845 /* If the architecture does not have unconditional branches that 1846 can span all of memory, convert crossing unconditional branches 1847 into indirect jumps. Since adding an indirect jump also adds 1848 a new register usage, update the register usage information as 1849 well. */ 1850 1851 if (!HAS_LONG_UNCOND_BRANCH) 1852 fix_crossing_unconditional_branches (); 1853 1854 add_reg_crossing_jump_notes (); 1855} 1856 1857/* Verify, in the basic block chain, that there is at most one switch 1858 between hot/cold partitions. This is modelled on 1859 rtl_verify_flow_info_1, but it cannot go inside that function 1860 because this condition will not be true until after 1861 reorder_basic_blocks is called. */ 1862 1863static void 1864verify_hot_cold_block_grouping (void) 1865{ 1866 basic_block bb; 1867 int err = 0; 1868 bool switched_sections = false; 1869 int current_partition = 0; 1870 1871 FOR_EACH_BB (bb) 1872 { 1873 if (!current_partition) 1874 current_partition = BB_PARTITION (bb); 1875 if (BB_PARTITION (bb) != current_partition) 1876 { 1877 if (switched_sections) 1878 { 1879 error ("multiple hot/cold transitions found (bb %i)", 1880 bb->index); 1881 err = 1; 1882 } 1883 else 1884 { 1885 switched_sections = true; 1886 current_partition = BB_PARTITION (bb); 1887 } 1888 } 1889 } 1890 1891 gcc_assert(!err); 1892} 1893 1894/* Reorder basic blocks. The main entry point to this file. FLAGS is 1895 the set of flags to pass to cfg_layout_initialize(). */ 1896 1897void 1898reorder_basic_blocks (void) 1899{ 1900 int n_traces; 1901 int i; 1902 struct trace *traces; 1903 1904 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT); 1905 1906 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 1907 return; 1908 1909 set_edge_can_fallthru_flag (); 1910 mark_dfs_back_edges (); 1911 1912 /* We are estimating the length of uncond jump insn only once since the code 1913 for getting the insn length always returns the minimal length now. */ 1914 if (uncond_jump_length == 0) 1915 uncond_jump_length = get_uncond_jump_length (); 1916 1917 /* We need to know some information for each basic block. */ 1918 array_size = GET_ARRAY_SIZE (last_basic_block); 1919 bbd = XNEWVEC (bbro_basic_block_data, array_size); 1920 for (i = 0; i < array_size; i++) 1921 { 1922 bbd[i].start_of_trace = -1; 1923 bbd[i].in_trace = -1; 1924 bbd[i].end_of_trace = -1; 1925 bbd[i].heap = NULL; 1926 bbd[i].node = NULL; 1927 } 1928 1929 traces = XNEWVEC (struct trace, n_basic_blocks); 1930 n_traces = 0; 1931 find_traces (&n_traces, traces); 1932 connect_traces (n_traces, traces); 1933 FREE (traces); 1934 FREE (bbd); 1935 1936 relink_block_chain (/*stay_in_cfglayout_mode=*/true); 1937 1938 if (dump_file) 1939 dump_flow_info (dump_file, dump_flags); 1940 1941 if (flag_reorder_blocks_and_partition) 1942 verify_hot_cold_block_grouping (); 1943} 1944 1945/* Determine which partition the first basic block in the function 1946 belongs to, then find the first basic block in the current function 1947 that belongs to a different section, and insert a 1948 NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the 1949 instruction stream. When writing out the assembly code, 1950 encountering this note will make the compiler switch between the 1951 hot and cold text sections. */ 1952 1953static void 1954insert_section_boundary_note (void) 1955{ 1956 basic_block bb; 1957 rtx new_note; 1958 int first_partition = 0; 1959 1960 if (flag_reorder_blocks_and_partition) 1961 FOR_EACH_BB (bb) 1962 { 1963 if (!first_partition) 1964 first_partition = BB_PARTITION (bb); 1965 if (BB_PARTITION (bb) != first_partition) 1966 { 1967 new_note = emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, 1968 BB_HEAD (bb)); 1969 /* ??? This kind of note always lives between basic blocks, 1970 but add_insn_before will set BLOCK_FOR_INSN anyway. */ 1971 BLOCK_FOR_INSN (new_note) = NULL; 1972 break; 1973 } 1974 } 1975} 1976 1977/* Duplicate the blocks containing computed gotos. This basically unfactors 1978 computed gotos that were factored early on in the compilation process to 1979 speed up edge based data flow. We used to not unfactoring them again, 1980 which can seriously pessimize code with many computed jumps in the source 1981 code, such as interpreters. See e.g. PR15242. */ 1982 1983static bool 1984gate_duplicate_computed_gotos (void) 1985{ 1986 if (targetm.cannot_modify_jumps_p ()) 1987 return false; 1988 return (optimize > 0 1989 && flag_expensive_optimizations 1990 && ! optimize_function_for_size_p (cfun)); 1991} 1992 1993 1994static unsigned int 1995duplicate_computed_gotos (void) 1996{ 1997 basic_block bb, new_bb; 1998 bitmap candidates; 1999 int max_size; 2000 2001 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 2002 return 0; 2003 2004 cfg_layout_initialize (0); 2005 2006 /* We are estimating the length of uncond jump insn only once 2007 since the code for getting the insn length always returns 2008 the minimal length now. */ 2009 if (uncond_jump_length == 0) 2010 uncond_jump_length = get_uncond_jump_length (); 2011 2012 max_size = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS); 2013 candidates = BITMAP_ALLOC (NULL); 2014 2015 /* Look for blocks that end in a computed jump, and see if such blocks 2016 are suitable for unfactoring. If a block is a candidate for unfactoring, 2017 mark it in the candidates. */ 2018 FOR_EACH_BB (bb) 2019 { 2020 rtx insn; 2021 edge e; 2022 edge_iterator ei; 2023 int size, all_flags; 2024 2025 /* Build the reorder chain for the original order of blocks. */ 2026 if (bb->next_bb != EXIT_BLOCK_PTR) 2027 bb->aux = bb->next_bb; 2028 2029 /* Obviously the block has to end in a computed jump. */ 2030 if (!computed_jump_p (BB_END (bb))) 2031 continue; 2032 2033 /* Only consider blocks that can be duplicated. */ 2034 if (find_reg_note (BB_END (bb), REG_CROSSING_JUMP, NULL_RTX) 2035 || !can_duplicate_block_p (bb)) 2036 continue; 2037 2038 /* Make sure that the block is small enough. */ 2039 size = 0; 2040 FOR_BB_INSNS (bb, insn) 2041 if (INSN_P (insn)) 2042 { 2043 size += get_attr_min_length (insn); 2044 if (size > max_size) 2045 break; 2046 } 2047 if (size > max_size) 2048 continue; 2049 2050 /* Final check: there must not be any incoming abnormal edges. */ 2051 all_flags = 0; 2052 FOR_EACH_EDGE (e, ei, bb->preds) 2053 all_flags |= e->flags; 2054 if (all_flags & EDGE_COMPLEX) 2055 continue; 2056 2057 bitmap_set_bit (candidates, bb->index); 2058 } 2059 2060 /* Nothing to do if there is no computed jump here. */ 2061 if (bitmap_empty_p (candidates)) 2062 goto done; 2063 2064 /* Duplicate computed gotos. */ 2065 FOR_EACH_BB (bb) 2066 { 2067 if (bb->il.rtl->visited) 2068 continue; 2069 2070 bb->il.rtl->visited = 1; 2071 2072 /* BB must have one outgoing edge. That edge must not lead to 2073 the exit block or the next block. 2074 The destination must have more than one predecessor. */ 2075 if (!single_succ_p (bb) 2076 || single_succ (bb) == EXIT_BLOCK_PTR 2077 || single_succ (bb) == bb->next_bb 2078 || single_pred_p (single_succ (bb))) 2079 continue; 2080 2081 /* The successor block has to be a duplication candidate. */ 2082 if (!bitmap_bit_p (candidates, single_succ (bb)->index)) 2083 continue; 2084 2085 new_bb = duplicate_block (single_succ (bb), single_succ_edge (bb), bb); 2086 new_bb->aux = bb->aux; 2087 bb->aux = new_bb; 2088 new_bb->il.rtl->visited = 1; 2089 } 2090 2091done: 2092 cfg_layout_finalize (); 2093 2094 BITMAP_FREE (candidates); 2095 return 0; 2096} 2097 2098struct rtl_opt_pass pass_duplicate_computed_gotos = 2099{ 2100 { 2101 RTL_PASS, 2102 "compgotos", /* name */ 2103 gate_duplicate_computed_gotos, /* gate */ 2104 duplicate_computed_gotos, /* execute */ 2105 NULL, /* sub */ 2106 NULL, /* next */ 2107 0, /* static_pass_number */ 2108 TV_REORDER_BLOCKS, /* tv_id */ 2109 0, /* properties_required */ 2110 0, /* properties_provided */ 2111 0, /* properties_destroyed */ 2112 0, /* todo_flags_start */ 2113 TODO_dump_func | TODO_verify_rtl_sharing,/* todo_flags_finish */ 2114 } 2115}; 2116 2117 2118/* This function is the main 'entrance' for the optimization that 2119 partitions hot and cold basic blocks into separate sections of the 2120 .o file (to improve performance and cache locality). Ideally it 2121 would be called after all optimizations that rearrange the CFG have 2122 been called. However part of this optimization may introduce new 2123 register usage, so it must be called before register allocation has 2124 occurred. This means that this optimization is actually called 2125 well before the optimization that reorders basic blocks (see 2126 function above). 2127 2128 This optimization checks the feedback information to determine 2129 which basic blocks are hot/cold, updates flags on the basic blocks 2130 to indicate which section they belong in. This information is 2131 later used for writing out sections in the .o file. Because hot 2132 and cold sections can be arbitrarily large (within the bounds of 2133 memory), far beyond the size of a single function, it is necessary 2134 to fix up all edges that cross section boundaries, to make sure the 2135 instructions used can actually span the required distance. The 2136 fixes are described below. 2137 2138 Fall-through edges must be changed into jumps; it is not safe or 2139 legal to fall through across a section boundary. Whenever a 2140 fall-through edge crossing a section boundary is encountered, a new 2141 basic block is inserted (in the same section as the fall-through 2142 source), and the fall through edge is redirected to the new basic 2143 block. The new basic block contains an unconditional jump to the 2144 original fall-through target. (If the unconditional jump is 2145 insufficient to cross section boundaries, that is dealt with a 2146 little later, see below). 2147 2148 In order to deal with architectures that have short conditional 2149 branches (which cannot span all of memory) we take any conditional 2150 jump that attempts to cross a section boundary and add a level of 2151 indirection: it becomes a conditional jump to a new basic block, in 2152 the same section. The new basic block contains an unconditional 2153 jump to the original target, in the other section. 2154 2155 For those architectures whose unconditional branch is also 2156 incapable of reaching all of memory, those unconditional jumps are 2157 converted into indirect jumps, through a register. 2158 2159 IMPORTANT NOTE: This optimization causes some messy interactions 2160 with the cfg cleanup optimizations; those optimizations want to 2161 merge blocks wherever possible, and to collapse indirect jump 2162 sequences (change "A jumps to B jumps to C" directly into "A jumps 2163 to C"). Those optimizations can undo the jump fixes that 2164 partitioning is required to make (see above), in order to ensure 2165 that jumps attempting to cross section boundaries are really able 2166 to cover whatever distance the jump requires (on many architectures 2167 conditional or unconditional jumps are not able to reach all of 2168 memory). Therefore tests have to be inserted into each such 2169 optimization to make sure that it does not undo stuff necessary to 2170 cross partition boundaries. This would be much less of a problem 2171 if we could perform this optimization later in the compilation, but 2172 unfortunately the fact that we may need to create indirect jumps 2173 (through registers) requires that this optimization be performed 2174 before register allocation. */ 2175 2176static void 2177partition_hot_cold_basic_blocks (void) 2178{ 2179 edge *crossing_edges; 2180 int n_crossing_edges; 2181 int max_edges = 2 * last_basic_block; 2182 2183 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1) 2184 return; 2185 2186 crossing_edges = XCNEWVEC (edge, max_edges); 2187 2188 find_rarely_executed_basic_blocks_and_crossing_edges (&crossing_edges, 2189 &n_crossing_edges, 2190 &max_edges); 2191 2192 if (n_crossing_edges > 0) 2193 fix_edges_for_rarely_executed_code (crossing_edges, n_crossing_edges); 2194 2195 free (crossing_edges); 2196} 2197 2198static bool 2199gate_handle_reorder_blocks (void) 2200{ 2201 if (targetm.cannot_modify_jumps_p ()) 2202 return false; 2203 return (optimize > 0); 2204} 2205 2206 2207/* Reorder basic blocks. */ 2208static unsigned int 2209rest_of_handle_reorder_blocks (void) 2210{ 2211 basic_block bb; 2212 2213 /* Last attempt to optimize CFG, as scheduling, peepholing and insn 2214 splitting possibly introduced more crossjumping opportunities. */ 2215 cfg_layout_initialize (CLEANUP_EXPENSIVE); 2216 2217 if ((flag_reorder_blocks || flag_reorder_blocks_and_partition) 2218 /* Don't reorder blocks when optimizing for size because extra jump insns may 2219 be created; also barrier may create extra padding. 2220 2221 More correctly we should have a block reordering mode that tried to 2222 minimize the combined size of all the jumps. This would more or less 2223 automatically remove extra jumps, but would also try to use more short 2224 jumps instead of long jumps. */ 2225 && optimize_function_for_speed_p (cfun)) 2226 { 2227 reorder_basic_blocks (); 2228 cleanup_cfg (CLEANUP_EXPENSIVE); 2229 } 2230 2231 FOR_EACH_BB (bb) 2232 if (bb->next_bb != EXIT_BLOCK_PTR) 2233 bb->aux = bb->next_bb; 2234 cfg_layout_finalize (); 2235 2236 /* Add NOTE_INSN_SWITCH_TEXT_SECTIONS notes. */ 2237 insert_section_boundary_note (); 2238 return 0; 2239} 2240 2241struct rtl_opt_pass pass_reorder_blocks = 2242{ 2243 { 2244 RTL_PASS, 2245 "bbro", /* name */ 2246 gate_handle_reorder_blocks, /* gate */ 2247 rest_of_handle_reorder_blocks, /* execute */ 2248 NULL, /* sub */ 2249 NULL, /* next */ 2250 0, /* static_pass_number */ 2251 TV_REORDER_BLOCKS, /* tv_id */ 2252 0, /* properties_required */ 2253 0, /* properties_provided */ 2254 0, /* properties_destroyed */ 2255 0, /* todo_flags_start */ 2256 TODO_dump_func | TODO_verify_rtl_sharing,/* todo_flags_finish */ 2257 } 2258}; 2259 2260static bool 2261gate_handle_partition_blocks (void) 2262{ 2263 /* The optimization to partition hot/cold basic blocks into separate 2264 sections of the .o file does not work well with linkonce or with 2265 user defined section attributes. Don't call it if either case 2266 arises. */ 2267 2268 return (flag_reorder_blocks_and_partition 2269 && !DECL_ONE_ONLY (current_function_decl) 2270 && !user_defined_section_attribute); 2271} 2272 2273/* Partition hot and cold basic blocks. */ 2274static unsigned int 2275rest_of_handle_partition_blocks (void) 2276{ 2277 partition_hot_cold_basic_blocks (); 2278 return 0; 2279} 2280 2281struct rtl_opt_pass pass_partition_blocks = 2282{ 2283 { 2284 RTL_PASS, 2285 "bbpart", /* name */ 2286 gate_handle_partition_blocks, /* gate */ 2287 rest_of_handle_partition_blocks, /* execute */ 2288 NULL, /* sub */ 2289 NULL, /* next */ 2290 0, /* static_pass_number */ 2291 TV_REORDER_BLOCKS, /* tv_id */ 2292 PROP_cfglayout, /* properties_required */ 2293 0, /* properties_provided */ 2294 0, /* properties_destroyed */ 2295 0, /* todo_flags_start */ 2296 TODO_dump_func | TODO_verify_rtl_sharing/* todo_flags_finish */ 2297 } 2298}; 2299