1/* Read and annotate call graph profile from the auto profile data file. 2 Copyright (C) 2014-2015 Free Software Foundation, Inc. 3 Contributed by Dehao Chen (dehao@google.com) 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21#include "config.h" 22#include "system.h" 23 24#include <string.h> 25#include <map> 26#include <set> 27 28#include "coretypes.h" 29#include "hash-set.h" 30#include "machmode.h" 31#include "vec.h" 32#include "double-int.h" 33#include "input.h" 34#include "alias.h" 35#include "symtab.h" 36#include "options.h" 37#include "wide-int.h" 38#include "inchash.h" 39#include "tree.h" 40#include "fold-const.h" 41#include "tree-pass.h" 42#include "flags.h" 43#include "predict.h" 44#include "vec.h" 45#include "hashtab.h" 46#include "hash-set.h" 47#include "machmode.h" 48#include "tm.h" 49#include "hard-reg-set.h" 50#include "input.h" 51#include "function.h" 52#include "dominance.h" 53#include "cfg.h" 54#include "basic-block.h" 55#include "diagnostic-core.h" 56#include "gcov-io.h" 57#include "profile.h" 58#include "langhooks.h" 59#include "opts.h" 60#include "tree-pass.h" 61#include "cfgloop.h" 62#include "tree-ssa-alias.h" 63#include "tree-cfg.h" 64#include "tree-cfgcleanup.h" 65#include "tree-ssa-operands.h" 66#include "tree-into-ssa.h" 67#include "internal-fn.h" 68#include "is-a.h" 69#include "gimple-expr.h" 70#include "gimple.h" 71#include "gimple-iterator.h" 72#include "gimple-ssa.h" 73#include "hash-map.h" 74#include "plugin-api.h" 75#include "ipa-ref.h" 76#include "cgraph.h" 77#include "value-prof.h" 78#include "coverage.h" 79#include "params.h" 80#include "alloc-pool.h" 81#include "symbol-summary.h" 82#include "ipa-prop.h" 83#include "ipa-inline.h" 84#include "tree-inline.h" 85#include "stringpool.h" 86#include "auto-profile.h" 87 88/* The following routines implements AutoFDO optimization. 89 90 This optimization uses sampling profiles to annotate basic block counts 91 and uses heuristics to estimate branch probabilities. 92 93 There are three phases in AutoFDO: 94 95 Phase 1: Read profile from the profile data file. 96 The following info is read from the profile datafile: 97 * string_table: a map between function name and its index. 98 * autofdo_source_profile: a map from function_instance name to 99 function_instance. This is represented as a forest of 100 function_instances. 101 * WorkingSet: a histogram of how many instructions are covered for a 102 given percentage of total cycles. This is describing the binary 103 level information (not source level). This info is used to help 104 decide if we want aggressive optimizations that could increase 105 code footprint (e.g. loop unroll etc.) 106 A function instance is an instance of function that could either be a 107 standalone symbol, or a clone of a function that is inlined into another 108 function. 109 110 Phase 2: Early inline + value profile transformation. 111 Early inline uses autofdo_source_profile to find if a callsite is: 112 * inlined in the profiled binary. 113 * callee body is hot in the profiling run. 114 If both condition satisfies, early inline will inline the callsite 115 regardless of the code growth. 116 Phase 2 is an iterative process. During each iteration, we also check 117 if an indirect callsite is promoted and inlined in the profiling run. 118 If yes, vpt will happen to force promote it and in the next iteration, 119 einline will inline the promoted callsite in the next iteration. 120 121 Phase 3: Annotate control flow graph. 122 AutoFDO uses a separate pass to: 123 * Annotate basic block count 124 * Estimate branch probability 125 126 After the above 3 phases, all profile is readily annotated on the GCC IR. 127 AutoFDO tries to reuse all FDO infrastructure as much as possible to make 128 use of the profile. E.g. it uses existing mechanism to calculate the basic 129 block/edge frequency, as well as the cgraph node/edge count. 130*/ 131 132#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" 133#define AUTO_PROFILE_VERSION 1 134 135namespace autofdo 136{ 137 138/* Represent a source location: (function_decl, lineno). */ 139typedef std::pair<tree, unsigned> decl_lineno; 140 141/* Represent an inline stack. vector[0] is the leaf node. */ 142typedef auto_vec<decl_lineno> inline_stack; 143 144/* String array that stores function names. */ 145typedef auto_vec<char *> string_vector; 146 147/* Map from function name's index in string_table to target's 148 execution count. */ 149typedef std::map<unsigned, gcov_type> icall_target_map; 150 151/* Set of gimple stmts. Used to track if the stmt has already been promoted 152 to direct call. */ 153typedef std::set<gimple> stmt_set; 154 155/* Represent count info of an inline stack. */ 156struct count_info 157{ 158 /* Sampled count of the inline stack. */ 159 gcov_type count; 160 161 /* Map from indirect call target to its sample count. */ 162 icall_target_map targets; 163 164 /* Whether this inline stack is already used in annotation. 165 166 Each inline stack should only be used to annotate IR once. 167 This will be enforced when instruction-level discriminator 168 is supported. */ 169 bool annotated; 170}; 171 172/* operator< for "const char *". */ 173struct string_compare 174{ 175 bool operator()(const char *a, const char *b) const 176 { 177 return strcmp (a, b) < 0; 178 } 179}; 180 181/* Store a string array, indexed by string position in the array. */ 182class string_table 183{ 184public: 185 string_table () 186 {} 187 188 ~string_table (); 189 190 /* For a given string, returns its index. */ 191 int get_index (const char *name) const; 192 193 /* For a given decl, returns the index of the decl name. */ 194 int get_index_by_decl (tree decl) const; 195 196 /* For a given index, returns the string. */ 197 const char *get_name (int index) const; 198 199 /* Read profile, return TRUE on success. */ 200 bool read (); 201 202private: 203 typedef std::map<const char *, unsigned, string_compare> string_index_map; 204 string_vector vector_; 205 string_index_map map_; 206}; 207 208/* Profile of a function instance: 209 1. total_count of the function. 210 2. head_count (entry basic block count) of the function (only valid when 211 function is a top-level function_instance, i.e. it is the original copy 212 instead of the inlined copy). 213 3. map from source location (decl_lineno) to profile (count_info). 214 4. map from callsite to callee function_instance. */ 215class function_instance 216{ 217public: 218 typedef auto_vec<function_instance *> function_instance_stack; 219 220 /* Read the profile and return a function_instance with head count as 221 HEAD_COUNT. Recursively read callsites to create nested function_instances 222 too. STACK is used to track the recursive creation process. */ 223 static function_instance * 224 read_function_instance (function_instance_stack *stack, 225 gcov_type head_count); 226 227 /* Recursively deallocate all callsites (nested function_instances). */ 228 ~function_instance (); 229 230 /* Accessors. */ 231 int 232 name () const 233 { 234 return name_; 235 } 236 gcov_type 237 total_count () const 238 { 239 return total_count_; 240 } 241 gcov_type 242 head_count () const 243 { 244 return head_count_; 245 } 246 247 /* Traverse callsites of the current function_instance to find one at the 248 location of LINENO and callee name represented in DECL. */ 249 function_instance *get_function_instance_by_decl (unsigned lineno, 250 tree decl) const; 251 252 /* Store the profile info for LOC in INFO. Return TRUE if profile info 253 is found. */ 254 bool get_count_info (location_t loc, count_info *info) const; 255 256 /* Read the inlined indirect call target profile for STMT and store it in 257 MAP, return the total count for all inlined indirect calls. */ 258 gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const; 259 260 /* Sum of counts that is used during annotation. */ 261 gcov_type total_annotated_count () const; 262 263 /* Mark LOC as annotated. */ 264 void mark_annotated (location_t loc); 265 266private: 267 /* Callsite, represented as (decl_lineno, callee_function_name_index). */ 268 typedef std::pair<unsigned, unsigned> callsite; 269 270 /* Map from callsite to callee function_instance. */ 271 typedef std::map<callsite, function_instance *> callsite_map; 272 273 function_instance (unsigned name, gcov_type head_count) 274 : name_ (name), total_count_ (0), head_count_ (head_count) 275 { 276 } 277 278 /* Map from source location (decl_lineno) to profile (count_info). */ 279 typedef std::map<unsigned, count_info> position_count_map; 280 281 /* function_instance name index in the string_table. */ 282 unsigned name_; 283 284 /* Total sample count. */ 285 gcov_type total_count_; 286 287 /* Entry BB's sample count. */ 288 gcov_type head_count_; 289 290 /* Map from callsite location to callee function_instance. */ 291 callsite_map callsites; 292 293 /* Map from source location to count_info. */ 294 position_count_map pos_counts; 295}; 296 297/* Profile for all functions. */ 298class autofdo_source_profile 299{ 300public: 301 static autofdo_source_profile * 302 create () 303 { 304 autofdo_source_profile *map = new autofdo_source_profile (); 305 306 if (map->read ()) 307 return map; 308 delete map; 309 return NULL; 310 } 311 312 ~autofdo_source_profile (); 313 314 /* For a given DECL, returns the top-level function_instance. */ 315 function_instance *get_function_instance_by_decl (tree decl) const; 316 317 /* Find count_info for a given gimple STMT. If found, store the count_info 318 in INFO and return true; otherwise return false. */ 319 bool get_count_info (gimple stmt, count_info *info) const; 320 321 /* Find total count of the callee of EDGE. */ 322 gcov_type get_callsite_total_count (struct cgraph_edge *edge) const; 323 324 /* Update value profile INFO for STMT from the inlined indirect callsite. 325 Return true if INFO is updated. */ 326 bool update_inlined_ind_target (gcall *stmt, count_info *info); 327 328 /* Mark LOC as annotated. */ 329 void mark_annotated (location_t loc); 330 331private: 332 /* Map from function_instance name index (in string_table) to 333 function_instance. */ 334 typedef std::map<unsigned, function_instance *> name_function_instance_map; 335 336 autofdo_source_profile () {} 337 338 /* Read AutoFDO profile and returns TRUE on success. */ 339 bool read (); 340 341 /* Return the function_instance in the profile that correspond to the 342 inline STACK. */ 343 function_instance * 344 get_function_instance_by_inline_stack (const inline_stack &stack) const; 345 346 name_function_instance_map map_; 347}; 348 349/* Store the strings read from the profile data file. */ 350static string_table *afdo_string_table; 351 352/* Store the AutoFDO source profile. */ 353static autofdo_source_profile *afdo_source_profile; 354 355/* gcov_ctr_summary structure to store the profile_info. */ 356static struct gcov_ctr_summary *afdo_profile_info; 357 358/* Helper functions. */ 359 360/* Return the original name of NAME: strip the suffix that starts 361 with '.' Caller is responsible for freeing RET. */ 362 363static char * 364get_original_name (const char *name) 365{ 366 char *ret = xstrdup (name); 367 char *find = strchr (ret, '.'); 368 if (find != NULL) 369 *find = 0; 370 return ret; 371} 372 373/* Return the combined location, which is a 32bit integer in which 374 higher 16 bits stores the line offset of LOC to the start lineno 375 of DECL, The lower 16 bits stores the discriminator. */ 376 377static unsigned 378get_combined_location (location_t loc, tree decl) 379{ 380 /* TODO: allow more bits for line and less bits for discriminator. */ 381 if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16)) 382 warning_at (loc, OPT_Woverflow, "Offset exceeds 16 bytes."); 383 return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16); 384} 385 386/* Return the function decl of a given lexical BLOCK. */ 387 388static tree 389get_function_decl_from_block (tree block) 390{ 391 tree decl; 392 393 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block) == UNKNOWN_LOCATION)) 394 return NULL_TREE; 395 396 for (decl = BLOCK_ABSTRACT_ORIGIN (block); 397 decl && (TREE_CODE (decl) == BLOCK); 398 decl = BLOCK_ABSTRACT_ORIGIN (decl)) 399 if (TREE_CODE (decl) == FUNCTION_DECL) 400 break; 401 return decl; 402} 403 404/* Store inline stack for STMT in STACK. */ 405 406static void 407get_inline_stack (location_t locus, inline_stack *stack) 408{ 409 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 410 return; 411 412 tree block = LOCATION_BLOCK (locus); 413 if (block && TREE_CODE (block) == BLOCK) 414 { 415 int level = 0; 416 for (block = BLOCK_SUPERCONTEXT (block); 417 block && (TREE_CODE (block) == BLOCK); 418 block = BLOCK_SUPERCONTEXT (block)) 419 { 420 location_t tmp_locus = BLOCK_SOURCE_LOCATION (block); 421 if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION) 422 continue; 423 424 tree decl = get_function_decl_from_block (block); 425 stack->safe_push ( 426 std::make_pair (decl, get_combined_location (locus, decl))); 427 locus = tmp_locus; 428 level++; 429 } 430 } 431 stack->safe_push ( 432 std::make_pair (current_function_decl, 433 get_combined_location (locus, current_function_decl))); 434} 435 436/* Return STMT's combined location, which is a 32bit integer in which 437 higher 16 bits stores the line offset of LOC to the start lineno 438 of DECL, The lower 16 bits stores the discriminator. */ 439 440static unsigned 441get_relative_location_for_stmt (gimple stmt) 442{ 443 location_t locus = gimple_location (stmt); 444 if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION) 445 return UNKNOWN_LOCATION; 446 447 for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK); 448 block = BLOCK_SUPERCONTEXT (block)) 449 if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION) 450 return get_combined_location (locus, 451 get_function_decl_from_block (block)); 452 return get_combined_location (locus, current_function_decl); 453} 454 455/* Return true if BB contains indirect call. */ 456 457static bool 458has_indirect_call (basic_block bb) 459{ 460 gimple_stmt_iterator gsi; 461 462 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 463 { 464 gimple stmt = gsi_stmt (gsi); 465 if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt) 466 && (gimple_call_fn (stmt) == NULL 467 || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL)) 468 return true; 469 } 470 return false; 471} 472 473/* Member functions for string_table. */ 474 475/* Deconstructor. */ 476 477string_table::~string_table () 478{ 479 for (unsigned i = 0; i < vector_.length (); i++) 480 free (vector_[i]); 481} 482 483 484/* Return the index of a given function NAME. Return -1 if NAME is not 485 found in string table. */ 486 487int 488string_table::get_index (const char *name) const 489{ 490 if (name == NULL) 491 return -1; 492 string_index_map::const_iterator iter = map_.find (name); 493 if (iter == map_.end ()) 494 return -1; 495 496 return iter->second; 497} 498 499/* Return the index of a given function DECL. Return -1 if DECL is not 500 found in string table. */ 501 502int 503string_table::get_index_by_decl (tree decl) const 504{ 505 char *name 506 = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); 507 int ret = get_index (name); 508 free (name); 509 if (ret != -1) 510 return ret; 511 ret = get_index (lang_hooks.dwarf_name (decl, 0)); 512 if (ret != -1) 513 return ret; 514 if (DECL_ABSTRACT_ORIGIN (decl)) 515 return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl)); 516 517 return -1; 518} 519 520/* Return the function name of a given INDEX. */ 521 522const char * 523string_table::get_name (int index) const 524{ 525 gcc_assert (index > 0 && index < (int)vector_.length ()); 526 return vector_[index]; 527} 528 529/* Read the string table. Return TRUE if reading is successful. */ 530 531bool 532string_table::read () 533{ 534 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES) 535 return false; 536 /* Skip the length of the section. */ 537 gcov_read_unsigned (); 538 /* Read in the file name table. */ 539 unsigned string_num = gcov_read_unsigned (); 540 for (unsigned i = 0; i < string_num; i++) 541 { 542 vector_.safe_push (get_original_name (gcov_read_string ())); 543 map_[vector_.last ()] = i; 544 } 545 return true; 546} 547 548/* Member functions for function_instance. */ 549 550function_instance::~function_instance () 551{ 552 for (callsite_map::iterator iter = callsites.begin (); 553 iter != callsites.end (); ++iter) 554 delete iter->second; 555} 556 557/* Traverse callsites of the current function_instance to find one at the 558 location of LINENO and callee name represented in DECL. */ 559 560function_instance * 561function_instance::get_function_instance_by_decl (unsigned lineno, 562 tree decl) const 563{ 564 int func_name_idx = afdo_string_table->get_index_by_decl (decl); 565 if (func_name_idx != -1) 566 { 567 callsite_map::const_iterator ret 568 = callsites.find (std::make_pair (lineno, func_name_idx)); 569 if (ret != callsites.end ()) 570 return ret->second; 571 } 572 func_name_idx 573 = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0)); 574 if (func_name_idx != -1) 575 { 576 callsite_map::const_iterator ret 577 = callsites.find (std::make_pair (lineno, func_name_idx)); 578 if (ret != callsites.end ()) 579 return ret->second; 580 } 581 if (DECL_ABSTRACT_ORIGIN (decl)) 582 return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl)); 583 584 return NULL; 585} 586 587/* Store the profile info for LOC in INFO. Return TRUE if profile info 588 is found. */ 589 590bool 591function_instance::get_count_info (location_t loc, count_info *info) const 592{ 593 position_count_map::const_iterator iter = pos_counts.find (loc); 594 if (iter == pos_counts.end ()) 595 return false; 596 *info = iter->second; 597 return true; 598} 599 600/* Mark LOC as annotated. */ 601 602void 603function_instance::mark_annotated (location_t loc) 604{ 605 position_count_map::iterator iter = pos_counts.find (loc); 606 if (iter == pos_counts.end ()) 607 return; 608 iter->second.annotated = true; 609} 610 611/* Read the inlined indirect call target profile for STMT and store it in 612 MAP, return the total count for all inlined indirect calls. */ 613 614gcov_type 615function_instance::find_icall_target_map (gcall *stmt, 616 icall_target_map *map) const 617{ 618 gcov_type ret = 0; 619 unsigned stmt_offset = get_relative_location_for_stmt (stmt); 620 621 for (callsite_map::const_iterator iter = callsites.begin (); 622 iter != callsites.end (); ++iter) 623 { 624 unsigned callee = iter->second->name (); 625 /* Check if callsite location match the stmt. */ 626 if (iter->first.first != stmt_offset) 627 continue; 628 struct cgraph_node *node = cgraph_node::get_for_asmname ( 629 get_identifier (afdo_string_table->get_name (callee))); 630 if (node == NULL) 631 continue; 632 if (!check_ic_target (stmt, node)) 633 continue; 634 (*map)[callee] = iter->second->total_count (); 635 ret += iter->second->total_count (); 636 } 637 return ret; 638} 639 640/* Read the profile and create a function_instance with head count as 641 HEAD_COUNT. Recursively read callsites to create nested function_instances 642 too. STACK is used to track the recursive creation process. */ 643 644/* function instance profile format: 645 646 ENTRY_COUNT: 8 bytes 647 NAME_INDEX: 4 bytes 648 NUM_POS_COUNTS: 4 bytes 649 NUM_CALLSITES: 4 byte 650 POS_COUNT_1: 651 POS_1_OFFSET: 4 bytes 652 NUM_TARGETS: 4 bytes 653 COUNT: 8 bytes 654 TARGET_1: 655 VALUE_PROFILE_TYPE: 4 bytes 656 TARGET_IDX: 8 bytes 657 COUNT: 8 bytes 658 TARGET_2 659 ... 660 TARGET_n 661 POS_COUNT_2 662 ... 663 POS_COUNT_N 664 CALLSITE_1: 665 CALLSITE_1_OFFSET: 4 bytes 666 FUNCTION_INSTANCE_PROFILE (nested) 667 CALLSITE_2 668 ... 669 CALLSITE_n. */ 670 671function_instance * 672function_instance::read_function_instance (function_instance_stack *stack, 673 gcov_type head_count) 674{ 675 unsigned name = gcov_read_unsigned (); 676 unsigned num_pos_counts = gcov_read_unsigned (); 677 unsigned num_callsites = gcov_read_unsigned (); 678 function_instance *s = new function_instance (name, head_count); 679 stack->safe_push (s); 680 681 for (unsigned i = 0; i < num_pos_counts; i++) 682 { 683 unsigned offset = gcov_read_unsigned () & 0xffff0000; 684 unsigned num_targets = gcov_read_unsigned (); 685 gcov_type count = gcov_read_counter (); 686 s->pos_counts[offset].count = count; 687 for (unsigned j = 0; j < stack->length (); j++) 688 (*stack)[j]->total_count_ += count; 689 for (unsigned j = 0; j < num_targets; j++) 690 { 691 /* Only indirect call target histogram is supported now. */ 692 gcov_read_unsigned (); 693 gcov_type target_idx = gcov_read_counter (); 694 s->pos_counts[offset].targets[target_idx] = gcov_read_counter (); 695 } 696 } 697 for (unsigned i = 0; i < num_callsites; i++) 698 { 699 unsigned offset = gcov_read_unsigned (); 700 function_instance *callee_function_instance 701 = read_function_instance (stack, 0); 702 s->callsites[std::make_pair (offset, callee_function_instance->name ())] 703 = callee_function_instance; 704 } 705 stack->pop (); 706 return s; 707} 708 709/* Sum of counts that is used during annotation. */ 710 711gcov_type 712function_instance::total_annotated_count () const 713{ 714 gcov_type ret = 0; 715 for (callsite_map::const_iterator iter = callsites.begin (); 716 iter != callsites.end (); ++iter) 717 ret += iter->second->total_annotated_count (); 718 for (position_count_map::const_iterator iter = pos_counts.begin (); 719 iter != pos_counts.end (); ++iter) 720 if (iter->second.annotated) 721 ret += iter->second.count; 722 return ret; 723} 724 725/* Member functions for autofdo_source_profile. */ 726 727autofdo_source_profile::~autofdo_source_profile () 728{ 729 for (name_function_instance_map::const_iterator iter = map_.begin (); 730 iter != map_.end (); ++iter) 731 delete iter->second; 732} 733 734/* For a given DECL, returns the top-level function_instance. */ 735 736function_instance * 737autofdo_source_profile::get_function_instance_by_decl (tree decl) const 738{ 739 int index = afdo_string_table->get_index_by_decl (decl); 740 if (index == -1) 741 return NULL; 742 name_function_instance_map::const_iterator ret = map_.find (index); 743 return ret == map_.end () ? NULL : ret->second; 744} 745 746/* Find count_info for a given gimple STMT. If found, store the count_info 747 in INFO and return true; otherwise return false. */ 748 749bool 750autofdo_source_profile::get_count_info (gimple stmt, count_info *info) const 751{ 752 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 753 return false; 754 755 inline_stack stack; 756 get_inline_stack (gimple_location (stmt), &stack); 757 if (stack.length () == 0) 758 return false; 759 function_instance *s = get_function_instance_by_inline_stack (stack); 760 if (s == NULL) 761 return false; 762 return s->get_count_info (stack[0].second, info); 763} 764 765/* Mark LOC as annotated. */ 766 767void 768autofdo_source_profile::mark_annotated (location_t loc) 769{ 770 inline_stack stack; 771 get_inline_stack (loc, &stack); 772 if (stack.length () == 0) 773 return; 774 function_instance *s = get_function_instance_by_inline_stack (stack); 775 if (s == NULL) 776 return; 777 s->mark_annotated (stack[0].second); 778} 779 780/* Update value profile INFO for STMT from the inlined indirect callsite. 781 Return true if INFO is updated. */ 782 783bool 784autofdo_source_profile::update_inlined_ind_target (gcall *stmt, 785 count_info *info) 786{ 787 if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus) 788 return false; 789 790 count_info old_info; 791 get_count_info (stmt, &old_info); 792 gcov_type total = 0; 793 for (icall_target_map::const_iterator iter = old_info.targets.begin (); 794 iter != old_info.targets.end (); ++iter) 795 total += iter->second; 796 797 /* Program behavior changed, original promoted (and inlined) target is not 798 hot any more. Will avoid promote the original target. 799 800 To check if original promoted target is still hot, we check the total 801 count of the unpromoted targets (stored in old_info). If it is no less 802 than half of the callsite count (stored in INFO), the original promoted 803 target is considered not hot any more. */ 804 if (total >= info->count / 2) 805 return false; 806 807 inline_stack stack; 808 get_inline_stack (gimple_location (stmt), &stack); 809 if (stack.length () == 0) 810 return false; 811 function_instance *s = get_function_instance_by_inline_stack (stack); 812 if (s == NULL) 813 return false; 814 icall_target_map map; 815 if (s->find_icall_target_map (stmt, &map) == 0) 816 return false; 817 for (icall_target_map::const_iterator iter = map.begin (); 818 iter != map.end (); ++iter) 819 info->targets[iter->first] = iter->second; 820 return true; 821} 822 823/* Find total count of the callee of EDGE. */ 824 825gcov_type 826autofdo_source_profile::get_callsite_total_count ( 827 struct cgraph_edge *edge) const 828{ 829 inline_stack stack; 830 stack.safe_push (std::make_pair (edge->callee->decl, 0)); 831 get_inline_stack (gimple_location (edge->call_stmt), &stack); 832 833 function_instance *s = get_function_instance_by_inline_stack (stack); 834 if (s == NULL 835 || afdo_string_table->get_index (IDENTIFIER_POINTER ( 836 DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ()) 837 return 0; 838 839 return s->total_count (); 840} 841 842/* Read AutoFDO profile and returns TRUE on success. */ 843 844/* source profile format: 845 846 GCOV_TAG_AFDO_FUNCTION: 4 bytes 847 LENGTH: 4 bytes 848 NUM_FUNCTIONS: 4 bytes 849 FUNCTION_INSTANCE_1 850 FUNCTION_INSTANCE_2 851 ... 852 FUNCTION_INSTANCE_N. */ 853 854bool 855autofdo_source_profile::read () 856{ 857 if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION) 858 { 859 inform (0, "Not expected TAG."); 860 return false; 861 } 862 863 /* Skip the length of the section. */ 864 gcov_read_unsigned (); 865 866 /* Read in the function/callsite profile, and store it in local 867 data structure. */ 868 unsigned function_num = gcov_read_unsigned (); 869 for (unsigned i = 0; i < function_num; i++) 870 { 871 function_instance::function_instance_stack stack; 872 function_instance *s = function_instance::read_function_instance ( 873 &stack, gcov_read_counter ()); 874 afdo_profile_info->sum_all += s->total_count (); 875 map_[s->name ()] = s; 876 } 877 return true; 878} 879 880/* Return the function_instance in the profile that correspond to the 881 inline STACK. */ 882 883function_instance * 884autofdo_source_profile::get_function_instance_by_inline_stack ( 885 const inline_stack &stack) const 886{ 887 name_function_instance_map::const_iterator iter = map_.find ( 888 afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first)); 889 if (iter == map_.end()) 890 return NULL; 891 function_instance *s = iter->second; 892 for (unsigned i = stack.length() - 1; i > 0; i--) 893 { 894 s = s->get_function_instance_by_decl ( 895 stack[i].second, stack[i - 1].first); 896 if (s == NULL) 897 return NULL; 898 } 899 return s; 900} 901 902/* Module profile is only used by LIPO. Here we simply ignore it. */ 903 904static void 905fake_read_autofdo_module_profile () 906{ 907 /* Read in the module info. */ 908 gcov_read_unsigned (); 909 910 /* Skip the length of the section. */ 911 gcov_read_unsigned (); 912 913 /* Read in the file name table. */ 914 unsigned total_module_num = gcov_read_unsigned (); 915 gcc_assert (total_module_num == 0); 916} 917 918/* Read data from profile data file. */ 919 920static void 921read_profile (void) 922{ 923 if (gcov_open (auto_profile_file, 1) == 0) 924 error ("Cannot open profile file %s.", auto_profile_file); 925 926 if (gcov_read_unsigned () != GCOV_DATA_MAGIC) 927 error ("AutoFDO profile magic number does not mathch."); 928 929 /* Skip the version number. */ 930 unsigned version = gcov_read_unsigned (); 931 if (version != AUTO_PROFILE_VERSION) 932 error ("AutoFDO profile version %u does match %u.", 933 version, AUTO_PROFILE_VERSION); 934 935 /* Skip the empty integer. */ 936 gcov_read_unsigned (); 937 938 /* string_table. */ 939 afdo_string_table = new string_table (); 940 if (!afdo_string_table->read()) 941 error ("Cannot read string table from %s.", auto_profile_file); 942 943 /* autofdo_source_profile. */ 944 afdo_source_profile = autofdo_source_profile::create (); 945 if (afdo_source_profile == NULL) 946 error ("Cannot read function profile from %s.", auto_profile_file); 947 948 /* autofdo_module_profile. */ 949 fake_read_autofdo_module_profile (); 950 951 /* Read in the working set. */ 952 if (gcov_read_unsigned () != GCOV_TAG_AFDO_WORKING_SET) 953 error ("Cannot read working set from %s.", auto_profile_file); 954 955 /* Skip the length of the section. */ 956 gcov_read_unsigned (); 957 gcov_working_set_t set[128]; 958 for (unsigned i = 0; i < 128; i++) 959 { 960 set[i].num_counters = gcov_read_unsigned (); 961 set[i].min_counter = gcov_read_counter (); 962 } 963 add_working_set (set); 964} 965 966/* From AutoFDO profiles, find values inside STMT for that we want to measure 967 histograms for indirect-call optimization. 968 969 This function is actually served for 2 purposes: 970 * before annotation, we need to mark histogram, promote and inline 971 * after annotation, we just need to mark, and let follow-up logic to 972 decide if it needs to promote and inline. */ 973 974static void 975afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map, 976 bool transform) 977{ 978 gimple gs = gsi_stmt (*gsi); 979 tree callee; 980 981 if (map.size () == 0) 982 return; 983 gcall *stmt = dyn_cast <gcall *> (gs); 984 if ((!stmt) || gimple_call_fndecl (stmt) != NULL_TREE) 985 return; 986 987 callee = gimple_call_fn (stmt); 988 989 histogram_value hist = gimple_alloc_histogram_value ( 990 cfun, HIST_TYPE_INDIR_CALL, stmt, callee); 991 hist->n_counters = 3; 992 hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters); 993 gimple_add_histogram_value (cfun, stmt, hist); 994 995 gcov_type total = 0; 996 icall_target_map::const_iterator max_iter = map.end (); 997 998 for (icall_target_map::const_iterator iter = map.begin (); 999 iter != map.end (); ++iter) 1000 { 1001 total += iter->second; 1002 if (max_iter == map.end () || max_iter->second < iter->second) 1003 max_iter = iter; 1004 } 1005 1006 hist->hvalue.counters[0] 1007 = (unsigned long long)afdo_string_table->get_name (max_iter->first); 1008 hist->hvalue.counters[1] = max_iter->second; 1009 hist->hvalue.counters[2] = total; 1010 1011 if (!transform) 1012 return; 1013 1014 struct cgraph_edge *indirect_edge 1015 = cgraph_node::get (current_function_decl)->get_edge (stmt); 1016 struct cgraph_node *direct_call = cgraph_node::get_for_asmname ( 1017 get_identifier ((const char *) hist->hvalue.counters[0])); 1018 1019 if (direct_call == NULL || !check_ic_target (stmt, direct_call)) 1020 return; 1021 if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL) 1022 return; 1023 struct cgraph_edge *new_edge 1024 = indirect_edge->make_speculative (direct_call, 0, 0); 1025 new_edge->redirect_call_stmt_to_callee (); 1026 gimple_remove_histogram_value (cfun, stmt, hist); 1027 inline_call (new_edge, true, NULL, NULL, false); 1028} 1029 1030/* From AutoFDO profiles, find values inside STMT for that we want to measure 1031 histograms and adds them to list VALUES. */ 1032 1033static void 1034afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map, 1035 bool transform) 1036{ 1037 afdo_indirect_call (gsi, map, transform); 1038} 1039 1040typedef std::set<basic_block> bb_set; 1041typedef std::set<edge> edge_set; 1042 1043static bool 1044is_bb_annotated (const basic_block bb, const bb_set &annotated) 1045{ 1046 return annotated.find (bb) != annotated.end (); 1047} 1048 1049static void 1050set_bb_annotated (basic_block bb, bb_set *annotated) 1051{ 1052 annotated->insert (bb); 1053} 1054 1055static bool 1056is_edge_annotated (const edge e, const edge_set &annotated) 1057{ 1058 return annotated.find (e) != annotated.end (); 1059} 1060 1061static void 1062set_edge_annotated (edge e, edge_set *annotated) 1063{ 1064 annotated->insert (e); 1065} 1066 1067/* For a given BB, set its execution count. Attach value profile if a stmt 1068 is not in PROMOTED, because we only want to promote an indirect call once. 1069 Return TRUE if BB is annotated. */ 1070 1071static bool 1072afdo_set_bb_count (basic_block bb, const stmt_set &promoted) 1073{ 1074 gimple_stmt_iterator gsi; 1075 edge e; 1076 edge_iterator ei; 1077 gcov_type max_count = 0; 1078 bool has_annotated = false; 1079 1080 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1081 { 1082 count_info info; 1083 gimple stmt = gsi_stmt (gsi); 1084 if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) 1085 continue; 1086 if (afdo_source_profile->get_count_info (stmt, &info)) 1087 { 1088 if (info.count > max_count) 1089 max_count = info.count; 1090 has_annotated = true; 1091 if (info.targets.size () > 0 1092 && promoted.find (stmt) == promoted.end ()) 1093 afdo_vpt (&gsi, info.targets, false); 1094 } 1095 } 1096 1097 if (!has_annotated) 1098 return false; 1099 1100 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1101 afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi))); 1102 for (gphi_iterator gpi = gsi_start_phis (bb); 1103 !gsi_end_p (gpi); 1104 gsi_next (&gpi)) 1105 { 1106 gphi *phi = gpi.phi (); 1107 size_t i; 1108 for (i = 0; i < gimple_phi_num_args (phi); i++) 1109 afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i)); 1110 } 1111 FOR_EACH_EDGE (e, ei, bb->succs) 1112 afdo_source_profile->mark_annotated (e->goto_locus); 1113 1114 bb->count = max_count; 1115 return true; 1116} 1117 1118/* BB1 and BB2 are in an equivalent class iff: 1119 1. BB1 dominates BB2. 1120 2. BB2 post-dominates BB1. 1121 3. BB1 and BB2 are in the same loop nest. 1122 This function finds the equivalent class for each basic block, and 1123 stores a pointer to the first BB in its equivalent class. Meanwhile, 1124 set bb counts for the same equivalent class to be idenical. Update 1125 ANNOTATED_BB for the first BB in its equivalent class. */ 1126 1127static void 1128afdo_find_equiv_class (bb_set *annotated_bb) 1129{ 1130 basic_block bb; 1131 1132 FOR_ALL_BB_FN (bb, cfun) 1133 bb->aux = NULL; 1134 1135 FOR_ALL_BB_FN (bb, cfun) 1136 { 1137 vec<basic_block> dom_bbs; 1138 basic_block bb1; 1139 int i; 1140 1141 if (bb->aux != NULL) 1142 continue; 1143 bb->aux = bb; 1144 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb); 1145 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1146 if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1) 1147 && bb1->loop_father == bb->loop_father) 1148 { 1149 bb1->aux = bb; 1150 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1151 { 1152 bb->count = bb1->count; 1153 set_bb_annotated (bb, annotated_bb); 1154 } 1155 } 1156 dom_bbs = get_dominated_by (CDI_POST_DOMINATORS, bb); 1157 FOR_EACH_VEC_ELT (dom_bbs, i, bb1) 1158 if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1) 1159 && bb1->loop_father == bb->loop_father) 1160 { 1161 bb1->aux = bb; 1162 if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb)) 1163 { 1164 bb->count = bb1->count; 1165 set_bb_annotated (bb, annotated_bb); 1166 } 1167 } 1168 } 1169} 1170 1171/* If a basic block's count is known, and only one of its in/out edges' count 1172 is unknown, its count can be calculated. Meanwhile, if all of the in/out 1173 edges' counts are known, then the basic block's unknown count can also be 1174 calculated. 1175 IS_SUCC is true if out edges of a basic blocks are examined. 1176 Update ANNOTATED_BB and ANNOTATED_EDGE accordingly. 1177 Return TRUE if any basic block/edge count is changed. */ 1178 1179static bool 1180afdo_propagate_edge (bool is_succ, bb_set *annotated_bb, 1181 edge_set *annotated_edge) 1182{ 1183 basic_block bb; 1184 bool changed = false; 1185 1186 FOR_EACH_BB_FN (bb, cfun) 1187 { 1188 edge e, unknown_edge = NULL; 1189 edge_iterator ei; 1190 int num_unknown_edge = 0; 1191 gcov_type total_known_count = 0; 1192 1193 FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds) 1194 if (!is_edge_annotated (e, *annotated_edge)) 1195 num_unknown_edge++, unknown_edge = e; 1196 else 1197 total_known_count += e->count; 1198 1199 if (num_unknown_edge == 0) 1200 { 1201 if (total_known_count > bb->count) 1202 { 1203 bb->count = total_known_count; 1204 changed = true; 1205 } 1206 if (!is_bb_annotated (bb, *annotated_bb)) 1207 { 1208 set_bb_annotated (bb, annotated_bb); 1209 changed = true; 1210 } 1211 } 1212 else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb)) 1213 { 1214 if (bb->count >= total_known_count) 1215 unknown_edge->count = bb->count - total_known_count; 1216 else 1217 unknown_edge->count = 0; 1218 set_edge_annotated (unknown_edge, annotated_edge); 1219 changed = true; 1220 } 1221 } 1222 return changed; 1223} 1224 1225/* Special propagation for circuit expressions. Because GCC translates 1226 control flow into data flow for circuit expressions. E.g. 1227 BB1: 1228 if (a && b) 1229 BB2 1230 else 1231 BB3 1232 1233 will be translated into: 1234 1235 BB1: 1236 if (a) 1237 goto BB.t1 1238 else 1239 goto BB.t3 1240 BB.t1: 1241 if (b) 1242 goto BB.t2 1243 else 1244 goto BB.t3 1245 BB.t2: 1246 goto BB.t3 1247 BB.t3: 1248 tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2) 1249 if (tmp) 1250 goto BB2 1251 else 1252 goto BB3 1253 1254 In this case, we need to propagate through PHI to determine the edge 1255 count of BB1->BB.t1, BB.t1->BB.t2. 1256 Update ANNOTATED_EDGE accordingly. */ 1257 1258static void 1259afdo_propagate_circuit (const bb_set &annotated_bb, edge_set *annotated_edge) 1260{ 1261 basic_block bb; 1262 FOR_ALL_BB_FN (bb, cfun) 1263 { 1264 gimple def_stmt; 1265 tree cmp_rhs, cmp_lhs; 1266 gimple cmp_stmt = last_stmt (bb); 1267 edge e; 1268 edge_iterator ei; 1269 1270 if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND) 1271 continue; 1272 cmp_rhs = gimple_cond_rhs (cmp_stmt); 1273 cmp_lhs = gimple_cond_lhs (cmp_stmt); 1274 if (!TREE_CONSTANT (cmp_rhs) 1275 || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs))) 1276 continue; 1277 if (TREE_CODE (cmp_lhs) != SSA_NAME) 1278 continue; 1279 if (!is_bb_annotated (bb, annotated_bb)) 1280 continue; 1281 def_stmt = SSA_NAME_DEF_STMT (cmp_lhs); 1282 while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN 1283 && gimple_assign_single_p (def_stmt) 1284 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) 1285 def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)); 1286 if (!def_stmt) 1287 continue; 1288 gphi *phi_stmt = dyn_cast <gphi *> (def_stmt); 1289 if (!phi_stmt) 1290 continue; 1291 FOR_EACH_EDGE (e, ei, bb->succs) 1292 { 1293 unsigned i, total = 0; 1294 edge only_one; 1295 bool check_value_one = (((integer_onep (cmp_rhs)) 1296 ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR)) 1297 ^ ((e->flags & EDGE_TRUE_VALUE) != 0)); 1298 if (!is_edge_annotated (e, *annotated_edge)) 1299 continue; 1300 for (i = 0; i < gimple_phi_num_args (phi_stmt); i++) 1301 { 1302 tree val = gimple_phi_arg_def (phi_stmt, i); 1303 edge ep = gimple_phi_arg_edge (phi_stmt, i); 1304 1305 if (!TREE_CONSTANT (val) 1306 || !(integer_zerop (val) || integer_onep (val))) 1307 continue; 1308 if (check_value_one ^ integer_onep (val)) 1309 continue; 1310 total++; 1311 only_one = ep; 1312 if (e->probability == 0 && !is_edge_annotated (ep, *annotated_edge)) 1313 { 1314 ep->probability = 0; 1315 ep->count = 0; 1316 set_edge_annotated (ep, annotated_edge); 1317 } 1318 } 1319 if (total == 1 && !is_edge_annotated (only_one, *annotated_edge)) 1320 { 1321 only_one->probability = e->probability; 1322 only_one->count = e->count; 1323 set_edge_annotated (only_one, annotated_edge); 1324 } 1325 } 1326 } 1327} 1328 1329/* Propagate the basic block count and edge count on the control flow 1330 graph. We do the propagation iteratively until stablize. */ 1331 1332static void 1333afdo_propagate (bb_set *annotated_bb, edge_set *annotated_edge) 1334{ 1335 basic_block bb; 1336 bool changed = true; 1337 int i = 0; 1338 1339 FOR_ALL_BB_FN (bb, cfun) 1340 { 1341 bb->count = ((basic_block)bb->aux)->count; 1342 if (is_bb_annotated ((const basic_block)bb->aux, *annotated_bb)) 1343 set_bb_annotated (bb, annotated_bb); 1344 } 1345 1346 while (changed && i++ < 10) 1347 { 1348 changed = false; 1349 1350 if (afdo_propagate_edge (true, annotated_bb, annotated_edge)) 1351 changed = true; 1352 if (afdo_propagate_edge (false, annotated_bb, annotated_edge)) 1353 changed = true; 1354 afdo_propagate_circuit (*annotated_bb, annotated_edge); 1355 } 1356} 1357 1358/* Propagate counts on control flow graph and calculate branch 1359 probabilities. */ 1360 1361static void 1362afdo_calculate_branch_prob (bb_set *annotated_bb, edge_set *annotated_edge) 1363{ 1364 basic_block bb; 1365 bool has_sample = false; 1366 1367 FOR_EACH_BB_FN (bb, cfun) 1368 if (bb->count > 0) 1369 has_sample = true; 1370 1371 if (!has_sample) 1372 return; 1373 1374 calculate_dominance_info (CDI_POST_DOMINATORS); 1375 calculate_dominance_info (CDI_DOMINATORS); 1376 loop_optimizer_init (0); 1377 1378 afdo_find_equiv_class (annotated_bb); 1379 afdo_propagate (annotated_bb, annotated_edge); 1380 1381 FOR_EACH_BB_FN (bb, cfun) 1382 { 1383 edge e; 1384 edge_iterator ei; 1385 int num_unknown_succ = 0; 1386 gcov_type total_count = 0; 1387 1388 FOR_EACH_EDGE (e, ei, bb->succs) 1389 { 1390 if (!is_edge_annotated (e, *annotated_edge)) 1391 num_unknown_succ++; 1392 else 1393 total_count += e->count; 1394 } 1395 if (num_unknown_succ == 0 && total_count > 0) 1396 { 1397 FOR_EACH_EDGE (e, ei, bb->succs) 1398 e->probability = (double)e->count * REG_BR_PROB_BASE / total_count; 1399 } 1400 } 1401 FOR_ALL_BB_FN (bb, cfun) 1402 { 1403 edge e; 1404 edge_iterator ei; 1405 1406 FOR_EACH_EDGE (e, ei, bb->succs) 1407 e->count = (double)bb->count * e->probability / REG_BR_PROB_BASE; 1408 bb->aux = NULL; 1409 } 1410 1411 loop_optimizer_finalize (); 1412 free_dominance_info (CDI_DOMINATORS); 1413 free_dominance_info (CDI_POST_DOMINATORS); 1414} 1415 1416/* Perform value profile transformation using AutoFDO profile. Add the 1417 promoted stmts to PROMOTED_STMTS. Return TRUE if there is any 1418 indirect call promoted. */ 1419 1420static bool 1421afdo_vpt_for_early_inline (stmt_set *promoted_stmts) 1422{ 1423 basic_block bb; 1424 if (afdo_source_profile->get_function_instance_by_decl ( 1425 current_function_decl) == NULL) 1426 return false; 1427 1428 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1429 1430 bool has_vpt = false; 1431 FOR_EACH_BB_FN (bb, cfun) 1432 { 1433 if (!has_indirect_call (bb)) 1434 continue; 1435 gimple_stmt_iterator gsi; 1436 1437 gcov_type bb_count = 0; 1438 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1439 { 1440 count_info info; 1441 gimple stmt = gsi_stmt (gsi); 1442 if (afdo_source_profile->get_count_info (stmt, &info)) 1443 bb_count = MAX (bb_count, info.count); 1444 } 1445 1446 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1447 { 1448 gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi)); 1449 /* IC_promotion and early_inline_2 is done in multiple iterations. 1450 No need to promoted the stmt if its in promoted_stmts (means 1451 it is already been promoted in the previous iterations). */ 1452 if ((!stmt) || gimple_call_fn (stmt) == NULL 1453 || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL 1454 || promoted_stmts->find (stmt) != promoted_stmts->end ()) 1455 continue; 1456 1457 count_info info; 1458 afdo_source_profile->get_count_info (stmt, &info); 1459 info.count = bb_count; 1460 if (afdo_source_profile->update_inlined_ind_target (stmt, &info)) 1461 { 1462 /* Promote the indirect call and update the promoted_stmts. */ 1463 promoted_stmts->insert (stmt); 1464 afdo_vpt (&gsi, info.targets, true); 1465 has_vpt = true; 1466 } 1467 } 1468 } 1469 1470 if (has_vpt) 1471 { 1472 optimize_inline_calls (current_function_decl); 1473 return true; 1474 } 1475 1476 return false; 1477} 1478 1479/* Annotate auto profile to the control flow graph. Do not annotate value 1480 profile for stmts in PROMOTED_STMTS. */ 1481 1482static void 1483afdo_annotate_cfg (const stmt_set &promoted_stmts) 1484{ 1485 basic_block bb; 1486 bb_set annotated_bb; 1487 edge_set annotated_edge; 1488 const function_instance *s 1489 = afdo_source_profile->get_function_instance_by_decl ( 1490 current_function_decl); 1491 1492 if (s == NULL) 1493 return; 1494 cgraph_node::get (current_function_decl)->count = s->head_count (); 1495 ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = s->head_count (); 1496 gcov_type max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1497 1498 FOR_EACH_BB_FN (bb, cfun) 1499 { 1500 edge e; 1501 edge_iterator ei; 1502 1503 bb->count = 0; 1504 FOR_EACH_EDGE (e, ei, bb->succs) 1505 e->count = 0; 1506 1507 if (afdo_set_bb_count (bb, promoted_stmts)) 1508 set_bb_annotated (bb, &annotated_bb); 1509 if (bb->count > max_count) 1510 max_count = bb->count; 1511 } 1512 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1513 > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count) 1514 { 1515 ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count 1516 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1517 set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb); 1518 } 1519 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count 1520 > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count) 1521 { 1522 EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count 1523 = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; 1524 set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb); 1525 } 1526 afdo_source_profile->mark_annotated ( 1527 DECL_SOURCE_LOCATION (current_function_decl)); 1528 afdo_source_profile->mark_annotated (cfun->function_start_locus); 1529 afdo_source_profile->mark_annotated (cfun->function_end_locus); 1530 if (max_count > 0) 1531 { 1532 afdo_calculate_branch_prob (&annotated_bb, &annotated_edge); 1533 counts_to_freqs (); 1534 profile_status_for_fn (cfun) = PROFILE_READ; 1535 } 1536 if (flag_value_profile_transformations) 1537 { 1538 gimple_value_profile_transformations (); 1539 free_dominance_info (CDI_DOMINATORS); 1540 free_dominance_info (CDI_POST_DOMINATORS); 1541 update_ssa (TODO_update_ssa); 1542 } 1543} 1544 1545/* Wrapper function to invoke early inliner. */ 1546 1547static void 1548early_inline () 1549{ 1550 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1551 unsigned todo = early_inliner (cfun); 1552 if (todo & TODO_update_ssa_any) 1553 update_ssa (TODO_update_ssa); 1554} 1555 1556/* Use AutoFDO profile to annoate the control flow graph. 1557 Return the todo flag. */ 1558 1559static unsigned int 1560auto_profile (void) 1561{ 1562 struct cgraph_node *node; 1563 1564 if (symtab->state == FINISHED) 1565 return 0; 1566 1567 init_node_map (true); 1568 profile_info = autofdo::afdo_profile_info; 1569 1570 FOR_EACH_FUNCTION (node) 1571 { 1572 if (!gimple_has_body_p (node->decl)) 1573 continue; 1574 1575 /* Don't profile functions produced for builtin stuff. */ 1576 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) 1577 continue; 1578 1579 push_cfun (DECL_STRUCT_FUNCTION (node->decl)); 1580 1581 /* First do indirect call promotion and early inline to make the 1582 IR match the profiled binary before actual annotation. 1583 1584 This is needed because an indirect call might have been promoted 1585 and inlined in the profiled binary. If we do not promote and 1586 inline these indirect calls before annotation, the profile for 1587 these promoted functions will be lost. 1588 1589 e.g. foo() --indirect_call--> bar() 1590 In profiled binary, the callsite is promoted and inlined, making 1591 the profile look like: 1592 1593 foo: { 1594 loc_foo_1: count_1 1595 bar@loc_foo_2: { 1596 loc_bar_1: count_2 1597 loc_bar_2: count_3 1598 } 1599 } 1600 1601 Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined. 1602 If we perform annotation on it, the profile inside bar@loc_foo2 1603 will be wasted. 1604 1605 To avoid this, we promote loc_foo_2 and inline the promoted bar 1606 function before annotation, so the profile inside bar@loc_foo2 1607 will be useful. */ 1608 autofdo::stmt_set promoted_stmts; 1609 for (int i = 0; i < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS); i++) 1610 { 1611 if (!flag_value_profile_transformations 1612 || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts)) 1613 break; 1614 early_inline (); 1615 } 1616 1617 early_inline (); 1618 autofdo::afdo_annotate_cfg (promoted_stmts); 1619 compute_function_frequency (); 1620 1621 /* Local pure-const may imply need to fixup the cfg. */ 1622 if (execute_fixup_cfg () & TODO_cleanup_cfg) 1623 cleanup_tree_cfg (); 1624 1625 free_dominance_info (CDI_DOMINATORS); 1626 free_dominance_info (CDI_POST_DOMINATORS); 1627 cgraph_edge::rebuild_edges (); 1628 compute_inline_parameters (cgraph_node::get (current_function_decl), true); 1629 pop_cfun (); 1630 } 1631 1632 return TODO_rebuild_cgraph_edges; 1633} 1634} /* namespace autofdo. */ 1635 1636/* Read the profile from the profile data file. */ 1637 1638void 1639read_autofdo_file (void) 1640{ 1641 if (auto_profile_file == NULL) 1642 auto_profile_file = DEFAULT_AUTO_PROFILE_FILE; 1643 1644 autofdo::afdo_profile_info = (struct gcov_ctr_summary *)xcalloc ( 1645 1, sizeof (struct gcov_ctr_summary)); 1646 autofdo::afdo_profile_info->runs = 1; 1647 autofdo::afdo_profile_info->sum_max = 0; 1648 autofdo::afdo_profile_info->sum_all = 0; 1649 1650 /* Read the profile from the profile file. */ 1651 autofdo::read_profile (); 1652} 1653 1654/* Free the resources. */ 1655 1656void 1657end_auto_profile (void) 1658{ 1659 delete autofdo::afdo_source_profile; 1660 delete autofdo::afdo_string_table; 1661 profile_info = NULL; 1662} 1663 1664/* Returns TRUE if EDGE is hot enough to be inlined early. */ 1665 1666bool 1667afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) 1668{ 1669 gcov_type count 1670 = autofdo::afdo_source_profile->get_callsite_total_count (edge); 1671 1672 if (count > 0) 1673 { 1674 bool is_hot; 1675 const struct gcov_ctr_summary *saved_profile_info = profile_info; 1676 /* At early inline stage, profile_info is not set yet. We need to 1677 temporarily set it to afdo_profile_info to calculate hotness. */ 1678 profile_info = autofdo::afdo_profile_info; 1679 is_hot = maybe_hot_count_p (NULL, count); 1680 profile_info = saved_profile_info; 1681 return is_hot; 1682 } 1683 1684 return false; 1685} 1686 1687namespace 1688{ 1689 1690const pass_data pass_data_ipa_auto_profile = { 1691 SIMPLE_IPA_PASS, "afdo", /* name */ 1692 OPTGROUP_NONE, /* optinfo_flags */ 1693 TV_IPA_AUTOFDO, /* tv_id */ 1694 0, /* properties_required */ 1695 0, /* properties_provided */ 1696 0, /* properties_destroyed */ 1697 0, /* todo_flags_start */ 1698 0, /* todo_flags_finish */ 1699}; 1700 1701class pass_ipa_auto_profile : public simple_ipa_opt_pass 1702{ 1703public: 1704 pass_ipa_auto_profile (gcc::context *ctxt) 1705 : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt) 1706 { 1707 } 1708 1709 /* opt_pass methods: */ 1710 virtual bool 1711 gate (function *) 1712 { 1713 return flag_auto_profile; 1714 } 1715 virtual unsigned int 1716 execute (function *) 1717 { 1718 return autofdo::auto_profile (); 1719 } 1720}; // class pass_ipa_auto_profile 1721 1722} // anon namespace 1723 1724simple_ipa_opt_pass * 1725make_pass_ipa_auto_profile (gcc::context *ctxt) 1726{ 1727 return new pass_ipa_auto_profile (ctxt); 1728} 1729