1/* Read and annotate call graph profile from the auto profile data file.
2   Copyright (C) 2014-2022 Free Software Foundation, Inc.
3   Contributed by Dehao Chen (dehao@google.com)
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21#include "config.h"
22#define INCLUDE_MAP
23#define INCLUDE_SET
24#include "system.h"
25#include "coretypes.h"
26#include "backend.h"
27#include "tree.h"
28#include "gimple.h"
29#include "predict.h"
30#include "alloc-pool.h"
31#include "tree-pass.h"
32#include "ssa.h"
33#include "cgraph.h"
34#include "gcov-io.h"
35#include "diagnostic-core.h"
36#include "profile.h"
37#include "langhooks.h"
38#include "cfgloop.h"
39#include "tree-cfg.h"
40#include "tree-cfgcleanup.h"
41#include "tree-into-ssa.h"
42#include "gimple-iterator.h"
43#include "value-prof.h"
44#include "symbol-summary.h"
45#include "ipa-prop.h"
46#include "ipa-fnsummary.h"
47#include "ipa-inline.h"
48#include "tree-inline.h"
49#include "auto-profile.h"
50#include "tree-pretty-print.h"
51#include "gimple-pretty-print.h"
52
53/* The following routines implements AutoFDO optimization.
54
55   This optimization uses sampling profiles to annotate basic block counts
56   and uses heuristics to estimate branch probabilities.
57
58   There are three phases in AutoFDO:
59
60   Phase 1: Read profile from the profile data file.
61     The following info is read from the profile datafile:
62        * string_table: a map between function name and its index.
63        * autofdo_source_profile: a map from function_instance name to
64          function_instance. This is represented as a forest of
65          function_instances.
66        * WorkingSet: a histogram of how many instructions are covered for a
67          given percentage of total cycles. This is describing the binary
68          level information (not source level). This info is used to help
69          decide if we want aggressive optimizations that could increase
70          code footprint (e.g. loop unroll etc.)
71     A function instance is an instance of function that could either be a
72     standalone symbol, or a clone of a function that is inlined into another
73     function.
74
75   Phase 2: Early inline + value profile transformation.
76     Early inline uses autofdo_source_profile to find if a callsite is:
77        * inlined in the profiled binary.
78        * callee body is hot in the profiling run.
79     If both condition satisfies, early inline will inline the callsite
80     regardless of the code growth.
81     Phase 2 is an iterative process. During each iteration, we also check
82     if an indirect callsite is promoted and inlined in the profiling run.
83     If yes, vpt will happen to force promote it and in the next iteration,
84     einline will inline the promoted callsite in the next iteration.
85
86   Phase 3: Annotate control flow graph.
87     AutoFDO uses a separate pass to:
88        * Annotate basic block count
89        * Estimate branch probability
90
91   After the above 3 phases, all profile is readily annotated on the GCC IR.
92   AutoFDO tries to reuse all FDO infrastructure as much as possible to make
93   use of the profile. E.g. it uses existing mechanism to calculate the basic
94   block/edge frequency, as well as the cgraph node/edge count.
95*/
96
97#define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo"
98#define AUTO_PROFILE_VERSION 2
99
100namespace autofdo
101{
102
103/* Intermediate edge info used when propagating AutoFDO profile information.
104   We can't edge->count() directly since it's computed from edge's probability
105   while probability is yet not decided during propagation.  */
106#define AFDO_EINFO(e)                     ((class edge_info *) e->aux)
107class edge_info
108{
109public:
110  edge_info () : count_ (profile_count::zero ().afdo ()), annotated_ (false) {}
111  bool is_annotated () const { return annotated_; }
112  void set_annotated () { annotated_ = true; }
113  profile_count get_count () const { return count_; }
114  void set_count (profile_count count) { count_ = count; }
115private:
116  profile_count count_;
117  bool annotated_;
118};
119
120/* Represent a source location: (function_decl, lineno).  */
121typedef std::pair<tree, unsigned> decl_lineno;
122
123/* Represent an inline stack. vector[0] is the leaf node.  */
124typedef auto_vec<decl_lineno> inline_stack;
125
126/* String array that stores function names.  */
127typedef auto_vec<char *> string_vector;
128
129/* Map from function name's index in string_table to target's
130   execution count.  */
131typedef std::map<unsigned, gcov_type> icall_target_map;
132
133/* Set of gimple stmts. Used to track if the stmt has already been promoted
134   to direct call.  */
135typedef std::set<gimple *> stmt_set;
136
137/* Represent count info of an inline stack.  */
138class count_info
139{
140public:
141  /* Sampled count of the inline stack.  */
142  gcov_type count;
143
144  /* Map from indirect call target to its sample count.  */
145  icall_target_map targets;
146
147  /* Whether this inline stack is already used in annotation.
148
149     Each inline stack should only be used to annotate IR once.
150     This will be enforced when instruction-level discriminator
151     is supported.  */
152  bool annotated;
153};
154
155/* operator< for "const char *".  */
156struct string_compare
157{
158  bool operator()(const char *a, const char *b) const
159  {
160    return strcmp (a, b) < 0;
161  }
162};
163
164/* Store a string array, indexed by string position in the array.  */
165class string_table
166{
167public:
168  string_table ()
169  {}
170
171  ~string_table ();
172
173  /* For a given string, returns its index.  */
174  int get_index (const char *name) const;
175
176  /* For a given decl, returns the index of the decl name.  */
177  int get_index_by_decl (tree decl) const;
178
179  /* For a given index, returns the string.  */
180  const char *get_name (int index) const;
181
182  /* Read profile, return TRUE on success.  */
183  bool read ();
184
185private:
186  typedef std::map<const char *, unsigned, string_compare> string_index_map;
187  string_vector vector_;
188  string_index_map map_;
189};
190
191/* Profile of a function instance:
192     1. total_count of the function.
193     2. head_count (entry basic block count) of the function (only valid when
194        function is a top-level function_instance, i.e. it is the original copy
195        instead of the inlined copy).
196     3. map from source location (decl_lineno) to profile (count_info).
197     4. map from callsite to callee function_instance.  */
198class function_instance
199{
200public:
201  typedef auto_vec<function_instance *> function_instance_stack;
202
203  /* Read the profile and return a function_instance with head count as
204     HEAD_COUNT. Recursively read callsites to create nested function_instances
205     too. STACK is used to track the recursive creation process.  */
206  static function_instance *
207  read_function_instance (function_instance_stack *stack,
208                          gcov_type head_count);
209
210  /* Recursively deallocate all callsites (nested function_instances).  */
211  ~function_instance ();
212
213  /* Accessors.  */
214  int
215  name () const
216  {
217    return name_;
218  }
219  gcov_type
220  total_count () const
221  {
222    return total_count_;
223  }
224  gcov_type
225  head_count () const
226  {
227    return head_count_;
228  }
229
230  /* Traverse callsites of the current function_instance to find one at the
231     location of LINENO and callee name represented in DECL.  */
232  function_instance *get_function_instance_by_decl (unsigned lineno,
233                                                    tree decl) const;
234
235  /* Store the profile info for LOC in INFO. Return TRUE if profile info
236     is found.  */
237  bool get_count_info (location_t loc, count_info *info) const;
238
239  /* Read the inlined indirect call target profile for STMT and store it in
240     MAP, return the total count for all inlined indirect calls.  */
241  gcov_type find_icall_target_map (gcall *stmt, icall_target_map *map) const;
242
243  /* Sum of counts that is used during annotation.  */
244  gcov_type total_annotated_count () const;
245
246  /* Mark LOC as annotated.  */
247  void mark_annotated (location_t loc);
248
249private:
250  /* Callsite, represented as (decl_lineno, callee_function_name_index).  */
251  typedef std::pair<unsigned, unsigned> callsite;
252
253  /* Map from callsite to callee function_instance.  */
254  typedef std::map<callsite, function_instance *> callsite_map;
255
256  function_instance (unsigned name, gcov_type head_count)
257      : name_ (name), total_count_ (0), head_count_ (head_count)
258  {
259  }
260
261  /* Map from source location (decl_lineno) to profile (count_info).  */
262  typedef std::map<unsigned, count_info> position_count_map;
263
264  /* function_instance name index in the string_table.  */
265  unsigned name_;
266
267  /* Total sample count.  */
268  gcov_type total_count_;
269
270  /* Entry BB's sample count.  */
271  gcov_type head_count_;
272
273  /* Map from callsite location to callee function_instance.  */
274  callsite_map callsites;
275
276  /* Map from source location to count_info.  */
277  position_count_map pos_counts;
278};
279
280/* Profile for all functions.  */
281class autofdo_source_profile
282{
283public:
284  static autofdo_source_profile *
285  create ()
286  {
287    autofdo_source_profile *map = new autofdo_source_profile ();
288
289    if (map->read ())
290      return map;
291    delete map;
292    return NULL;
293  }
294
295  ~autofdo_source_profile ();
296
297  /* For a given DECL, returns the top-level function_instance.  */
298  function_instance *get_function_instance_by_decl (tree decl) const;
299
300  /* Find count_info for a given gimple STMT. If found, store the count_info
301     in INFO and return true; otherwise return false.  */
302  bool get_count_info (gimple *stmt, count_info *info) const;
303
304  /* Find total count of the callee of EDGE.  */
305  gcov_type get_callsite_total_count (struct cgraph_edge *edge) const;
306
307  /* Update value profile INFO for STMT from the inlined indirect callsite.
308     Return true if INFO is updated.  */
309  bool update_inlined_ind_target (gcall *stmt, count_info *info);
310
311  /* Mark LOC as annotated.  */
312  void mark_annotated (location_t loc);
313
314private:
315  /* Map from function_instance name index (in string_table) to
316     function_instance.  */
317  typedef std::map<unsigned, function_instance *> name_function_instance_map;
318
319  autofdo_source_profile () {}
320
321  /* Read AutoFDO profile and returns TRUE on success.  */
322  bool read ();
323
324  /* Return the function_instance in the profile that correspond to the
325     inline STACK.  */
326  function_instance *
327  get_function_instance_by_inline_stack (const inline_stack &stack) const;
328
329  name_function_instance_map map_;
330};
331
332/* Store the strings read from the profile data file.  */
333static string_table *afdo_string_table;
334
335/* Store the AutoFDO source profile.  */
336static autofdo_source_profile *afdo_source_profile;
337
338/* gcov_summary structure to store the profile_info.  */
339static gcov_summary *afdo_profile_info;
340
341/* Helper functions.  */
342
343/* Return the original name of NAME: strip the suffix that starts
344   with '.' Caller is responsible for freeing RET.  */
345
346static char *
347get_original_name (const char *name)
348{
349  char *ret = xstrdup (name);
350  char *find = strchr (ret, '.');
351  if (find != NULL)
352    *find = 0;
353  return ret;
354}
355
356/* Return the combined location, which is a 32bit integer in which
357   higher 16 bits stores the line offset of LOC to the start lineno
358   of DECL, The lower 16 bits stores the discriminator.  */
359
360static unsigned
361get_combined_location (location_t loc, tree decl)
362{
363  /* TODO: allow more bits for line and less bits for discriminator.  */
364  if (LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl) >= (1<<16))
365    warning_at (loc, OPT_Woverflow, "offset exceeds 16 bytes");
366  return ((LOCATION_LINE (loc) - DECL_SOURCE_LINE (decl)) << 16);
367}
368
369/* Return the function decl of a given lexical BLOCK.  */
370
371static tree
372get_function_decl_from_block (tree block)
373{
374  if (!inlined_function_outer_scope_p (block))
375    return NULL_TREE;
376
377  return BLOCK_ABSTRACT_ORIGIN (block);
378}
379
380/* Store inline stack for STMT in STACK.  */
381
382static void
383get_inline_stack (location_t locus, inline_stack *stack)
384{
385  if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
386    return;
387
388  tree block = LOCATION_BLOCK (locus);
389  if (block && TREE_CODE (block) == BLOCK)
390    {
391      int level = 0;
392      for (block = BLOCK_SUPERCONTEXT (block);
393           block && (TREE_CODE (block) == BLOCK);
394           block = BLOCK_SUPERCONTEXT (block))
395        {
396          location_t tmp_locus = BLOCK_SOURCE_LOCATION (block);
397          if (LOCATION_LOCUS (tmp_locus) == UNKNOWN_LOCATION)
398            continue;
399
400          tree decl = get_function_decl_from_block (block);
401          stack->safe_push (
402              std::make_pair (decl, get_combined_location (locus, decl)));
403          locus = tmp_locus;
404          level++;
405        }
406    }
407  stack->safe_push (
408      std::make_pair (current_function_decl,
409                      get_combined_location (locus, current_function_decl)));
410}
411
412/* Return STMT's combined location, which is a 32bit integer in which
413   higher 16 bits stores the line offset of LOC to the start lineno
414   of DECL, The lower 16 bits stores the discriminator.  */
415
416static unsigned
417get_relative_location_for_stmt (gimple *stmt)
418{
419  location_t locus = gimple_location (stmt);
420  if (LOCATION_LOCUS (locus) == UNKNOWN_LOCATION)
421    return UNKNOWN_LOCATION;
422
423  for (tree block = gimple_block (stmt); block && (TREE_CODE (block) == BLOCK);
424       block = BLOCK_SUPERCONTEXT (block))
425    if (LOCATION_LOCUS (BLOCK_SOURCE_LOCATION (block)) != UNKNOWN_LOCATION)
426      return get_combined_location (locus,
427                                    get_function_decl_from_block (block));
428  return get_combined_location (locus, current_function_decl);
429}
430
431/* Return true if BB contains indirect call.  */
432
433static bool
434has_indirect_call (basic_block bb)
435{
436  gimple_stmt_iterator gsi;
437
438  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
439    {
440      gimple *stmt = gsi_stmt (gsi);
441      if (gimple_code (stmt) == GIMPLE_CALL && !gimple_call_internal_p (stmt)
442          && (gimple_call_fn (stmt) == NULL
443              || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))
444        return true;
445    }
446  return false;
447}
448
449/* Member functions for string_table.  */
450
451/* Deconstructor.  */
452
453string_table::~string_table ()
454{
455  for (unsigned i = 0; i < vector_.length (); i++)
456    free (vector_[i]);
457}
458
459
460/* Return the index of a given function NAME. Return -1 if NAME is not
461   found in string table.  */
462
463int
464string_table::get_index (const char *name) const
465{
466  if (name == NULL)
467    return -1;
468  string_index_map::const_iterator iter = map_.find (name);
469  if (iter == map_.end ())
470    return -1;
471
472  return iter->second;
473}
474
475/* Return the index of a given function DECL. Return -1 if DECL is not
476   found in string table.  */
477
478int
479string_table::get_index_by_decl (tree decl) const
480{
481  char *name
482      = get_original_name (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)));
483  int ret = get_index (name);
484  free (name);
485  if (ret != -1)
486    return ret;
487  ret = get_index (lang_hooks.dwarf_name (decl, 0));
488  if (ret != -1)
489    return ret;
490  if (DECL_FROM_INLINE (decl))
491    return get_index_by_decl (DECL_ABSTRACT_ORIGIN (decl));
492
493  return -1;
494}
495
496/* Return the function name of a given INDEX.  */
497
498const char *
499string_table::get_name (int index) const
500{
501  gcc_assert (index > 0 && index < (int)vector_.length ());
502  return vector_[index];
503}
504
505/* Read the string table. Return TRUE if reading is successful.  */
506
507bool
508string_table::read ()
509{
510  if (gcov_read_unsigned () != GCOV_TAG_AFDO_FILE_NAMES)
511    return false;
512  /* Skip the length of the section.  */
513  gcov_read_unsigned ();
514  /* Read in the file name table.  */
515  unsigned string_num = gcov_read_unsigned ();
516  for (unsigned i = 0; i < string_num; i++)
517    {
518      vector_.safe_push (get_original_name (gcov_read_string ()));
519      map_[vector_.last ()] = i;
520    }
521  return true;
522}
523
524/* Member functions for function_instance.  */
525
526function_instance::~function_instance ()
527{
528  for (callsite_map::iterator iter = callsites.begin ();
529       iter != callsites.end (); ++iter)
530    delete iter->second;
531}
532
533/* Traverse callsites of the current function_instance to find one at the
534   location of LINENO and callee name represented in DECL.  */
535
536function_instance *
537function_instance::get_function_instance_by_decl (unsigned lineno,
538                                                  tree decl) const
539{
540  int func_name_idx = afdo_string_table->get_index_by_decl (decl);
541  if (func_name_idx != -1)
542    {
543      callsite_map::const_iterator ret
544          = callsites.find (std::make_pair (lineno, func_name_idx));
545      if (ret != callsites.end ())
546        return ret->second;
547    }
548  func_name_idx
549      = afdo_string_table->get_index (lang_hooks.dwarf_name (decl, 0));
550  if (func_name_idx != -1)
551    {
552      callsite_map::const_iterator ret
553          = callsites.find (std::make_pair (lineno, func_name_idx));
554      if (ret != callsites.end ())
555        return ret->second;
556    }
557  if (DECL_FROM_INLINE (decl))
558    return get_function_instance_by_decl (lineno, DECL_ABSTRACT_ORIGIN (decl));
559
560  return NULL;
561}
562
563/* Store the profile info for LOC in INFO. Return TRUE if profile info
564   is found.  */
565
566bool
567function_instance::get_count_info (location_t loc, count_info *info) const
568{
569  position_count_map::const_iterator iter = pos_counts.find (loc);
570  if (iter == pos_counts.end ())
571    return false;
572  *info = iter->second;
573  return true;
574}
575
576/* Mark LOC as annotated.  */
577
578void
579function_instance::mark_annotated (location_t loc)
580{
581  position_count_map::iterator iter = pos_counts.find (loc);
582  if (iter == pos_counts.end ())
583    return;
584  iter->second.annotated = true;
585}
586
587/* Read the inlined indirect call target profile for STMT and store it in
588   MAP, return the total count for all inlined indirect calls.  */
589
590gcov_type
591function_instance::find_icall_target_map (gcall *stmt,
592                                          icall_target_map *map) const
593{
594  gcov_type ret = 0;
595  unsigned stmt_offset = get_relative_location_for_stmt (stmt);
596
597  for (callsite_map::const_iterator iter = callsites.begin ();
598       iter != callsites.end (); ++iter)
599    {
600      unsigned callee = iter->second->name ();
601      /* Check if callsite location match the stmt.  */
602      if (iter->first.first != stmt_offset)
603        continue;
604      struct cgraph_node *node = cgraph_node::get_for_asmname (
605          get_identifier (afdo_string_table->get_name (callee)));
606      if (node == NULL)
607        continue;
608      (*map)[callee] = iter->second->total_count ();
609      ret += iter->second->total_count ();
610    }
611  return ret;
612}
613
614/* Read the profile and create a function_instance with head count as
615   HEAD_COUNT. Recursively read callsites to create nested function_instances
616   too. STACK is used to track the recursive creation process.  */
617
618/* function instance profile format:
619
620   ENTRY_COUNT: 8 bytes
621   NAME_INDEX: 4 bytes
622   NUM_POS_COUNTS: 4 bytes
623   NUM_CALLSITES: 4 byte
624   POS_COUNT_1:
625     POS_1_OFFSET: 4 bytes
626     NUM_TARGETS: 4 bytes
627     COUNT: 8 bytes
628     TARGET_1:
629       VALUE_PROFILE_TYPE: 4 bytes
630       TARGET_IDX: 8 bytes
631       COUNT: 8 bytes
632     TARGET_2
633     ...
634     TARGET_n
635   POS_COUNT_2
636   ...
637   POS_COUNT_N
638   CALLSITE_1:
639     CALLSITE_1_OFFSET: 4 bytes
640     FUNCTION_INSTANCE_PROFILE (nested)
641   CALLSITE_2
642   ...
643   CALLSITE_n.  */
644
645function_instance *
646function_instance::read_function_instance (function_instance_stack *stack,
647                                           gcov_type head_count)
648{
649  unsigned name = gcov_read_unsigned ();
650  unsigned num_pos_counts = gcov_read_unsigned ();
651  unsigned num_callsites = gcov_read_unsigned ();
652  function_instance *s = new function_instance (name, head_count);
653  stack->safe_push (s);
654
655  for (unsigned i = 0; i < num_pos_counts; i++)
656    {
657      unsigned offset = gcov_read_unsigned () & 0xffff0000;
658      unsigned num_targets = gcov_read_unsigned ();
659      gcov_type count = gcov_read_counter ();
660      s->pos_counts[offset].count = count;
661      for (unsigned j = 0; j < stack->length (); j++)
662        (*stack)[j]->total_count_ += count;
663      for (unsigned j = 0; j < num_targets; j++)
664        {
665          /* Only indirect call target histogram is supported now.  */
666          gcov_read_unsigned ();
667          gcov_type target_idx = gcov_read_counter ();
668          s->pos_counts[offset].targets[target_idx] = gcov_read_counter ();
669        }
670    }
671  for (unsigned i = 0; i < num_callsites; i++)
672    {
673      unsigned offset = gcov_read_unsigned ();
674      function_instance *callee_function_instance
675          = read_function_instance (stack, 0);
676      s->callsites[std::make_pair (offset, callee_function_instance->name ())]
677          = callee_function_instance;
678    }
679  stack->pop ();
680  return s;
681}
682
683/* Sum of counts that is used during annotation.  */
684
685gcov_type
686function_instance::total_annotated_count () const
687{
688  gcov_type ret = 0;
689  for (callsite_map::const_iterator iter = callsites.begin ();
690       iter != callsites.end (); ++iter)
691    ret += iter->second->total_annotated_count ();
692  for (position_count_map::const_iterator iter = pos_counts.begin ();
693       iter != pos_counts.end (); ++iter)
694    if (iter->second.annotated)
695      ret += iter->second.count;
696  return ret;
697}
698
699/* Member functions for autofdo_source_profile.  */
700
701autofdo_source_profile::~autofdo_source_profile ()
702{
703  for (name_function_instance_map::const_iterator iter = map_.begin ();
704       iter != map_.end (); ++iter)
705    delete iter->second;
706}
707
708/* For a given DECL, returns the top-level function_instance.  */
709
710function_instance *
711autofdo_source_profile::get_function_instance_by_decl (tree decl) const
712{
713  int index = afdo_string_table->get_index_by_decl (decl);
714  if (index == -1)
715    return NULL;
716  name_function_instance_map::const_iterator ret = map_.find (index);
717  return ret == map_.end () ? NULL : ret->second;
718}
719
720/* Find count_info for a given gimple STMT. If found, store the count_info
721   in INFO and return true; otherwise return false.  */
722
723bool
724autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
725{
726  if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
727    return false;
728
729  inline_stack stack;
730  get_inline_stack (gimple_location (stmt), &stack);
731  if (stack.length () == 0)
732    return false;
733  function_instance *s = get_function_instance_by_inline_stack (stack);
734  if (s == NULL)
735    return false;
736  return s->get_count_info (stack[0].second, info);
737}
738
739/* Mark LOC as annotated.  */
740
741void
742autofdo_source_profile::mark_annotated (location_t loc)
743{
744  inline_stack stack;
745  get_inline_stack (loc, &stack);
746  if (stack.length () == 0)
747    return;
748  function_instance *s = get_function_instance_by_inline_stack (stack);
749  if (s == NULL)
750    return;
751  s->mark_annotated (stack[0].second);
752}
753
754/* Update value profile INFO for STMT from the inlined indirect callsite.
755   Return true if INFO is updated.  */
756
757bool
758autofdo_source_profile::update_inlined_ind_target (gcall *stmt,
759                                                   count_info *info)
760{
761  if (dump_file)
762    {
763      fprintf (dump_file, "Checking indirect call -> direct call ");
764      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
765    }
766
767  if (LOCATION_LOCUS (gimple_location (stmt)) == cfun->function_end_locus)
768    {
769      if (dump_file)
770	fprintf (dump_file, " good locus\n");
771      return false;
772    }
773
774  count_info old_info;
775  get_count_info (stmt, &old_info);
776  gcov_type total = 0;
777  for (icall_target_map::const_iterator iter = old_info.targets.begin ();
778       iter != old_info.targets.end (); ++iter)
779    total += iter->second;
780
781  /* Program behavior changed, original promoted (and inlined) target is not
782     hot any more. Will avoid promote the original target.
783
784     To check if original promoted target is still hot, we check the total
785     count of the unpromoted targets (stored in TOTAL). If a callsite count
786     (stored in INFO) is smaller than half of the total count, the original
787     promoted target is considered not hot any more.  */
788  if (info->count < total / 2)
789    {
790      if (dump_file)
791	fprintf (dump_file, " not hot anymore %ld < %ld",
792		 (long)info->count,
793		 (long)total /2);
794      return false;
795    }
796
797  inline_stack stack;
798  get_inline_stack (gimple_location (stmt), &stack);
799  if (stack.length () == 0)
800    {
801      if (dump_file)
802	fprintf (dump_file, " no inline stack\n");
803      return false;
804    }
805  function_instance *s = get_function_instance_by_inline_stack (stack);
806  if (s == NULL)
807    {
808      if (dump_file)
809	fprintf (dump_file, " function not found in inline stack\n");
810      return false;
811    }
812  icall_target_map map;
813  if (s->find_icall_target_map (stmt, &map) == 0)
814    {
815      if (dump_file)
816	fprintf (dump_file, " no target map\n");
817      return false;
818    }
819  for (icall_target_map::const_iterator iter = map.begin ();
820       iter != map.end (); ++iter)
821    info->targets[iter->first] = iter->second;
822  if (dump_file)
823    fprintf (dump_file, " looks good\n");
824  return true;
825}
826
827/* Find total count of the callee of EDGE.  */
828
829gcov_type
830autofdo_source_profile::get_callsite_total_count (
831    struct cgraph_edge *edge) const
832{
833  inline_stack stack;
834  stack.safe_push (std::make_pair (edge->callee->decl, 0));
835  get_inline_stack (gimple_location (edge->call_stmt), &stack);
836
837  function_instance *s = get_function_instance_by_inline_stack (stack);
838  if (s == NULL
839      || afdo_string_table->get_index (IDENTIFIER_POINTER (
840             DECL_ASSEMBLER_NAME (edge->callee->decl))) != s->name ())
841    return 0;
842
843  return s->total_count ();
844}
845
846/* Read AutoFDO profile and returns TRUE on success.  */
847
848/* source profile format:
849
850   GCOV_TAG_AFDO_FUNCTION: 4 bytes
851   LENGTH: 4 bytes
852   NUM_FUNCTIONS: 4 bytes
853   FUNCTION_INSTANCE_1
854   FUNCTION_INSTANCE_2
855   ...
856   FUNCTION_INSTANCE_N.  */
857
858bool
859autofdo_source_profile::read ()
860{
861  if (gcov_read_unsigned () != GCOV_TAG_AFDO_FUNCTION)
862    {
863      inform (UNKNOWN_LOCATION, "Not expected TAG.");
864      return false;
865    }
866
867  /* Skip the length of the section.  */
868  gcov_read_unsigned ();
869
870  /* Read in the function/callsite profile, and store it in local
871     data structure.  */
872  unsigned function_num = gcov_read_unsigned ();
873  for (unsigned i = 0; i < function_num; i++)
874    {
875      function_instance::function_instance_stack stack;
876      function_instance *s = function_instance::read_function_instance (
877          &stack, gcov_read_counter ());
878      map_[s->name ()] = s;
879    }
880  return true;
881}
882
883/* Return the function_instance in the profile that correspond to the
884   inline STACK.  */
885
886function_instance *
887autofdo_source_profile::get_function_instance_by_inline_stack (
888    const inline_stack &stack) const
889{
890  name_function_instance_map::const_iterator iter = map_.find (
891      afdo_string_table->get_index_by_decl (stack[stack.length () - 1].first));
892  if (iter == map_.end())
893    return NULL;
894  function_instance *s = iter->second;
895  for (unsigned i = stack.length() - 1; i > 0; i--)
896    {
897      s = s->get_function_instance_by_decl (
898          stack[i].second, stack[i - 1].first);
899      if (s == NULL)
900        return NULL;
901    }
902  return s;
903}
904
905/* Module profile is only used by LIPO. Here we simply ignore it.  */
906
907static void
908fake_read_autofdo_module_profile ()
909{
910  /* Read in the module info.  */
911  gcov_read_unsigned ();
912
913  /* Skip the length of the section.  */
914  gcov_read_unsigned ();
915
916  /* Read in the file name table.  */
917  unsigned total_module_num = gcov_read_unsigned ();
918  gcc_assert (total_module_num == 0);
919}
920
921/* Read data from profile data file.  */
922
923static void
924read_profile (void)
925{
926  if (gcov_open (auto_profile_file, 1) == 0)
927    {
928      error ("cannot open profile file %s", auto_profile_file);
929      return;
930    }
931
932  if (gcov_read_unsigned () != GCOV_DATA_MAGIC)
933    {
934      error ("AutoFDO profile magic number does not match");
935      return;
936    }
937
938  /* Skip the version number.  */
939  unsigned version = gcov_read_unsigned ();
940  if (version != AUTO_PROFILE_VERSION)
941    {
942      error ("AutoFDO profile version %u does not match %u",
943	     version, AUTO_PROFILE_VERSION);
944      return;
945    }
946
947  /* Skip the empty integer.  */
948  gcov_read_unsigned ();
949
950  /* string_table.  */
951  afdo_string_table = new string_table ();
952  if (!afdo_string_table->read())
953    {
954      error ("cannot read string table from %s", auto_profile_file);
955      return;
956    }
957
958  /* autofdo_source_profile.  */
959  afdo_source_profile = autofdo_source_profile::create ();
960  if (afdo_source_profile == NULL)
961    {
962      error ("cannot read function profile from %s", auto_profile_file);
963      return;
964    }
965
966  /* autofdo_module_profile.  */
967  fake_read_autofdo_module_profile ();
968}
969
970/* From AutoFDO profiles, find values inside STMT for that we want to measure
971   histograms for indirect-call optimization.
972
973   This function is actually served for 2 purposes:
974     * before annotation, we need to mark histogram, promote and inline
975     * after annotation, we just need to mark, and let follow-up logic to
976       decide if it needs to promote and inline.  */
977
978static bool
979afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
980                    bool transform)
981{
982  gimple *gs = gsi_stmt (*gsi);
983  tree callee;
984
985  if (map.size () == 0)
986    return false;
987  gcall *stmt = dyn_cast <gcall *> (gs);
988  if (!stmt
989      || gimple_call_internal_p (stmt)
990      || gimple_call_fndecl (stmt) != NULL_TREE)
991    return false;
992
993  gcov_type total = 0;
994  icall_target_map::const_iterator max_iter = map.end ();
995
996  for (icall_target_map::const_iterator iter = map.begin ();
997       iter != map.end (); ++iter)
998    {
999      total += iter->second;
1000      if (max_iter == map.end () || max_iter->second < iter->second)
1001        max_iter = iter;
1002    }
1003  struct cgraph_node *direct_call = cgraph_node::get_for_asmname (
1004      get_identifier (afdo_string_table->get_name (max_iter->first)));
1005  if (direct_call == NULL || !direct_call->profile_id)
1006    return false;
1007
1008  callee = gimple_call_fn (stmt);
1009
1010  histogram_value hist = gimple_alloc_histogram_value (
1011      cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
1012  hist->n_counters = 4;
1013  hist->hvalue.counters = XNEWVEC (gcov_type, hist->n_counters);
1014  gimple_add_histogram_value (cfun, stmt, hist);
1015
1016  /* Total counter */
1017  hist->hvalue.counters[0] = total;
1018  /* Number of value/counter pairs */
1019  hist->hvalue.counters[1] = 1;
1020  /* Value */
1021  hist->hvalue.counters[2] = direct_call->profile_id;
1022  /* Counter */
1023  hist->hvalue.counters[3] = max_iter->second;
1024
1025  if (!transform)
1026    return false;
1027
1028  cgraph_node* current_function_node = cgraph_node::get (current_function_decl);
1029
1030  /* If the direct call is a recursive call, don't promote it since
1031     we are not set up to inline recursive calls at this stage. */
1032  if (direct_call == current_function_node)
1033    return false;
1034
1035  struct cgraph_edge *indirect_edge
1036      = current_function_node->get_edge (stmt);
1037
1038  if (dump_file)
1039    {
1040      fprintf (dump_file, "Indirect call -> direct call ");
1041      print_generic_expr (dump_file, callee, TDF_SLIM);
1042      fprintf (dump_file, " => ");
1043      print_generic_expr (dump_file, direct_call->decl, TDF_SLIM);
1044    }
1045
1046  if (direct_call == NULL)
1047    {
1048      if (dump_file)
1049        fprintf (dump_file, " not transforming\n");
1050      return false;
1051    }
1052  if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
1053    {
1054      if (dump_file)
1055        fprintf (dump_file, " no declaration\n");
1056      return false;
1057    }
1058
1059  if (dump_file)
1060    {
1061      fprintf (dump_file, " transformation on insn ");
1062      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1063      fprintf (dump_file, "\n");
1064    }
1065
1066  /* FIXME: Count should be initialized.  */
1067  struct cgraph_edge *new_edge
1068      = indirect_edge->make_speculative (direct_call,
1069					 profile_count::uninitialized ());
1070  cgraph_edge::redirect_call_stmt_to_callee (new_edge);
1071  gimple_remove_histogram_value (cfun, stmt, hist);
1072  inline_call (new_edge, true, NULL, NULL, false);
1073  return true;
1074}
1075
1076/* From AutoFDO profiles, find values inside STMT for that we want to measure
1077   histograms and adds them to list VALUES.  */
1078
1079static bool
1080afdo_vpt (gimple_stmt_iterator *gsi, const icall_target_map &map,
1081          bool transform)
1082{
1083  return afdo_indirect_call (gsi, map, transform);
1084}
1085
1086typedef std::set<basic_block> bb_set;
1087typedef std::set<edge> edge_set;
1088
1089static bool
1090is_bb_annotated (const basic_block bb, const bb_set &annotated)
1091{
1092  return annotated.find (bb) != annotated.end ();
1093}
1094
1095static void
1096set_bb_annotated (basic_block bb, bb_set *annotated)
1097{
1098  annotated->insert (bb);
1099}
1100
1101/* For a given BB, set its execution count. Attach value profile if a stmt
1102   is not in PROMOTED, because we only want to promote an indirect call once.
1103   Return TRUE if BB is annotated.  */
1104
1105static bool
1106afdo_set_bb_count (basic_block bb, const stmt_set &promoted)
1107{
1108  gimple_stmt_iterator gsi;
1109  edge e;
1110  edge_iterator ei;
1111  gcov_type max_count = 0;
1112  bool has_annotated = false;
1113
1114  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1115    {
1116      count_info info;
1117      gimple *stmt = gsi_stmt (gsi);
1118      if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1119        continue;
1120      if (afdo_source_profile->get_count_info (stmt, &info))
1121        {
1122          if (info.count > max_count)
1123            max_count = info.count;
1124          has_annotated = true;
1125          if (info.targets.size () > 0
1126              && promoted.find (stmt) == promoted.end ())
1127            afdo_vpt (&gsi, info.targets, false);
1128        }
1129    }
1130
1131  if (!has_annotated)
1132    return false;
1133
1134  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1135    afdo_source_profile->mark_annotated (gimple_location (gsi_stmt (gsi)));
1136  for (gphi_iterator gpi = gsi_start_phis (bb);
1137       !gsi_end_p (gpi);
1138       gsi_next (&gpi))
1139    {
1140      gphi *phi = gpi.phi ();
1141      size_t i;
1142      for (i = 0; i < gimple_phi_num_args (phi); i++)
1143        afdo_source_profile->mark_annotated (gimple_phi_arg_location (phi, i));
1144    }
1145  FOR_EACH_EDGE (e, ei, bb->succs)
1146  afdo_source_profile->mark_annotated (e->goto_locus);
1147
1148  bb->count = profile_count::from_gcov_type (max_count).afdo ();
1149  return true;
1150}
1151
1152/* BB1 and BB2 are in an equivalent class iff:
1153   1. BB1 dominates BB2.
1154   2. BB2 post-dominates BB1.
1155   3. BB1 and BB2 are in the same loop nest.
1156   This function finds the equivalent class for each basic block, and
1157   stores a pointer to the first BB in its equivalent class. Meanwhile,
1158   set bb counts for the same equivalent class to be idenical. Update
1159   ANNOTATED_BB for the first BB in its equivalent class.  */
1160
1161static void
1162afdo_find_equiv_class (bb_set *annotated_bb)
1163{
1164  basic_block bb;
1165
1166  FOR_ALL_BB_FN (bb, cfun)
1167  bb->aux = NULL;
1168
1169  FOR_ALL_BB_FN (bb, cfun)
1170  {
1171    if (bb->aux != NULL)
1172      continue;
1173    bb->aux = bb;
1174    for (basic_block bb1 : get_dominated_by (CDI_DOMINATORS, bb))
1175      if (bb1->aux == NULL && dominated_by_p (CDI_POST_DOMINATORS, bb, bb1)
1176	  && bb1->loop_father == bb->loop_father)
1177	{
1178	  bb1->aux = bb;
1179	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1180	    {
1181	      bb->count = bb1->count;
1182	      set_bb_annotated (bb, annotated_bb);
1183	    }
1184	}
1185
1186    for (basic_block bb1 : get_dominated_by (CDI_POST_DOMINATORS, bb))
1187      if (bb1->aux == NULL && dominated_by_p (CDI_DOMINATORS, bb, bb1)
1188	  && bb1->loop_father == bb->loop_father)
1189	{
1190	  bb1->aux = bb;
1191	  if (bb1->count > bb->count && is_bb_annotated (bb1, *annotated_bb))
1192	    {
1193	      bb->count = bb1->count;
1194	      set_bb_annotated (bb, annotated_bb);
1195	    }
1196	}
1197  }
1198}
1199
1200/* If a basic block's count is known, and only one of its in/out edges' count
1201   is unknown, its count can be calculated. Meanwhile, if all of the in/out
1202   edges' counts are known, then the basic block's unknown count can also be
1203   calculated. Also, if a block has a single predecessor or successor, the block's
1204   count can be propagated to that predecessor or successor.
1205   IS_SUCC is true if out edges of a basic blocks are examined.
1206   Update ANNOTATED_BB accordingly.
1207   Return TRUE if any basic block/edge count is changed.  */
1208
1209static bool
1210afdo_propagate_edge (bool is_succ, bb_set *annotated_bb)
1211{
1212  basic_block bb;
1213  bool changed = false;
1214
1215  FOR_EACH_BB_FN (bb, cfun)
1216  {
1217    edge e, unknown_edge = NULL;
1218    edge_iterator ei;
1219    int num_unknown_edge = 0;
1220    int num_edge = 0;
1221    profile_count total_known_count = profile_count::zero ().afdo ();
1222
1223    FOR_EACH_EDGE (e, ei, is_succ ? bb->succs : bb->preds)
1224      {
1225	gcc_assert (AFDO_EINFO (e) != NULL);
1226	if (! AFDO_EINFO (e)->is_annotated ())
1227	  num_unknown_edge++, unknown_edge = e;
1228	else
1229	  total_known_count += AFDO_EINFO (e)->get_count ();
1230	num_edge++;
1231      }
1232
1233    /* Be careful not to annotate block with no successor in special cases.  */
1234    if (num_unknown_edge == 0 && total_known_count > bb->count)
1235      {
1236	bb->count = total_known_count;
1237	if (!is_bb_annotated (bb, *annotated_bb))
1238	  set_bb_annotated (bb, annotated_bb);
1239	changed = true;
1240      }
1241    else if (num_unknown_edge == 1 && is_bb_annotated (bb, *annotated_bb))
1242      {
1243	if (bb->count > total_known_count)
1244	  {
1245	      profile_count new_count = bb->count - total_known_count;
1246	      AFDO_EINFO(unknown_edge)->set_count(new_count);
1247	      if (num_edge == 1)
1248		{
1249		  basic_block succ_or_pred_bb = is_succ ? unknown_edge->dest : unknown_edge->src;
1250		  if (new_count > succ_or_pred_bb->count)
1251		    {
1252		      succ_or_pred_bb->count = new_count;
1253		      if (!is_bb_annotated (succ_or_pred_bb, *annotated_bb))
1254			set_bb_annotated (succ_or_pred_bb, annotated_bb);
1255		    }
1256		}
1257	   }
1258	else
1259	  AFDO_EINFO (unknown_edge)->set_count (profile_count::zero().afdo ());
1260	AFDO_EINFO (unknown_edge)->set_annotated ();
1261	changed = true;
1262      }
1263  }
1264  return changed;
1265}
1266
1267/* Special propagation for circuit expressions. Because GCC translates
1268   control flow into data flow for circuit expressions. E.g.
1269   BB1:
1270   if (a && b)
1271     BB2
1272   else
1273     BB3
1274
1275   will be translated into:
1276
1277   BB1:
1278     if (a)
1279       goto BB.t1
1280     else
1281       goto BB.t3
1282   BB.t1:
1283     if (b)
1284       goto BB.t2
1285     else
1286       goto BB.t3
1287   BB.t2:
1288     goto BB.t3
1289   BB.t3:
1290     tmp = PHI (0 (BB1), 0 (BB.t1), 1 (BB.t2)
1291     if (tmp)
1292       goto BB2
1293     else
1294       goto BB3
1295
1296   In this case, we need to propagate through PHI to determine the edge
1297   count of BB1->BB.t1, BB.t1->BB.t2.  */
1298
1299static void
1300afdo_propagate_circuit (const bb_set &annotated_bb)
1301{
1302  basic_block bb;
1303  FOR_ALL_BB_FN (bb, cfun)
1304  {
1305    gimple *def_stmt;
1306    tree cmp_rhs, cmp_lhs;
1307    gimple *cmp_stmt = last_stmt (bb);
1308    edge e;
1309    edge_iterator ei;
1310
1311    if (!cmp_stmt || gimple_code (cmp_stmt) != GIMPLE_COND)
1312      continue;
1313    cmp_rhs = gimple_cond_rhs (cmp_stmt);
1314    cmp_lhs = gimple_cond_lhs (cmp_stmt);
1315    if (!TREE_CONSTANT (cmp_rhs)
1316        || !(integer_zerop (cmp_rhs) || integer_onep (cmp_rhs)))
1317      continue;
1318    if (TREE_CODE (cmp_lhs) != SSA_NAME)
1319      continue;
1320    if (!is_bb_annotated (bb, annotated_bb))
1321      continue;
1322    def_stmt = SSA_NAME_DEF_STMT (cmp_lhs);
1323    while (def_stmt && gimple_code (def_stmt) == GIMPLE_ASSIGN
1324           && gimple_assign_single_p (def_stmt)
1325           && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
1326      def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
1327    if (!def_stmt)
1328      continue;
1329    gphi *phi_stmt = dyn_cast <gphi *> (def_stmt);
1330    if (!phi_stmt)
1331      continue;
1332    FOR_EACH_EDGE (e, ei, bb->succs)
1333    {
1334      unsigned i, total = 0;
1335      edge only_one;
1336      bool check_value_one = (((integer_onep (cmp_rhs))
1337                               ^ (gimple_cond_code (cmp_stmt) == EQ_EXPR))
1338                              ^ ((e->flags & EDGE_TRUE_VALUE) != 0));
1339      if (! AFDO_EINFO (e)->is_annotated ())
1340        continue;
1341      for (i = 0; i < gimple_phi_num_args (phi_stmt); i++)
1342        {
1343          tree val = gimple_phi_arg_def (phi_stmt, i);
1344          edge ep = gimple_phi_arg_edge (phi_stmt, i);
1345
1346          if (!TREE_CONSTANT (val)
1347              || !(integer_zerop (val) || integer_onep (val)))
1348            continue;
1349          if (check_value_one ^ integer_onep (val))
1350            continue;
1351          total++;
1352          only_one = ep;
1353          if (! (AFDO_EINFO (e)->get_count ()).nonzero_p ()
1354	      && ! AFDO_EINFO (ep)->is_annotated ())
1355	    {
1356	      AFDO_EINFO (ep)->set_count (profile_count::zero ().afdo ());
1357	      AFDO_EINFO (ep)->set_annotated ();
1358	    }
1359	}
1360      if (total == 1 && ! AFDO_EINFO (only_one)->is_annotated ())
1361	{
1362	  AFDO_EINFO (only_one)->set_count (AFDO_EINFO (e)->get_count ());
1363	  AFDO_EINFO (only_one)->set_annotated ();
1364	}
1365    }
1366  }
1367}
1368
1369/* Propagate the basic block count and edge count on the control flow
1370   graph. We do the propagation iteratively until stablize.  */
1371
1372static void
1373afdo_propagate (bb_set *annotated_bb)
1374{
1375  basic_block bb;
1376  bool changed = true;
1377  int i = 0;
1378
1379  FOR_ALL_BB_FN (bb, cfun)
1380  {
1381    bb->count = ((basic_block)bb->aux)->count;
1382    if (is_bb_annotated ((basic_block)bb->aux, *annotated_bb))
1383      set_bb_annotated (bb, annotated_bb);
1384  }
1385
1386  while (changed && i++ < 10)
1387    {
1388      changed = false;
1389
1390      if (afdo_propagate_edge (true, annotated_bb))
1391        changed = true;
1392      if (afdo_propagate_edge (false, annotated_bb))
1393        changed = true;
1394      afdo_propagate_circuit (*annotated_bb);
1395    }
1396}
1397
1398/* Propagate counts on control flow graph and calculate branch
1399   probabilities.  */
1400
1401static void
1402afdo_calculate_branch_prob (bb_set *annotated_bb)
1403{
1404  edge e;
1405  edge_iterator ei;
1406  basic_block bb;
1407
1408  calculate_dominance_info (CDI_POST_DOMINATORS);
1409  calculate_dominance_info (CDI_DOMINATORS);
1410  loop_optimizer_init (0);
1411
1412  FOR_ALL_BB_FN (bb, cfun)
1413    {
1414      gcc_assert (bb->aux == NULL);
1415      FOR_EACH_EDGE (e, ei, bb->succs)
1416	{
1417	  gcc_assert (e->aux == NULL);
1418	  e->aux = new edge_info ();
1419	}
1420    }
1421
1422  afdo_find_equiv_class (annotated_bb);
1423  afdo_propagate (annotated_bb);
1424
1425  FOR_EACH_BB_FN (bb, cfun)
1426  {
1427    int num_unknown_succ = 0;
1428    profile_count total_count = profile_count::zero ().afdo ();
1429
1430    FOR_EACH_EDGE (e, ei, bb->succs)
1431    {
1432      gcc_assert (AFDO_EINFO (e) != NULL);
1433      if (! AFDO_EINFO (e)->is_annotated ())
1434        num_unknown_succ++;
1435      else
1436        total_count += AFDO_EINFO (e)->get_count ();
1437    }
1438    if (num_unknown_succ == 0 && total_count > profile_count::zero ())
1439      {
1440	FOR_EACH_EDGE (e, ei, bb->succs)
1441	  e->probability
1442	    = AFDO_EINFO (e)->get_count ().probability_in (total_count);
1443      }
1444  }
1445  FOR_ALL_BB_FN (bb, cfun)
1446    {
1447      bb->aux = NULL;
1448      FOR_EACH_EDGE (e, ei, bb->succs)
1449	if (AFDO_EINFO (e) != NULL)
1450	  {
1451	    delete AFDO_EINFO (e);
1452	    e->aux = NULL;
1453	  }
1454    }
1455
1456  loop_optimizer_finalize ();
1457  free_dominance_info (CDI_DOMINATORS);
1458  free_dominance_info (CDI_POST_DOMINATORS);
1459}
1460
1461/* Perform value profile transformation using AutoFDO profile. Add the
1462   promoted stmts to PROMOTED_STMTS. Return TRUE if there is any
1463   indirect call promoted.  */
1464
1465static bool
1466afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
1467{
1468  basic_block bb;
1469  if (afdo_source_profile->get_function_instance_by_decl (
1470          current_function_decl) == NULL)
1471    return false;
1472
1473  compute_fn_summary (cgraph_node::get (current_function_decl), true);
1474
1475  bool has_vpt = false;
1476  FOR_EACH_BB_FN (bb, cfun)
1477  {
1478    if (!has_indirect_call (bb))
1479      continue;
1480    gimple_stmt_iterator gsi;
1481
1482    gcov_type bb_count = 0;
1483    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1484      {
1485        count_info info;
1486	gimple *stmt = gsi_stmt (gsi);
1487        if (afdo_source_profile->get_count_info (stmt, &info))
1488          bb_count = MAX (bb_count, info.count);
1489      }
1490
1491    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1492      {
1493        gcall *stmt = dyn_cast <gcall *> (gsi_stmt (gsi));
1494        /* IC_promotion and early_inline_2 is done in multiple iterations.
1495           No need to promoted the stmt if its in promoted_stmts (means
1496           it is already been promoted in the previous iterations).  */
1497        if ((!stmt) || gimple_call_fn (stmt) == NULL
1498            || TREE_CODE (gimple_call_fn (stmt)) == FUNCTION_DECL
1499            || promoted_stmts->find (stmt) != promoted_stmts->end ())
1500          continue;
1501
1502        count_info info;
1503        afdo_source_profile->get_count_info (stmt, &info);
1504        info.count = bb_count;
1505        if (afdo_source_profile->update_inlined_ind_target (stmt, &info))
1506          {
1507            /* Promote the indirect call and update the promoted_stmts.  */
1508            promoted_stmts->insert (stmt);
1509            if (afdo_vpt (&gsi, info.targets, true))
1510              has_vpt = true;
1511          }
1512      }
1513  }
1514
1515  if (has_vpt)
1516    {
1517      unsigned todo = optimize_inline_calls (current_function_decl);
1518      if (todo & TODO_update_ssa_any)
1519       update_ssa (TODO_update_ssa);
1520      return true;
1521    }
1522
1523  return false;
1524}
1525
1526/* Annotate auto profile to the control flow graph. Do not annotate value
1527   profile for stmts in PROMOTED_STMTS.  */
1528
1529static void
1530afdo_annotate_cfg (const stmt_set &promoted_stmts)
1531{
1532  basic_block bb;
1533  bb_set annotated_bb;
1534  const function_instance *s
1535      = afdo_source_profile->get_function_instance_by_decl (
1536          current_function_decl);
1537
1538  if (s == NULL)
1539    return;
1540  cgraph_node::get (current_function_decl)->count
1541     = profile_count::from_gcov_type (s->head_count ()).afdo ();
1542  ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1543     = profile_count::from_gcov_type (s->head_count ()).afdo ();
1544  EXIT_BLOCK_PTR_FOR_FN (cfun)->count = profile_count::zero ().afdo ();
1545  profile_count max_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1546
1547  FOR_EACH_BB_FN (bb, cfun)
1548    {
1549      /* As autoFDO uses sampling approach, we have to assume that all
1550	 counters are zero when not seen by autoFDO.  */
1551      bb->count = profile_count::zero ().afdo ();
1552      if (afdo_set_bb_count (bb, promoted_stmts))
1553	set_bb_annotated (bb, &annotated_bb);
1554      if (bb->count > max_count)
1555	max_count = bb->count;
1556    }
1557  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1558      > ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count)
1559    {
1560      ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb->count
1561          = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1562      set_bb_annotated (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb, &annotated_bb);
1563    }
1564  if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count
1565      > EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count)
1566    {
1567      EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb->count
1568          = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
1569      set_bb_annotated (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb, &annotated_bb);
1570    }
1571  afdo_source_profile->mark_annotated (
1572      DECL_SOURCE_LOCATION (current_function_decl));
1573  afdo_source_profile->mark_annotated (cfun->function_start_locus);
1574  afdo_source_profile->mark_annotated (cfun->function_end_locus);
1575  if (max_count > profile_count::zero ())
1576    {
1577      /* Calculate, propagate count and probability information on CFG.  */
1578      afdo_calculate_branch_prob (&annotated_bb);
1579    }
1580  update_max_bb_count ();
1581  profile_status_for_fn (cfun) = PROFILE_READ;
1582  if (flag_value_profile_transformations)
1583    {
1584      gimple_value_profile_transformations ();
1585      free_dominance_info (CDI_DOMINATORS);
1586      free_dominance_info (CDI_POST_DOMINATORS);
1587      update_ssa (TODO_update_ssa);
1588    }
1589}
1590
1591/* Wrapper function to invoke early inliner.  */
1592
1593static void
1594early_inline ()
1595{
1596  compute_fn_summary (cgraph_node::get (current_function_decl), true);
1597  unsigned todo = early_inliner (cfun);
1598  if (todo & TODO_update_ssa_any)
1599    update_ssa (TODO_update_ssa);
1600}
1601
1602/* Use AutoFDO profile to annoate the control flow graph.
1603   Return the todo flag.  */
1604
1605static unsigned int
1606auto_profile (void)
1607{
1608  struct cgraph_node *node;
1609
1610  if (symtab->state == FINISHED)
1611    return 0;
1612
1613  init_node_map (true);
1614  profile_info = autofdo::afdo_profile_info;
1615
1616  FOR_EACH_FUNCTION (node)
1617  {
1618    if (!gimple_has_body_p (node->decl))
1619      continue;
1620
1621    /* Don't profile functions produced for builtin stuff.  */
1622    if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1623      continue;
1624
1625    push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1626
1627    /* First do indirect call promotion and early inline to make the
1628       IR match the profiled binary before actual annotation.
1629
1630       This is needed because an indirect call might have been promoted
1631       and inlined in the profiled binary. If we do not promote and
1632       inline these indirect calls before annotation, the profile for
1633       these promoted functions will be lost.
1634
1635       e.g. foo() --indirect_call--> bar()
1636       In profiled binary, the callsite is promoted and inlined, making
1637       the profile look like:
1638
1639       foo: {
1640         loc_foo_1: count_1
1641         bar@loc_foo_2: {
1642           loc_bar_1: count_2
1643           loc_bar_2: count_3
1644         }
1645       }
1646
1647       Before AutoFDO pass, loc_foo_2 is not promoted thus not inlined.
1648       If we perform annotation on it, the profile inside bar@loc_foo2
1649       will be wasted.
1650
1651       To avoid this, we promote loc_foo_2 and inline the promoted bar
1652       function before annotation, so the profile inside bar@loc_foo2
1653       will be useful.  */
1654    autofdo::stmt_set promoted_stmts;
1655    for (int i = 0; i < 10; i++)
1656      {
1657        if (!flag_value_profile_transformations
1658            || !autofdo::afdo_vpt_for_early_inline (&promoted_stmts))
1659          break;
1660        early_inline ();
1661      }
1662
1663    early_inline ();
1664    autofdo::afdo_annotate_cfg (promoted_stmts);
1665    compute_function_frequency ();
1666
1667    /* Local pure-const may imply need to fixup the cfg.  */
1668    if (execute_fixup_cfg () & TODO_cleanup_cfg)
1669      cleanup_tree_cfg ();
1670
1671    free_dominance_info (CDI_DOMINATORS);
1672    free_dominance_info (CDI_POST_DOMINATORS);
1673    cgraph_edge::rebuild_edges ();
1674    compute_fn_summary (cgraph_node::get (current_function_decl), true);
1675    pop_cfun ();
1676  }
1677
1678  return TODO_rebuild_cgraph_edges;
1679}
1680} /* namespace autofdo.  */
1681
1682/* Read the profile from the profile data file.  */
1683
1684void
1685read_autofdo_file (void)
1686{
1687  if (auto_profile_file == NULL)
1688    auto_profile_file = DEFAULT_AUTO_PROFILE_FILE;
1689
1690  autofdo::afdo_profile_info = XNEW (gcov_summary);
1691  autofdo::afdo_profile_info->runs = 1;
1692  autofdo::afdo_profile_info->sum_max = 0;
1693
1694  /* Read the profile from the profile file.  */
1695  autofdo::read_profile ();
1696}
1697
1698/* Free the resources.  */
1699
1700void
1701end_auto_profile (void)
1702{
1703  delete autofdo::afdo_source_profile;
1704  delete autofdo::afdo_string_table;
1705  profile_info = NULL;
1706}
1707
1708/* Returns TRUE if EDGE is hot enough to be inlined early.  */
1709
1710bool
1711afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge)
1712{
1713  gcov_type count
1714      = autofdo::afdo_source_profile->get_callsite_total_count (edge);
1715
1716  if (count > 0)
1717    {
1718      bool is_hot;
1719      profile_count pcount = profile_count::from_gcov_type (count).afdo ();
1720      gcov_summary *saved_profile_info = profile_info;
1721      /* At early inline stage, profile_info is not set yet. We need to
1722         temporarily set it to afdo_profile_info to calculate hotness.  */
1723      profile_info = autofdo::afdo_profile_info;
1724      is_hot = maybe_hot_count_p (NULL, pcount);
1725      profile_info = saved_profile_info;
1726      return is_hot;
1727    }
1728
1729  return false;
1730}
1731
1732namespace
1733{
1734
1735const pass_data pass_data_ipa_auto_profile = {
1736  SIMPLE_IPA_PASS, "afdo", /* name */
1737  OPTGROUP_NONE,           /* optinfo_flags */
1738  TV_IPA_AUTOFDO,          /* tv_id */
1739  0,                       /* properties_required */
1740  0,                       /* properties_provided */
1741  0,                       /* properties_destroyed */
1742  0,                       /* todo_flags_start */
1743  0,                       /* todo_flags_finish */
1744};
1745
1746class pass_ipa_auto_profile : public simple_ipa_opt_pass
1747{
1748public:
1749  pass_ipa_auto_profile (gcc::context *ctxt)
1750      : simple_ipa_opt_pass (pass_data_ipa_auto_profile, ctxt)
1751  {
1752  }
1753
1754  /* opt_pass methods: */
1755  virtual bool
1756  gate (function *)
1757  {
1758    return flag_auto_profile;
1759  }
1760  virtual unsigned int
1761  execute (function *)
1762  {
1763    return autofdo::auto_profile ();
1764  }
1765}; // class pass_ipa_auto_profile
1766
1767} // anon namespace
1768
1769simple_ipa_opt_pass *
1770make_pass_ipa_auto_profile (gcc::context *ctxt)
1771{
1772  return new pass_ipa_auto_profile (ctxt);
1773}
1774