1/* cg_print.c -  Print routines for displaying call graphs.
2
3   Copyright (C) 2000-2017 Free Software Foundation, Inc.
4
5   This file is part of GNU Binutils.
6
7   This program is free software; you can redistribute it and/or modify
8   it under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 3 of the License, or
10   (at your option) any later version.
11
12   This program is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   GNU General Public License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with this program; if not, write to the Free Software
19   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20   02110-1301, USA.  */
21
22#include "gprof.h"
23#include "libiberty.h"
24#include "filenames.h"
25#include "search_list.h"
26#include "source.h"
27#include "symtab.h"
28#include "cg_arcs.h"
29#include "cg_print.h"
30#include "hist.h"
31#include "utils.h"
32#include "corefile.h"
33
34/* Return value of comparison functions used to sort tables.  */
35#define	LESSTHAN	-1
36#define	EQUALTO		0
37#define	GREATERTHAN	1
38
39static void print_header (void);
40static void print_cycle (Sym *);
41static int cmp_member (Sym *, Sym *);
42static void sort_members (Sym *);
43static void print_members (Sym *);
44static int cmp_arc (Arc *, Arc *);
45static void sort_parents (Sym *);
46static void print_parents (Sym *);
47static void sort_children (Sym *);
48static void print_children (Sym *);
49static void print_line (Sym *);
50static int cmp_name (const PTR, const PTR);
51static int cmp_arc_count (const PTR, const PTR);
52static int cmp_fun_nuses (const PTR, const PTR);
53static void order_and_dump_functions_by_arcs
54  (Arc **, unsigned long, int, Arc **, unsigned long *);
55
56/* Declarations of automatically generated functions to output blurbs.  */
57extern void bsd_callg_blurb (FILE * fp);
58extern void fsf_callg_blurb (FILE * fp);
59
60double print_time = 0.0;
61
62
63static void
64print_header (void)
65{
66  if (first_output)
67    first_output = FALSE;
68  else
69    printf ("\f\n");
70
71  if (!bsd_style_output)
72    {
73      if (print_descriptions)
74	printf (_("\t\t     Call graph (explanation follows)\n\n"));
75      else
76	printf (_("\t\t\tCall graph\n\n"));
77    }
78
79  printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80	  (long) hist_scale * (long) sizeof (UNIT));
81
82  if (print_time > 0.0)
83    printf (_(" for %.2f%% of %.2f seconds\n\n"),
84	    100.0 / print_time, print_time / hz);
85  else
86    {
87      printf (_(" no time propagated\n\n"));
88
89      /* This doesn't hurt, since all the numerators will be 0.0.  */
90      print_time = 1.0;
91    }
92
93  if (bsd_style_output)
94    {
95      printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
96	      "", "", "", "", _("called"), _("total"), _("parents"));
97      printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98	      _("index"),
99	      /* xgettext:no-c-format */
100	      _("%time"),
101	      _("self"), _("descendants"), _("called"), _("self"),
102	      _("name"), _("index"));
103      printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
104	      "", "", "", "", _("called"), _("total"), _("children"));
105      printf ("\n");
106    }
107  else
108    {
109      printf (_("index %% time    self  children    called     name\n"));
110    }
111}
112
113/* Print a cycle header.  */
114
115static void
116print_cycle (Sym *cyc)
117{
118  char buf[BUFSIZ];
119
120  sprintf (buf, "[%d]", cyc->cg.index);
121  printf (bsd_style_output
122	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
123	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
124	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
125	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
126
127  if (cyc->cg.self_calls != 0)
128    printf ("+%-7lu", cyc->cg.self_calls);
129  else
130    printf (" %7.7s", "");
131
132  printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
133}
134
135/* Compare LEFT and RIGHT membmer.  Major comparison key is
136   CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
137
138static int
139cmp_member (Sym *left, Sym *right)
140{
141  double left_time = left->cg.prop.self + left->cg.prop.child;
142  double right_time = right->cg.prop.self + right->cg.prop.child;
143  unsigned long left_calls = left->ncalls + left->cg.self_calls;
144  unsigned long right_calls = right->ncalls + right->cg.self_calls;
145
146  if (left_time > right_time)
147    return GREATERTHAN;
148
149  if (left_time < right_time)
150    return LESSTHAN;
151
152  if (left_calls > right_calls)
153    return GREATERTHAN;
154
155  if (left_calls < right_calls)
156    return LESSTHAN;
157
158  return EQUALTO;
159}
160
161/* Sort members of a cycle.  */
162
163static void
164sort_members (Sym *cyc)
165{
166  Sym *todo, *doing, *prev;
167
168  /* Detach cycle members from cyclehead,
169     and insertion sort them back on.  */
170  todo = cyc->cg.cyc.next;
171  cyc->cg.cyc.next = 0;
172
173  for (doing = todo; doing != NULL; doing = todo)
174    {
175      todo = doing->cg.cyc.next;
176
177      for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
178	{
179	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
180	    break;
181	}
182
183      doing->cg.cyc.next = prev->cg.cyc.next;
184      prev->cg.cyc.next = doing;
185    }
186}
187
188/* Print the members of a cycle.  */
189
190static void
191print_members (Sym *cyc)
192{
193  Sym *member;
194
195  sort_members (cyc);
196
197  for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
198    {
199      printf (bsd_style_output
200	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
201	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
202	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
203	      member->ncalls);
204
205      if (member->cg.self_calls != 0)
206	printf ("+%-7lu", member->cg.self_calls);
207      else
208	printf (" %7.7s", "");
209
210      printf ("     ");
211      print_name (member);
212      printf ("\n");
213    }
214}
215
216/* Compare two arcs to/from the same child/parent.
217	- if one arc is a self arc, it's least.
218	- if one arc is within a cycle, it's less than.
219	- if both arcs are within a cycle, compare arc counts.
220	- if neither arc is within a cycle, compare with
221		time + child_time as major key
222		arc count as minor key.  */
223
224static int
225cmp_arc (Arc *left, Arc *right)
226{
227  Sym *left_parent = left->parent;
228  Sym *left_child = left->child;
229  Sym *right_parent = right->parent;
230  Sym *right_child = right->child;
231  double left_time, right_time;
232
233  DBG (TIMEDEBUG,
234       printf ("[cmp_arc] ");
235       print_name (left_parent);
236       printf (" calls ");
237       print_name (left_child);
238       printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
239	       left->count, left_child->ncalls);
240       printf ("[cmp_arc] ");
241       print_name (right_parent);
242       printf (" calls ");
243       print_name (right_child);
244       printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
245	       right->count, right_child->ncalls);
246       printf ("\n");
247    );
248
249  if (left_parent == left_child)
250    return LESSTHAN;		/* Left is a self call.  */
251
252  if (right_parent == right_child)
253    return GREATERTHAN;		/* Right is a self call.  */
254
255  if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
256      && left_parent->cg.cyc.num == left_child->cg.cyc.num)
257    {
258      /* Left is a call within a cycle.  */
259      if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
260	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
261	{
262	  /* Right is a call within the cycle, too.  */
263	  if (left->count < right->count)
264	    return LESSTHAN;
265
266	  if (left->count > right->count)
267	    return GREATERTHAN;
268
269	  return EQUALTO;
270	}
271      else
272	{
273	  /* Right isn't a call within the cycle.  */
274	  return LESSTHAN;
275	}
276    }
277  else
278    {
279      /* Left isn't a call within a cycle.  */
280      if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
281	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
282	{
283	  /* Right is a call within a cycle.  */
284	  return GREATERTHAN;
285	}
286      else
287	{
288	  /* Neither is a call within a cycle.  */
289	  left_time = left->time + left->child_time;
290	  right_time = right->time + right->child_time;
291
292	  if (left_time < right_time)
293	    return LESSTHAN;
294
295	  if (left_time > right_time)
296	    return GREATERTHAN;
297
298	  if (left->count < right->count)
299	    return LESSTHAN;
300
301	  if (left->count > right->count)
302	    return GREATERTHAN;
303
304	  return EQUALTO;
305	}
306    }
307}
308
309
310static void
311sort_parents (Sym * child)
312{
313  Arc *arc, *detached, sorted, *prev;
314
315  /* Unlink parents from child, then insertion sort back on to
316     sorted's parents.
317	  *arc        the arc you have detached and are inserting.
318	  *detached   the rest of the arcs to be sorted.
319	  sorted      arc list onto which you insertion sort.
320	  *prev       arc before the arc you are comparing.  */
321  sorted.next_parent = 0;
322
323  for (arc = child->cg.parents; arc; arc = detached)
324    {
325      detached = arc->next_parent;
326
327      /* Consider *arc as disconnected; insert it into sorted.  */
328      for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
329	{
330	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
331	    break;
332	}
333
334      arc->next_parent = prev->next_parent;
335      prev->next_parent = arc;
336    }
337
338  /* Reattach sorted arcs to child.  */
339  child->cg.parents = sorted.next_parent;
340}
341
342
343static void
344print_parents (Sym *child)
345{
346  Sym *parent;
347  Arc *arc;
348  Sym *cycle_head;
349
350  if (child->cg.cyc.head != 0)
351    cycle_head = child->cg.cyc.head;
352  else
353    cycle_head = child;
354
355  if (!child->cg.parents)
356    {
357      printf (bsd_style_output
358	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
359	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
360	      "", "", "", "", "", "");
361      return;
362    }
363
364  sort_parents (child);
365
366  for (arc = child->cg.parents; arc; arc = arc->next_parent)
367    {
368      parent = arc->parent;
369      if (child == parent || (child->cg.cyc.num != 0
370			      && parent->cg.cyc.num == child->cg.cyc.num))
371	{
372	  /* Selfcall or call among siblings.  */
373	  printf (bsd_style_output
374		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
375		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
376		  "", "", "", "",
377		  arc->count, "");
378	  print_name (parent);
379	  printf ("\n");
380	}
381      else
382	{
383	  /* Regular parent of child.  */
384	  printf (bsd_style_output
385		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
386		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
387		  "", "",
388		  arc->time / hz, arc->child_time / hz,
389		  arc->count, cycle_head->ncalls);
390	  print_name (parent);
391	  printf ("\n");
392	}
393    }
394}
395
396
397static void
398sort_children (Sym *parent)
399{
400  Arc *arc, *detached, sorted, *prev;
401
402  /* Unlink children from parent, then insertion sort back on to
403     sorted's children.
404	  *arc        the arc you have detached and are inserting.
405	  *detached   the rest of the arcs to be sorted.
406	  sorted      arc list onto which you insertion sort.
407	  *prev       arc before the arc you are comparing.  */
408  sorted.next_child = 0;
409
410  for (arc = parent->cg.children; arc; arc = detached)
411    {
412      detached = arc->next_child;
413
414      /* Consider *arc as disconnected; insert it into sorted.  */
415      for (prev = &sorted; prev->next_child; prev = prev->next_child)
416	{
417	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
418	    break;
419	}
420
421      arc->next_child = prev->next_child;
422      prev->next_child = arc;
423    }
424
425  /* Reattach sorted children to parent.  */
426  parent->cg.children = sorted.next_child;
427}
428
429
430static void
431print_children (Sym *parent)
432{
433  Sym *child;
434  Arc *arc;
435
436  sort_children (parent);
437  arc = parent->cg.children;
438
439  for (arc = parent->cg.children; arc; arc = arc->next_child)
440    {
441      child = arc->child;
442      if (child == parent || (child->cg.cyc.num != 0
443			      && child->cg.cyc.num == parent->cg.cyc.num))
444	{
445	  /* Self call or call to sibling.  */
446	  printf (bsd_style_output
447		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
448		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
449		  "", "", "", "", arc->count, "");
450	  print_name (child);
451	  printf ("\n");
452	}
453      else
454	{
455	  /* Regular child of parent.  */
456	  printf (bsd_style_output
457		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
458		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
459		  "", "",
460		  arc->time / hz, arc->child_time / hz,
461		  arc->count, child->cg.cyc.head->ncalls);
462	  print_name (child);
463	  printf ("\n");
464	}
465    }
466}
467
468
469static void
470print_line (Sym *np)
471{
472  char buf[BUFSIZ];
473
474  sprintf (buf, "[%d]", np->cg.index);
475  printf (bsd_style_output
476	  ? "%-6.6s %5.1f %7.2f %11.2f"
477	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
478	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
479	  np->cg.prop.self / hz, np->cg.prop.child / hz);
480
481  if ((np->ncalls + np->cg.self_calls) != 0)
482    {
483      printf (" %7lu", np->ncalls);
484
485      if (np->cg.self_calls != 0)
486	  printf ("+%-7lu ", np->cg.self_calls);
487      else
488	  printf (" %7.7s ", "");
489    }
490  else
491    {
492      printf (" %7.7s %7.7s ", "", "");
493    }
494
495  print_name (np);
496  printf ("\n");
497}
498
499
500/* Print dynamic call graph.  */
501
502void
503cg_print (Sym ** timesortsym)
504{
505  unsigned int sym_index;
506  Sym *parent;
507
508  if (print_descriptions && bsd_style_output)
509    bsd_callg_blurb (stdout);
510
511  print_header ();
512
513  for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
514    {
515      parent = timesortsym[sym_index];
516
517      if ((ignore_zeros && parent->ncalls == 0
518	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
519	   && parent->cg.prop.child == 0)
520	  || !parent->cg.print_flag
521	  || (line_granularity && ! parent->is_func))
522	continue;
523
524      if (!parent->name && parent->cg.cyc.num != 0)
525	{
526	  /* Cycle header.  */
527	  print_cycle (parent);
528	  print_members (parent);
529	}
530      else
531	{
532	  print_parents (parent);
533	  print_line (parent);
534	  print_children (parent);
535	}
536
537      if (bsd_style_output)
538	printf ("\n");
539
540      printf ("-----------------------------------------------\n");
541
542      if (bsd_style_output)
543	printf ("\n");
544    }
545
546  free (timesortsym);
547
548  if (print_descriptions && !bsd_style_output)
549    fsf_callg_blurb (stdout);
550}
551
552
553static int
554cmp_name (const PTR left, const PTR right)
555{
556  const Sym **npp1 = (const Sym **) left;
557  const Sym **npp2 = (const Sym **) right;
558
559  return strcmp ((*npp1)->name, (*npp2)->name);
560}
561
562
563void
564cg_print_index (void)
565{
566  unsigned int sym_index;
567  unsigned int nnames, todo, i, j;
568  int col, starting_col;
569  Sym **name_sorted_syms, *sym;
570  const char *filename;
571  char buf[20];
572  int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
573
574  /* Now, sort regular function name
575     alphabetically to create an index.  */
576  name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
577
578  for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
579    {
580      if (ignore_zeros && symtab.base[sym_index].ncalls == 0
581	  && symtab.base[sym_index].hist.time == 0)
582	continue;
583
584      name_sorted_syms[nnames++] = &symtab.base[sym_index];
585    }
586
587  qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
588
589  for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
590    name_sorted_syms[todo++] = &cycle_header[sym_index];
591
592  printf ("\f\n");
593  printf (_("Index by function name\n\n"));
594  sym_index = (todo + 2) / 3;
595
596  for (i = 0; i < sym_index; i++)
597    {
598      col = 0;
599      starting_col = 0;
600
601      for (j = i; j < todo; j += sym_index)
602	{
603	  sym = name_sorted_syms[j];
604
605	  if (sym->cg.print_flag)
606	    sprintf (buf, "[%d]", sym->cg.index);
607	  else
608	    sprintf (buf, "(%d)", sym->cg.index);
609
610	  if (j < nnames)
611	    {
612	      if (bsd_style_output)
613		{
614		  printf ("%6.6s %-19.19s", buf, sym->name);
615		}
616	      else
617		{
618		  col += strlen (buf);
619
620		  for (; col < starting_col + 5; ++col)
621		    putchar (' ');
622
623		  printf (" %s ", buf);
624		  col += print_name_only (sym);
625
626		  if (!line_granularity && sym->is_static && sym->file)
627		    {
628		      filename = sym->file->name;
629
630		      if (!print_path)
631			{
632			  filename = strrchr (filename, '/');
633
634			  if (filename)
635			    ++filename;
636			  else
637			    filename = sym->file->name;
638			}
639
640		      printf (" (%s)", filename);
641		      col += strlen (filename) + 3;
642		    }
643		}
644	    }
645	  else
646	    {
647	      if (bsd_style_output)
648		{
649		  printf ("%6.6s ", buf);
650		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
651		  printf ("%-19.19s", buf);
652		}
653	      else
654		{
655		  col += strlen (buf);
656		  for (; col < starting_col + 5; ++col)
657		    putchar (' ');
658		  printf (" %s ", buf);
659		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
660		  printf ("%s", buf);
661		  col += strlen (buf);
662		}
663	    }
664
665	  starting_col += column_width;
666	}
667
668      printf ("\n");
669    }
670
671  free (name_sorted_syms);
672}
673
674/* Compare two arcs based on their usage counts.
675   We want to sort in descending order.  */
676
677static int
678cmp_arc_count (const PTR left, const PTR right)
679{
680  const Arc **npp1 = (const Arc **) left;
681  const Arc **npp2 = (const Arc **) right;
682
683  if ((*npp1)->count > (*npp2)->count)
684    return -1;
685  else if ((*npp1)->count < (*npp2)->count)
686    return 1;
687  else
688    return 0;
689}
690
691/* Compare two funtions based on their usage counts.
692   We want to sort in descending order.  */
693
694static int
695cmp_fun_nuses (const PTR left, const PTR right)
696{
697  const Sym **npp1 = (const Sym **) left;
698  const Sym **npp2 = (const Sym **) right;
699
700  if ((*npp1)->nuses > (*npp2)->nuses)
701    return -1;
702  else if ((*npp1)->nuses < (*npp2)->nuses)
703    return 1;
704  else
705    return 0;
706}
707
708/* Print a suggested function ordering based on the profiling data.
709
710   We perform 4 major steps when ordering functions:
711
712	* Group unused functions together and place them at the
713	end of the function order.
714
715	* Search the highest use arcs (those which account for 90% of
716	the total arc count) for functions which have several parents.
717
718	Group those with the most call sites together (currently the
719	top 1.25% which have at least five different call sites).
720
721	These are emitted at the start of the function order.
722
723	* Use a greedy placement algorithm to place functions which
724	occur in the top 99% of the arcs in the profile.  Some provisions
725	are made to handle high usage arcs where the parent and/or
726	child has already been placed.
727
728	* Run the same greedy placement algorithm on the remaining
729	arcs to place the leftover functions.
730
731
732   The various "magic numbers" should (one day) be tuneable by command
733   line options.  They were arrived at by benchmarking a few applications
734   with various values to see which values produced better overall function
735   orderings.
736
737   Of course, profiling errors, machine limitations (PA long calls), and
738   poor cutoff values for the placement algorithm may limit the usefullness
739   of the resulting function order.  Improvements would be greatly appreciated.
740
741   Suggestions:
742
743	* Place the functions with many callers near the middle of the
744	list to reduce long calls.
745
746	* Propagate arc usage changes as functions are placed.  Ie if
747	func1 and func2 are placed together, arcs to/from those arcs
748	to the same parent/child should be combined, then resort the
749	arcs to choose the next one.
750
751	* Implement some global positioning algorithm to place the
752	chains made by the greedy local positioning algorithm.  Probably
753	by examining arcs which haven't been placed yet to tie two
754	chains together.
755
756	* Take a function's size and time into account in the algorithm;
757	size in particular is important on the PA (long calls).  Placing
758	many small functions onto their own page may be wise.
759
760	* Use better profiling information; many published algorithms
761	are based on call sequences through time, rather than just
762	arc counts.
763
764	* Prodecure cloning could improve performance when a small number
765	of arcs account for most of the calls to a particular function.
766
767	* Use relocation information to avoid moving unused functions
768	completely out of the code stream; this would avoid severe lossage
769	when the profile data bears little resemblance to actual runs.
770
771	* Propagation of arc usages should also improve .o link line
772	ordering which shares the same arc placement algorithm with
773	the function ordering code (in fact it is a degenerate case
774	of function ordering).  */
775
776void
777cg_print_function_ordering (void)
778{
779  unsigned long sym_index;
780  unsigned long arc_index;
781  unsigned long used, unused, scratch_index;
782  unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
783#ifdef __GNUC__
784  unsigned long long total_arcs, tmp_arcs_count;
785#else
786  unsigned long total_arcs, tmp_arcs_count;
787#endif
788  Sym **unused_syms, **used_syms, **scratch_syms;
789  Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
790
791  sym_index = 0;
792  used = 0;
793  unused = 0;
794  scratch_index = 0;
795  unplaced_arc_count = 0;
796  high_arc_count = 0;
797  scratch_arc_count = 0;
798
799  /* First group all the unused functions together.  */
800  unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
801  used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802  scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803  high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
804  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805  unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806
807  /* Walk through all the functions; mark those which are never
808     called as placed (we'll emit them as a group later).  */
809  for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
810    {
811      if (symtab.base[sym_index].ncalls == 0)
812	{
813	  unused_syms[unused++] = &symtab.base[sym_index];
814	  symtab.base[sym_index].has_been_placed = 1;
815	}
816      else
817	{
818	  used_syms[used++] = &symtab.base[sym_index];
819	  symtab.base[sym_index].has_been_placed = 0;
820	  symtab.base[sym_index].next = 0;
821	  symtab.base[sym_index].prev = 0;
822	  symtab.base[sym_index].nuses = 0;
823	}
824    }
825
826  /* Sort the arcs from most used to least used.  */
827  qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
828
829  /* Compute the total arc count.  Also mark arcs as unplaced.
830
831     Note we don't compensate for overflow if that happens!
832     Overflow is much less likely when this file is compiled
833     with GCC as it can double-wide integers via long long.  */
834  total_arcs = 0;
835  for (arc_index = 0; arc_index < numarcs; arc_index++)
836    {
837      total_arcs += arcs[arc_index]->count;
838      arcs[arc_index]->has_been_placed = 0;
839    }
840
841  /* We want to pull out those functions which are referenced
842     by many highly used arcs and emit them as a group.  This
843     could probably use some tuning.  */
844  tmp_arcs_count = 0;
845  for (arc_index = 0; arc_index < numarcs; arc_index++)
846    {
847      tmp_arcs_count += arcs[arc_index]->count;
848
849      /* Count how many times each parent and child are used up
850	 to our threshold of arcs (90%).  */
851      if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
852	break;
853
854      arcs[arc_index]->child->nuses++;
855    }
856
857  /* Now sort a temporary symbol table based on the number of
858     times each function was used in the highest used arcs.  */
859  memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
860  qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
861
862  /* Now pick out those symbols we're going to emit as
863     a group.  We take up to 1.25% of the used symbols.  */
864  for (sym_index = 0; sym_index < used / 80; sym_index++)
865    {
866      Sym *sym = scratch_syms[sym_index];
867      Arc *arc;
868
869      /* If we hit symbols that aren't used from many call sites,
870	 then we can quit.  We choose five as the low limit for
871	 no particular reason.  */
872      if (sym->nuses == 5)
873	break;
874
875      /* We're going to need the arcs between these functions.
876	 Unfortunately, we don't know all these functions
877	 until we're done.  So we keep track of all the arcs
878	 to the functions we care about, then prune out those
879	 which are uninteresting.
880
881	 An interesting variation would be to quit when we found
882	 multi-call site functions which account for some percentage
883	 of the arcs.  */
884      arc = sym->cg.children;
885
886      while (arc)
887	{
888	  if (arc->parent != arc->child)
889	    scratch_arcs[scratch_arc_count++] = arc;
890	  arc->has_been_placed = 1;
891	  arc = arc->next_child;
892	}
893
894      arc = sym->cg.parents;
895
896      while (arc)
897	{
898	  if (arc->parent != arc->child)
899	    scratch_arcs[scratch_arc_count++] = arc;
900	  arc->has_been_placed = 1;
901	  arc = arc->next_parent;
902	}
903
904      /* Keep track of how many symbols we're going to place.  */
905      scratch_index = sym_index;
906
907      /* A lie, but it makes identifying
908	 these functions easier later.  */
909      sym->has_been_placed = 1;
910    }
911
912  /* Now walk through the temporary arcs and copy
913     those we care about into the high arcs array.  */
914  for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
915    {
916      Arc *arc = scratch_arcs[arc_index];
917
918      /* If this arc refers to highly used functions, then
919	 then we want to keep it.  */
920      if (arc->child->has_been_placed
921	  && arc->parent->has_been_placed)
922	{
923	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
924
925	  /* We need to turn of has_been_placed since we're going to
926	     use the main arc placement algorithm on these arcs.  */
927	  arc->child->has_been_placed = 0;
928	  arc->parent->has_been_placed = 0;
929	}
930    }
931
932  /* Dump the multi-site high usage functions which are not
933     going to be ordered by the main ordering algorithm.  */
934  for (sym_index = 0; sym_index < scratch_index; sym_index++)
935    {
936      if (scratch_syms[sym_index]->has_been_placed)
937	printf ("%s\n", scratch_syms[sym_index]->name);
938    }
939
940  /* Now we can order the multi-site high use
941     functions based on the arcs between them.  */
942  qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
943  order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
944				    unplaced_arcs, &unplaced_arc_count);
945
946  /* Order and dump the high use functions left,
947     these typically have only a few call sites.  */
948  order_and_dump_functions_by_arcs (arcs, numarcs, 0,
949				    unplaced_arcs, &unplaced_arc_count);
950
951  /* Now place the rarely used functions.  */
952  order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
953				    scratch_arcs, &scratch_arc_count);
954
955  /* Output any functions not emitted by the order_and_dump calls.  */
956  for (sym_index = 0; sym_index < used; sym_index++)
957    if (used_syms[sym_index]->has_been_placed == 0)
958      printf("%s\n", used_syms[sym_index]->name);
959
960  /* Output the unused functions.  */
961  for (sym_index = 0; sym_index < unused; sym_index++)
962    printf("%s\n", unused_syms[sym_index]->name);
963
964  unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965  used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966  scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967  high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969  unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970
971  free (unused_syms);
972  free (used_syms);
973  free (scratch_syms);
974  free (high_arcs);
975  free (scratch_arcs);
976  free (unplaced_arcs);
977}
978
979/* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980   place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
981
982   If ALL is nonzero, then place all functions referenced by THE_ARCS,
983   else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
984
985#define MOST 0.99
986static void
987order_and_dump_functions_by_arcs (Arc **the_arcs, unsigned long arc_count,
988				  int all, Arc **unplaced_arcs,
989				  unsigned long *unplaced_arc_count)
990{
991#ifdef __GNUC__
992  unsigned long long tmp_arcs, total_arcs;
993#else
994  unsigned long tmp_arcs, total_arcs;
995#endif
996  unsigned int arc_index;
997
998  /* If needed, compute the total arc count.
999
1000     Note we don't compensate for overflow if that happens!  */
1001  if (! all)
1002    {
1003      total_arcs = 0;
1004      for (arc_index = 0; arc_index < arc_count; arc_index++)
1005	total_arcs += the_arcs[arc_index]->count;
1006    }
1007  else
1008    total_arcs = 0;
1009
1010  tmp_arcs = 0;
1011
1012  for (arc_index = 0; arc_index < arc_count; arc_index++)
1013    {
1014      Sym *sym1, *sym2;
1015      Sym *child, *parent;
1016
1017      tmp_arcs += the_arcs[arc_index]->count;
1018
1019      /* Ignore this arc if it's already been placed.  */
1020      if (the_arcs[arc_index]->has_been_placed)
1021	continue;
1022
1023      child = the_arcs[arc_index]->child;
1024      parent = the_arcs[arc_index]->parent;
1025
1026      /* If we're not using all arcs, and this is a rarely used
1027	 arc, then put it on the unplaced_arc list.  Similarly
1028	 if both the parent and child of this arc have been placed.  */
1029      if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1030	  || child->has_been_placed || parent->has_been_placed)
1031	{
1032	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1033	  continue;
1034	}
1035
1036      /* If all slots in the parent and child are full, then there isn't
1037	 anything we can do right now.  We'll place this arc on the
1038	 unplaced arc list in the hope that a global positioning
1039	 algorithm can use it to place function chains.  */
1040      if (parent->next && parent->prev && child->next && child->prev)
1041	{
1042	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1043	  continue;
1044	}
1045
1046      /* If the parent is unattached, then find the closest
1047	 place to attach it onto child's chain.   Similarly
1048	 for the opposite case.  */
1049      if (!parent->next && !parent->prev)
1050	{
1051	  int next_count = 0;
1052	  int prev_count = 0;
1053	  Sym *prev = child;
1054	  Sym *next = child;
1055
1056	  /* Walk to the beginning and end of the child's chain.  */
1057	  while (next->next)
1058	    {
1059	      next = next->next;
1060	      next_count++;
1061	    }
1062
1063	  while (prev->prev)
1064	    {
1065	      prev = prev->prev;
1066	      prev_count++;
1067	    }
1068
1069	  /* Choose the closest.  */
1070	  child = next_count < prev_count ? next : prev;
1071	}
1072      else if (! child->next && !child->prev)
1073	{
1074	  int next_count = 0;
1075	  int prev_count = 0;
1076	  Sym *prev = parent;
1077	  Sym *next = parent;
1078
1079	  while (next->next)
1080	    {
1081	      next = next->next;
1082	      next_count++;
1083	    }
1084
1085	  while (prev->prev)
1086	    {
1087	      prev = prev->prev;
1088	      prev_count++;
1089	    }
1090
1091	  parent = prev_count < next_count ? prev : next;
1092	}
1093      else
1094	{
1095	  /* Couldn't find anywhere to attach the functions,
1096	     put the arc on the unplaced arc list.  */
1097	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1098	  continue;
1099	}
1100
1101      /* Make sure we don't tie two ends together.  */
1102      sym1 = parent;
1103      if (sym1->next)
1104	while (sym1->next)
1105	  sym1 = sym1->next;
1106      else
1107	while (sym1->prev)
1108	  sym1 = sym1->prev;
1109
1110      sym2 = child;
1111      if (sym2->next)
1112	while (sym2->next)
1113	  sym2 = sym2->next;
1114      else
1115	while (sym2->prev)
1116	  sym2 = sym2->prev;
1117
1118      if (sym1 == child
1119	  && sym2 == parent)
1120	{
1121	  /* This would tie two ends together.  */
1122	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1123	  continue;
1124	}
1125
1126      if (parent->next)
1127	{
1128	  /* Must attach to the parent's prev field.  */
1129	  if (! child->next)
1130	    {
1131	      /* parent-prev and child-next */
1132	      parent->prev = child;
1133	      child->next = parent;
1134	      the_arcs[arc_index]->has_been_placed = 1;
1135	    }
1136	}
1137      else if (parent->prev)
1138	{
1139	  /* Must attach to the parent's next field.  */
1140	  if (! child->prev)
1141	    {
1142	      /* parent-next and child-prev */
1143	      parent->next = child;
1144	      child->prev = parent;
1145	      the_arcs[arc_index]->has_been_placed = 1;
1146	    }
1147	}
1148      else
1149	{
1150	  /* Can attach to either field in the parent, depends
1151	     on where we've got space in the child.  */
1152	  if (child->prev)
1153	    {
1154	      /* parent-prev and child-next.  */
1155	      parent->prev = child;
1156	      child->next = parent;
1157	      the_arcs[arc_index]->has_been_placed = 1;
1158	    }
1159	  else
1160	    {
1161	      /* parent-next and child-prev.  */
1162	      parent->next = child;
1163	      child->prev = parent;
1164	      the_arcs[arc_index]->has_been_placed = 1;
1165	    }
1166	}
1167    }
1168
1169  /* Dump the chains of functions we've made.  */
1170  for (arc_index = 0; arc_index < arc_count; arc_index++)
1171    {
1172      Sym *sym;
1173      if (the_arcs[arc_index]->parent->has_been_placed
1174	  || the_arcs[arc_index]->child->has_been_placed)
1175	continue;
1176
1177      sym = the_arcs[arc_index]->parent;
1178
1179      /* If this symbol isn't attached to any other
1180	 symbols, then we've got a rarely used arc.
1181
1182	 Skip it for now, we'll deal with them later.  */
1183      if (sym->next == NULL
1184	  && sym->prev == NULL)
1185	continue;
1186
1187      /* Get to the start of this chain.  */
1188      while (sym->prev)
1189	sym = sym->prev;
1190
1191      while (sym)
1192	{
1193	  /* Mark it as placed.  */
1194	  sym->has_been_placed = 1;
1195	  printf ("%s\n", sym->name);
1196	  sym = sym->next;
1197	}
1198    }
1199
1200  /* If we want to place all the arcs, then output
1201     those which weren't placed by the main algorithm.  */
1202  if (all)
1203    for (arc_index = 0; arc_index < arc_count; arc_index++)
1204      {
1205	Sym *sym;
1206	if (the_arcs[arc_index]->parent->has_been_placed
1207	    || the_arcs[arc_index]->child->has_been_placed)
1208	  continue;
1209
1210	sym = the_arcs[arc_index]->parent;
1211
1212	sym->has_been_placed = 1;
1213	printf ("%s\n", sym->name);
1214      }
1215}
1216
1217/* Compare two function_map structs based on file name.
1218   We want to sort in ascending order.  */
1219
1220static int
1221cmp_symbol_map (const void * l, const void * r)
1222{
1223  return filename_cmp (((struct function_map *) l)->file_name,
1224		       ((struct function_map *) r)->file_name);
1225}
1226
1227/* Print a suggested .o ordering for files on a link line based
1228   on profiling information.  This uses the function placement
1229   code for the bulk of its work.  */
1230
1231void
1232cg_print_file_ordering (void)
1233{
1234  unsigned long scratch_arc_count;
1235  unsigned long arc_index;
1236  unsigned long sym_index;
1237  Arc **scratch_arcs;
1238  char *last;
1239
1240  scratch_arc_count = 0;
1241
1242  scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1243  for (arc_index = 0; arc_index < numarcs; arc_index++)
1244    {
1245      if (! arcs[arc_index]->parent->mapped
1246	  || ! arcs[arc_index]->child->mapped)
1247	arcs[arc_index]->has_been_placed = 1;
1248    }
1249
1250  order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1251				    scratch_arcs, &scratch_arc_count);
1252
1253  /* Output .o's not handled by the main placement algorithm.  */
1254  for (sym_index = 0; sym_index < symtab.len; sym_index++)
1255    {
1256      if (symtab.base[sym_index].mapped
1257	  && ! symtab.base[sym_index].has_been_placed)
1258	printf ("%s\n", symtab.base[sym_index].name);
1259    }
1260
1261  qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1262
1263  /* Now output any .o's that didn't have any text symbols.  */
1264  last = NULL;
1265  for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1266    {
1267      unsigned int index2;
1268
1269      /* Don't bother searching if this symbol
1270	 is the same as the previous one.  */
1271      if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1272	continue;
1273
1274      for (index2 = 0; index2 < symtab.len; index2++)
1275	{
1276	  if (! symtab.base[index2].mapped)
1277	    continue;
1278
1279	  if (!filename_cmp (symtab.base[index2].name,
1280			     symbol_map[sym_index].file_name))
1281	    break;
1282	}
1283
1284      /* If we didn't find it in the symbol table, then it must
1285	 be a .o with no text symbols.  Output it last.  */
1286      if (index2 == symtab.len)
1287	printf ("%s\n", symbol_map[sym_index].file_name);
1288      last = symbol_map[sym_index].file_name;
1289    }
1290}
1291