1/* Top-level LTO routines.
2   Copyright (C) 2009-2020 Free Software Foundation, Inc.
3   Contributed by CodeSourcery, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3.  If not see
19<http://www.gnu.org/licenses/>.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "function.h"
26#include "bitmap.h"
27#include "basic-block.h"
28#include "tree.h"
29#include "gimple.h"
30#include "cfghooks.h"
31#include "alloc-pool.h"
32#include "tree-pass.h"
33#include "tree-streamer.h"
34#include "cgraph.h"
35#include "opts.h"
36#include "toplev.h"
37#include "stor-layout.h"
38#include "symbol-summary.h"
39#include "tree-vrp.h"
40#include "ipa-prop.h"
41#include "debug.h"
42#include "lto.h"
43#include "lto-section-names.h"
44#include "splay-tree.h"
45#include "lto-partition.h"
46#include "context.h"
47#include "pass_manager.h"
48#include "ipa-fnsummary.h"
49#include "ipa-utils.h"
50#include "gomp-constants.h"
51#include "lto-symtab.h"
52#include "stringpool.h"
53#include "fold-const.h"
54#include "attribs.h"
55#include "builtins.h"
56#include "lto-common.h"
57
58
/* Number of parallel streaming tasks to run.  Values of 1 or less make
   stream_out_partitions write every partition in the current process
   without forking.
   NOTE(review): this comment used to say "-1 if we want to use GNU Make
   jobserver", but do_whole_program_analysis never stores -1 here; the
   "jobserver" setting currently selects
   param_max_lto_streaming_parallelism instead.  */
static int lto_parallelism;
61
62/* Return true when NODE has a clone that is analyzed (i.e. we need
63   to load its body even if the node itself is not needed).  */
64
65static bool
66has_analyzed_clone_p (struct cgraph_node *node)
67{
68  struct cgraph_node *orig = node;
69  node = node->clones;
70  if (node)
71    while (node != orig)
72      {
73	if (node->analyzed)
74	  return true;
75	if (node->clones)
76	  node = node->clones;
77	else if (node->next_sibling_clone)
78	  node = node->next_sibling_clone;
79	else
80	  {
81	    while (node != orig && !node->next_sibling_clone)
82	      node = node->clone_of;
83	    if (node != orig)
84	      node = node->next_sibling_clone;
85	  }
86      }
87  return false;
88}
89
90/* Read the function body for the function associated with NODE.  */
91
92static void
93lto_materialize_function (struct cgraph_node *node)
94{
95  tree decl;
96
97  decl = node->decl;
98  /* Read in functions with body (analyzed nodes)
99     and also functions that are needed to produce virtual clones.  */
100  if ((node->has_gimple_body_p () && node->analyzed)
101      || node->used_as_abstract_origin
102      || has_analyzed_clone_p (node))
103    {
104      /* Clones don't need to be read.  */
105      if (node->clone_of)
106	return;
107      if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl)
108	first_personality_decl = DECL_FUNCTION_PERSONALITY (decl);
109      /* If the file contains a function with a language specific EH
110	 personality set or with EH enabled initialize the backend EH
111	 machinery.  */
112      if (DECL_FUNCTION_PERSONALITY (decl)
113	  || opt_for_fn (decl, flag_exceptions))
114	lto_init_eh ();
115    }
116
117  /* Let the middle end know about the function.  */
118  rest_of_decl_compilation (decl, 1, 0);
119}
120
121/* Materialize all the bodies for all the nodes in the callgraph.  */
122
123static void
124materialize_cgraph (void)
125{
126  struct cgraph_node *node;
127  timevar_id_t lto_timer;
128
129  if (!quiet_flag)
130    fprintf (stderr,
131	     flag_wpa ? "Materializing decls:" : "Reading function bodies:");
132
133
134  FOR_EACH_FUNCTION (node)
135    {
136      if (node->lto_file_data)
137	{
138	  lto_materialize_function (node);
139	  lto_stats.num_input_cgraph_nodes++;
140	}
141    }
142
143
144  /* Start the appropriate timer depending on the mode that we are
145     operating in.  */
146  lto_timer = (flag_wpa) ? TV_WHOPR_WPA
147	      : (flag_ltrans) ? TV_WHOPR_LTRANS
148	      : TV_LTO;
149  timevar_push (lto_timer);
150
151  current_function_decl = NULL;
152  set_cfun (NULL);
153
154  if (!quiet_flag)
155    fprintf (stderr, "\n");
156
157  timevar_pop (lto_timer);
158}
159
160/* Actually stream out ENCODER into TEMP_FILENAME.  */
161
162static void
163stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part)
164{
165  lto_file *file = lto_obj_file_open (temp_filename, true);
166  if (!file)
167    fatal_error (input_location, "%<lto_obj_file_open()%> failed");
168  lto_set_current_out_file (file);
169
170  gcc_assert (!dump_file);
171  streamer_dump_file = dump_begin (TDI_lto_stream_out, NULL, part);
172  ipa_write_optimization_summaries (encoder);
173
174  free (CONST_CAST (char *, file->filename));
175
176  lto_set_current_out_file (NULL);
177  lto_obj_file_close (file);
178  free (file);
179  if (streamer_dump_file)
180    {
181      dump_end (TDI_lto_stream_out, streamer_dump_file);
182      streamer_dump_file = NULL;
183    }
184}
185
/* Wait until a forked streaming process terminates and signal errors:
   a failing waitpid, a non-zero exit status and death by signal are all
   fatal.  Stop and continue notifications are skipped and waited
   through.  */
#ifdef HAVE_WORKING_FORK
#ifndef WCONTINUED
#define WCONTINUED 0
#endif

static void
wait_for_child ()
{
  for (;;)
    {
      int status;

      if (waitpid (0, &status, WUNTRACED | WCONTINUED) == -1)
	fatal_error (input_location, "waitpid failed");

      if (WIFSIGNALED (status))
	fatal_error (input_location,
		     "streaming subprocess was killed by signal");

      if (WIFEXITED (status))
	{
	  if (WEXITSTATUS (status))
	    fatal_error (input_location, "streaming subprocess failed");
	  return;
	}
    }
}
#endif
210
211static void
212stream_out_partitions_1 (char *temp_filename, int blen, int min, int max)
213{
214   /* Write all the nodes in SET.  */
215   for (int p = min; p < max; p ++)
216     {
217       sprintf (temp_filename + blen, "%u.o", p);
218       stream_out (temp_filename, ltrans_partitions[p]->encoder, p);
219       ltrans_partitions[p]->encoder = NULL;
220     }
221}
222
223/* Stream out ENCODER into TEMP_FILENAME
224   Fork if that seems to help.  */
225
226static void
227stream_out_partitions (char *temp_filename, int blen, int min, int max,
228		       bool ARG_UNUSED (last))
229{
230#ifdef HAVE_WORKING_FORK
231  static int nruns;
232
233  if (lto_parallelism <= 1)
234    {
235      stream_out_partitions_1 (temp_filename, blen, min, max);
236      return;
237    }
238
239  /* Do not run more than LTO_PARALLELISM streamings
240     FIXME: we ignore limits on jobserver.  */
241  if (lto_parallelism > 0 && nruns >= lto_parallelism)
242    {
243      wait_for_child ();
244      nruns --;
245    }
246  /* If this is not the last parallel partition, execute new
247     streaming process.  */
248  if (!last)
249    {
250      pid_t cpid = fork ();
251
252      if (!cpid)
253	{
254	  setproctitle ("lto1-wpa-streaming");
255          stream_out_partitions_1 (temp_filename, blen, min, max);
256	  exit (0);
257	}
258      /* Fork failed; lets do the job ourseleves.  */
259      else if (cpid == -1)
260	stream_out_partitions_1 (temp_filename, blen, min, max);
261      else
262	nruns++;
263    }
264  /* Last partition; stream it and wait for all children to die.  */
265  else
266    {
267      int i;
268      stream_out_partitions_1 (temp_filename, blen, min, max);
269      for (i = 0; i < nruns; i++)
270	wait_for_child ();
271    }
272  asm_nodes_output = true;
273#else
274  stream_out_partitions_1 (temp_filename, blen, min, max);
275#endif
276}
277
278/* Write all output files in WPA mode and the file with the list of
279   LTRANS units.  */
280
281static void
282lto_wpa_write_files (void)
283{
284  unsigned i, n_sets;
285  ltrans_partition part;
286  FILE *ltrans_output_list_stream;
287  char *temp_filename;
288  auto_vec <char *>temp_filenames;
289  auto_vec <int>temp_priority;
290  size_t blen;
291
292  /* Open the LTRANS output list.  */
293  if (!ltrans_output_list)
294    fatal_error (input_location, "no LTRANS output list filename provided");
295
296  timevar_push (TV_WHOPR_WPA);
297
298  FOR_EACH_VEC_ELT (ltrans_partitions, i, part)
299    lto_stats.num_output_symtab_nodes
300    += lto_symtab_encoder_size (part->encoder);
301
302  timevar_pop (TV_WHOPR_WPA);
303
304  timevar_push (TV_WHOPR_WPA_IO);
305
306  cgraph_node *node;
307  /* Do body modifications needed for streaming before we fork out
308     worker processes.  */
309  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
310    if (!node->clone_of && gimple_has_body_p (node->decl))
311      lto_prepare_function_for_streaming (node);
312
313  ggc_trim ();
314  report_heap_memory_use ();
315
316  /* Generate a prefix for the LTRANS unit files.  */
317  blen = strlen (ltrans_output_list);
318  temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o"));
319  strcpy (temp_filename, ltrans_output_list);
320  if (blen > sizeof (".out")
321      && strcmp (temp_filename + blen - sizeof (".out") + 1,
322		 ".out") == 0)
323    temp_filename[blen - sizeof (".out") + 1] = '\0';
324  blen = strlen (temp_filename);
325
326  n_sets = ltrans_partitions.length ();
327  unsigned sets_per_worker = n_sets;
328  if (lto_parallelism > 1)
329    {
330      if (lto_parallelism > (int)n_sets)
331	lto_parallelism = n_sets;
332      sets_per_worker = (n_sets + lto_parallelism - 1) / lto_parallelism;
333    }
334
335  for (i = 0; i < n_sets; i++)
336    {
337      ltrans_partition part = ltrans_partitions[i];
338
339      /* Write all the nodes in SET.  */
340      sprintf (temp_filename + blen, "%u.o", i);
341
342      if (!quiet_flag)
343	fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name,
344		 part->insns);
345      if (symtab->dump_file)
346	{
347	  lto_symtab_encoder_iterator lsei;
348
349	  fprintf (symtab->dump_file,
350		   "Writing partition %s to file %s, %i insns\n",
351		   part->name, temp_filename, part->insns);
352	  fprintf (symtab->dump_file, "  Symbols in partition: ");
353	  for (lsei = lsei_start_in_partition (part->encoder);
354	       !lsei_end_p (lsei);
355	       lsei_next_in_partition (&lsei))
356	    {
357	      symtab_node *node = lsei_node (lsei);
358	      fprintf (symtab->dump_file, "%s ", node->dump_asm_name ());
359	    }
360	  fprintf (symtab->dump_file, "\n  Symbols in boundary: ");
361	  for (lsei = lsei_start (part->encoder); !lsei_end_p (lsei);
362	       lsei_next (&lsei))
363	    {
364	      symtab_node *node = lsei_node (lsei);
365	      if (!lto_symtab_encoder_in_partition_p (part->encoder, node))
366		{
367		  fprintf (symtab->dump_file, "%s ", node->dump_asm_name ());
368		  cgraph_node *cnode = dyn_cast <cgraph_node *> (node);
369		  if (cnode
370		      && lto_symtab_encoder_encode_body_p (part->encoder,
371							   cnode))
372		    fprintf (symtab->dump_file, "(body included)");
373		  else
374		    {
375		      varpool_node *vnode = dyn_cast <varpool_node *> (node);
376		      if (vnode
377			  && lto_symtab_encoder_encode_initializer_p (part->encoder,
378								      vnode))
379			fprintf (symtab->dump_file, "(initializer included)");
380		    }
381		}
382	    }
383	  fprintf (symtab->dump_file, "\n");
384	}
385      gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i);
386
387      temp_priority.safe_push (part->insns);
388      temp_filenames.safe_push (xstrdup (temp_filename));
389    }
390  memory_block_pool::trim (0);
391
392  for (int set = 0; set < MAX (lto_parallelism, 1); set++)
393    {
394      stream_out_partitions (temp_filename, blen, set * sets_per_worker,
395			     MIN ((set + 1) * sets_per_worker, n_sets),
396			     set == MAX (lto_parallelism, 1) - 1);
397    }
398
399  ltrans_output_list_stream = fopen (ltrans_output_list, "w");
400  if (ltrans_output_list_stream == NULL)
401    fatal_error (input_location,
402		 "opening LTRANS output list %s: %m", ltrans_output_list);
403  for (i = 0; i < n_sets; i++)
404    {
405      unsigned int len = strlen (temp_filenames[i]);
406      if (fprintf (ltrans_output_list_stream, "%i\n", temp_priority[i]) < 0
407	  || fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len
408	  || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1)
409	fatal_error (input_location, "writing to LTRANS output list %s: %m",
410		     ltrans_output_list);
411     free (temp_filenames[i]);
412    }
413
414  lto_stats.num_output_files += n_sets;
415
416  /* Close the LTRANS output list.  */
417  if (fclose (ltrans_output_list_stream))
418    fatal_error (input_location,
419		 "closing LTRANS output list %s: %m", ltrans_output_list);
420
421  free_ltrans_partitions ();
422  free (temp_filename);
423
424  timevar_pop (TV_WHOPR_WPA_IO);
425}
426
427/* Perform whole program analysis (WPA) on the callgraph and write out the
428   optimization plan.  */
429
430static void
431do_whole_program_analysis (void)
432{
433  symtab_node *node;
434
435  lto_parallelism = 1;
436
437  /* TODO: jobserver communication is not supported, yet.  */
438  if (!strcmp (flag_wpa, "jobserver"))
439    lto_parallelism = param_max_lto_streaming_parallelism;
440  else
441    {
442      lto_parallelism = atoi (flag_wpa);
443      if (lto_parallelism <= 0)
444	lto_parallelism = 0;
445      if (lto_parallelism >= param_max_lto_streaming_parallelism)
446	lto_parallelism = param_max_lto_streaming_parallelism;
447    }
448
449  timevar_start (TV_PHASE_OPT_GEN);
450
451  /* Note that since we are in WPA mode, materialize_cgraph will not
452     actually read in all the function bodies.  It only materializes
453     the decls and cgraph nodes so that analysis can be performed.  */
454  materialize_cgraph ();
455
456  /* Reading in the cgraph uses different timers, start timing WPA now.  */
457  timevar_push (TV_WHOPR_WPA);
458
459  if (pre_ipa_mem_report)
460    dump_memory_report ("Memory consumption before IPA");
461
462  symtab->function_flags_ready = true;
463
464  if (symtab->dump_file)
465    symtab->dump (symtab->dump_file);
466  bitmap_obstack_initialize (NULL);
467  symtab->state = IPA_SSA;
468
469  execute_ipa_pass_list (g->get_passes ()->all_regular_ipa_passes);
470
471  /* When WPA analysis raises errors, do not bother to output anything.  */
472  if (seen_error ())
473    return;
474
475  /* We are about to launch the final LTRANS phase, stop the WPA timer.  */
476  timevar_pop (TV_WHOPR_WPA);
477
478  /* We are no longer going to stream in anything.  Free some memory.  */
479  lto_free_file_name_hash ();
480
481
482  timevar_push (TV_WHOPR_PARTITIONING);
483
484  gcc_assert (!dump_file);
485  dump_file = dump_begin (partition_dump_id, NULL);
486
487  if (dump_file)
488    symtab->dump (dump_file);
489
490  symtab_node::checking_verify_symtab_nodes ();
491  bitmap_obstack_release (NULL);
492  if (flag_lto_partition == LTO_PARTITION_1TO1)
493    lto_1_to_1_map ();
494  else if (flag_lto_partition == LTO_PARTITION_MAX)
495    lto_max_map ();
496  else if (flag_lto_partition == LTO_PARTITION_ONE)
497    lto_balanced_map (1, INT_MAX);
498  else if (flag_lto_partition == LTO_PARTITION_BALANCED)
499    lto_balanced_map (param_lto_partitions,
500		      param_max_partition_size);
501  else
502    gcc_unreachable ();
503
504  /* Size summaries are needed for balanced partitioning.  Free them now so
505     the memory can be used for streamer caches.  */
506  ipa_free_size_summary ();
507
508  /* AUX pointers are used by partitioning code to bookkeep number of
509     partitions symbol is in.  This is no longer needed.  */
510  FOR_EACH_SYMBOL (node)
511    node->aux = NULL;
512
513  lto_stats.num_cgraph_partitions += ltrans_partitions.length ();
514
515  /* Find out statics that need to be promoted
516     to globals with hidden visibility because they are accessed from multiple
517     partitions.  */
518  lto_promote_cross_file_statics ();
519  if (dump_file)
520     dump_end (partition_dump_id, dump_file);
521  dump_file = NULL;
522  timevar_pop (TV_WHOPR_PARTITIONING);
523
524  timevar_stop (TV_PHASE_OPT_GEN);
525
526  /* Collect a last time - in lto_wpa_write_files we may end up forking
527     with the idea that this doesn't increase memory usage.  So we
528     absoultely do not want to collect after that.  */
529  ggc_collect ();
530
531  timevar_start (TV_PHASE_STREAM_OUT);
532  if (!quiet_flag)
533    {
534      fprintf (stderr, "\nStreaming out");
535      fflush (stderr);
536    }
537  lto_wpa_write_files ();
538  if (!quiet_flag)
539    fprintf (stderr, "\n");
540  timevar_stop (TV_PHASE_STREAM_OUT);
541
542  if (post_ipa_mem_report)
543    dump_memory_report ("Memory consumption after IPA");
544
545  /* Show the LTO report before launching LTRANS.  */
546  if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
547    print_lto_report_1 ();
548  if (mem_report_wpa)
549    dump_memory_report ("Final");
550}
551
/* Create artificial pointers for "omp declare target link" vars.

   Only does something when built as an accelerator compiler.  For each
   variable carrying the attribute, a static pointer variable named after
   it with a "linkptr" suffix is built, and the original variable gets a
   value expression that dereferences that pointer.  */

static void
offload_handle_link_vars (void)
{
#ifdef ACCEL_COMPILER
  varpool_node *var;
  FOR_EACH_VARIABLE (var)
    {
      tree target = var->decl;
      if (!lookup_attribute ("omp declare target link",
			     DECL_ATTRIBUTES (target)))
	continue;

      tree ptr_type = build_pointer_type (TREE_TYPE (target));
      tree ptr_name = clone_function_name (target, "linkptr");
      tree link_ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				      ptr_name, ptr_type);
      TREE_USED (link_ptr_var) = 1;
      TREE_STATIC (link_ptr_var) = 1;
      TREE_PUBLIC (link_ptr_var) = TREE_PUBLIC (target);
      DECL_ARTIFICIAL (link_ptr_var) = 1;
      SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var));

      /* Redirect uses of the variable through the link pointer.  */
      SET_DECL_VALUE_EXPR (target, build_simple_mem_ref (link_ptr_var));
      DECL_HAS_VALUE_EXPR_P (target) = 1;
    }
#endif
}
577
/* Return the option language mask of the LTO front end, CL_LTO.
   NOTE(review): presumably installed as a language hook elsewhere; no
   caller is visible in this file.  */

unsigned int
lto_option_lang_mask (void)
{
  return CL_LTO;
}
583
584/* Main entry point for the GIMPLE front end.  This front end has
585   three main personalities:
586
587   - LTO (-flto).  All the object files on the command line are
588     loaded in memory and processed as a single translation unit.
589     This is the traditional link-time optimization behavior.
590
591   - WPA (-fwpa).  Only the callgraph and summary information for
592     files in the command file are loaded.  A single callgraph
593     (without function bodies) is instantiated for the whole set of
594     files.  IPA passes are only allowed to analyze the call graph
595     and make transformation decisions.  The callgraph is
596     partitioned, each partition is written to a new object file
597     together with the transformation decisions.
598
599   - LTRANS (-fltrans).  Similar to -flto but it prevents the IPA
600     summary files from running again.  Since WPA computed summary
601     information and decided what transformations to apply, LTRANS
602     simply applies them.  */
603
604void
605lto_main (void)
606{
607  /* LTO is called as a front end, even though it is not a front end.
608     Because it is called as a front end, TV_PHASE_PARSING and
609     TV_PARSE_GLOBAL are active, and we need to turn them off while
610     doing LTO.  Later we turn them back on so they are active up in
611     toplev.c.  */
612  timevar_pop (TV_PARSE_GLOBAL);
613  timevar_stop (TV_PHASE_PARSING);
614
615  timevar_start (TV_PHASE_SETUP);
616
617  /* Initialize the LTO front end.  */
618  lto_fe_init ();
619
620  timevar_stop (TV_PHASE_SETUP);
621  timevar_start (TV_PHASE_STREAM_IN);
622
623  /* Read all the symbols and call graph from all the files in the
624     command line.  */
625  read_cgraph_and_symbols (num_in_fnames, in_fnames);
626
627  timevar_stop (TV_PHASE_STREAM_IN);
628
629  if (!seen_error ())
630    {
631      offload_handle_link_vars ();
632
633      /* If WPA is enabled analyze the whole call graph and create an
634	 optimization plan.  Otherwise, read in all the function
635	 bodies and continue with optimization.  */
636      if (flag_wpa)
637	do_whole_program_analysis ();
638      else
639	{
640	  timevar_start (TV_PHASE_OPT_GEN);
641
642	  materialize_cgraph ();
643	  if (!flag_ltrans)
644	    lto_promote_statics_nonwpa ();
645
646	  /* Annotate the CU DIE and mark the early debug phase as finished.  */
647	  debuginfo_early_start ();
648	  debug_hooks->early_finish ("<artificial>");
649	  debuginfo_early_stop ();
650
651	  /* Let the middle end know that we have read and merged all of
652	     the input files.  */
653	  symtab->compile ();
654
655	  timevar_stop (TV_PHASE_OPT_GEN);
656
657	  /* FIXME lto, if the processes spawned by WPA fail, we miss
658	     the chance to print WPA's report, so WPA will call
659	     print_lto_report before launching LTRANS.  If LTRANS was
660	     launched directly by the driver we would not need to do
661	     this.  */
662	  if (flag_lto_report || (flag_wpa && flag_lto_report_wpa))
663	    print_lto_report_1 ();
664	}
665    }
666
667  /* Here we make LTO pretend to be a parser.  */
668  timevar_start (TV_PHASE_PARSING);
669  timevar_push (TV_PARSE_GLOBAL);
670}
671