1/* Top-level LTO routines. 2 Copyright (C) 2009-2020 Free Software Foundation, Inc. 3 Contributed by CodeSourcery, Inc. 4 5This file is part of GCC. 6 7GCC is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 3, or (at your option) any later 10version. 11 12GCC is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License 18along with GCC; see the file COPYING3. If not see 19<http://www.gnu.org/licenses/>. */ 20 21#include "config.h" 22#include "system.h" 23#include "coretypes.h" 24#include "tm.h" 25#include "function.h" 26#include "bitmap.h" 27#include "basic-block.h" 28#include "tree.h" 29#include "gimple.h" 30#include "cfghooks.h" 31#include "alloc-pool.h" 32#include "tree-pass.h" 33#include "tree-streamer.h" 34#include "cgraph.h" 35#include "opts.h" 36#include "toplev.h" 37#include "stor-layout.h" 38#include "symbol-summary.h" 39#include "tree-vrp.h" 40#include "ipa-prop.h" 41#include "debug.h" 42#include "lto.h" 43#include "lto-section-names.h" 44#include "splay-tree.h" 45#include "lto-partition.h" 46#include "context.h" 47#include "pass_manager.h" 48#include "ipa-fnsummary.h" 49#include "ipa-utils.h" 50#include "gomp-constants.h" 51#include "lto-symtab.h" 52#include "stringpool.h" 53#include "fold-const.h" 54#include "attribs.h" 55#include "builtins.h" 56#include "lto-common.h" 57 58 59/* Number of parallel tasks to run, -1 if we want to use GNU Make jobserver. */ 60static int lto_parallelism; 61 62/* Return true when NODE has a clone that is analyzed (i.e. we need 63 to load its body even if the node itself is not needed). */ 64 65static bool 66has_analyzed_clone_p (struct cgraph_node *node) 67{ 68 struct cgraph_node *orig = node; 69 node = node->clones; 70 if (node) 71 while (node != orig) 72 { 73 if (node->analyzed) 74 return true; 75 if (node->clones) 76 node = node->clones; 77 else if (node->next_sibling_clone) 78 node = node->next_sibling_clone; 79 else 80 { 81 while (node != orig && !node->next_sibling_clone) 82 node = node->clone_of; 83 if (node != orig) 84 node = node->next_sibling_clone; 85 } 86 } 87 return false; 88} 89 90/* Read the function body for the function associated with NODE. */ 91 92static void 93lto_materialize_function (struct cgraph_node *node) 94{ 95 tree decl; 96 97 decl = node->decl; 98 /* Read in functions with body (analyzed nodes) 99 and also functions that are needed to produce virtual clones. */ 100 if ((node->has_gimple_body_p () && node->analyzed) 101 || node->used_as_abstract_origin 102 || has_analyzed_clone_p (node)) 103 { 104 /* Clones don't need to be read. */ 105 if (node->clone_of) 106 return; 107 if (DECL_FUNCTION_PERSONALITY (decl) && !first_personality_decl) 108 first_personality_decl = DECL_FUNCTION_PERSONALITY (decl); 109 /* If the file contains a function with a language specific EH 110 personality set or with EH enabled initialize the backend EH 111 machinery. */ 112 if (DECL_FUNCTION_PERSONALITY (decl) 113 || opt_for_fn (decl, flag_exceptions)) 114 lto_init_eh (); 115 } 116 117 /* Let the middle end know about the function. */ 118 rest_of_decl_compilation (decl, 1, 0); 119} 120 121/* Materialize all the bodies for all the nodes in the callgraph. */ 122 123static void 124materialize_cgraph (void) 125{ 126 struct cgraph_node *node; 127 timevar_id_t lto_timer; 128 129 if (!quiet_flag) 130 fprintf (stderr, 131 flag_wpa ? "Materializing decls:" : "Reading function bodies:"); 132 133 134 FOR_EACH_FUNCTION (node) 135 { 136 if (node->lto_file_data) 137 { 138 lto_materialize_function (node); 139 lto_stats.num_input_cgraph_nodes++; 140 } 141 } 142 143 144 /* Start the appropriate timer depending on the mode that we are 145 operating in. */ 146 lto_timer = (flag_wpa) ? TV_WHOPR_WPA 147 : (flag_ltrans) ? TV_WHOPR_LTRANS 148 : TV_LTO; 149 timevar_push (lto_timer); 150 151 current_function_decl = NULL; 152 set_cfun (NULL); 153 154 if (!quiet_flag) 155 fprintf (stderr, "\n"); 156 157 timevar_pop (lto_timer); 158} 159 160/* Actually stream out ENCODER into TEMP_FILENAME. */ 161 162static void 163stream_out (char *temp_filename, lto_symtab_encoder_t encoder, int part) 164{ 165 lto_file *file = lto_obj_file_open (temp_filename, true); 166 if (!file) 167 fatal_error (input_location, "%<lto_obj_file_open()%> failed"); 168 lto_set_current_out_file (file); 169 170 gcc_assert (!dump_file); 171 streamer_dump_file = dump_begin (TDI_lto_stream_out, NULL, part); 172 ipa_write_optimization_summaries (encoder); 173 174 free (CONST_CAST (char *, file->filename)); 175 176 lto_set_current_out_file (NULL); 177 lto_obj_file_close (file); 178 free (file); 179 if (streamer_dump_file) 180 { 181 dump_end (TDI_lto_stream_out, streamer_dump_file); 182 streamer_dump_file = NULL; 183 } 184} 185 186/* Wait for forked process and signal errors. */ 187#ifdef HAVE_WORKING_FORK 188static void 189wait_for_child () 190{ 191 int status; 192 do 193 { 194#ifndef WCONTINUED 195#define WCONTINUED 0 196#endif 197 int w = waitpid (0, &status, WUNTRACED | WCONTINUED); 198 if (w == -1) 199 fatal_error (input_location, "waitpid failed"); 200 201 if (WIFEXITED (status) && WEXITSTATUS (status)) 202 fatal_error (input_location, "streaming subprocess failed"); 203 else if (WIFSIGNALED (status)) 204 fatal_error (input_location, 205 "streaming subprocess was killed by signal"); 206 } 207 while (!WIFEXITED (status) && !WIFSIGNALED (status)); 208} 209#endif 210 211static void 212stream_out_partitions_1 (char *temp_filename, int blen, int min, int max) 213{ 214 /* Write all the nodes in SET. */ 215 for (int p = min; p < max; p ++) 216 { 217 sprintf (temp_filename + blen, "%u.o", p); 218 stream_out (temp_filename, ltrans_partitions[p]->encoder, p); 219 ltrans_partitions[p]->encoder = NULL; 220 } 221} 222 223/* Stream out ENCODER into TEMP_FILENAME 224 Fork if that seems to help. */ 225 226static void 227stream_out_partitions (char *temp_filename, int blen, int min, int max, 228 bool ARG_UNUSED (last)) 229{ 230#ifdef HAVE_WORKING_FORK 231 static int nruns; 232 233 if (lto_parallelism <= 1) 234 { 235 stream_out_partitions_1 (temp_filename, blen, min, max); 236 return; 237 } 238 239 /* Do not run more than LTO_PARALLELISM streamings 240 FIXME: we ignore limits on jobserver. */ 241 if (lto_parallelism > 0 && nruns >= lto_parallelism) 242 { 243 wait_for_child (); 244 nruns --; 245 } 246 /* If this is not the last parallel partition, execute new 247 streaming process. */ 248 if (!last) 249 { 250 pid_t cpid = fork (); 251 252 if (!cpid) 253 { 254 setproctitle ("lto1-wpa-streaming"); 255 stream_out_partitions_1 (temp_filename, blen, min, max); 256 exit (0); 257 } 258 /* Fork failed; lets do the job ourseleves. */ 259 else if (cpid == -1) 260 stream_out_partitions_1 (temp_filename, blen, min, max); 261 else 262 nruns++; 263 } 264 /* Last partition; stream it and wait for all children to die. */ 265 else 266 { 267 int i; 268 stream_out_partitions_1 (temp_filename, blen, min, max); 269 for (i = 0; i < nruns; i++) 270 wait_for_child (); 271 } 272 asm_nodes_output = true; 273#else 274 stream_out_partitions_1 (temp_filename, blen, min, max); 275#endif 276} 277 278/* Write all output files in WPA mode and the file with the list of 279 LTRANS units. */ 280 281static void 282lto_wpa_write_files (void) 283{ 284 unsigned i, n_sets; 285 ltrans_partition part; 286 FILE *ltrans_output_list_stream; 287 char *temp_filename; 288 auto_vec <char *>temp_filenames; 289 auto_vec <int>temp_priority; 290 size_t blen; 291 292 /* Open the LTRANS output list. */ 293 if (!ltrans_output_list) 294 fatal_error (input_location, "no LTRANS output list filename provided"); 295 296 timevar_push (TV_WHOPR_WPA); 297 298 FOR_EACH_VEC_ELT (ltrans_partitions, i, part) 299 lto_stats.num_output_symtab_nodes 300 += lto_symtab_encoder_size (part->encoder); 301 302 timevar_pop (TV_WHOPR_WPA); 303 304 timevar_push (TV_WHOPR_WPA_IO); 305 306 cgraph_node *node; 307 /* Do body modifications needed for streaming before we fork out 308 worker processes. */ 309 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) 310 if (!node->clone_of && gimple_has_body_p (node->decl)) 311 lto_prepare_function_for_streaming (node); 312 313 ggc_trim (); 314 report_heap_memory_use (); 315 316 /* Generate a prefix for the LTRANS unit files. */ 317 blen = strlen (ltrans_output_list); 318 temp_filename = (char *) xmalloc (blen + sizeof ("2147483648.o")); 319 strcpy (temp_filename, ltrans_output_list); 320 if (blen > sizeof (".out") 321 && strcmp (temp_filename + blen - sizeof (".out") + 1, 322 ".out") == 0) 323 temp_filename[blen - sizeof (".out") + 1] = '\0'; 324 blen = strlen (temp_filename); 325 326 n_sets = ltrans_partitions.length (); 327 unsigned sets_per_worker = n_sets; 328 if (lto_parallelism > 1) 329 { 330 if (lto_parallelism > (int)n_sets) 331 lto_parallelism = n_sets; 332 sets_per_worker = (n_sets + lto_parallelism - 1) / lto_parallelism; 333 } 334 335 for (i = 0; i < n_sets; i++) 336 { 337 ltrans_partition part = ltrans_partitions[i]; 338 339 /* Write all the nodes in SET. */ 340 sprintf (temp_filename + blen, "%u.o", i); 341 342 if (!quiet_flag) 343 fprintf (stderr, " %s (%s %i insns)", temp_filename, part->name, 344 part->insns); 345 if (symtab->dump_file) 346 { 347 lto_symtab_encoder_iterator lsei; 348 349 fprintf (symtab->dump_file, 350 "Writing partition %s to file %s, %i insns\n", 351 part->name, temp_filename, part->insns); 352 fprintf (symtab->dump_file, " Symbols in partition: "); 353 for (lsei = lsei_start_in_partition (part->encoder); 354 !lsei_end_p (lsei); 355 lsei_next_in_partition (&lsei)) 356 { 357 symtab_node *node = lsei_node (lsei); 358 fprintf (symtab->dump_file, "%s ", node->dump_asm_name ()); 359 } 360 fprintf (symtab->dump_file, "\n Symbols in boundary: "); 361 for (lsei = lsei_start (part->encoder); !lsei_end_p (lsei); 362 lsei_next (&lsei)) 363 { 364 symtab_node *node = lsei_node (lsei); 365 if (!lto_symtab_encoder_in_partition_p (part->encoder, node)) 366 { 367 fprintf (symtab->dump_file, "%s ", node->dump_asm_name ()); 368 cgraph_node *cnode = dyn_cast <cgraph_node *> (node); 369 if (cnode 370 && lto_symtab_encoder_encode_body_p (part->encoder, 371 cnode)) 372 fprintf (symtab->dump_file, "(body included)"); 373 else 374 { 375 varpool_node *vnode = dyn_cast <varpool_node *> (node); 376 if (vnode 377 && lto_symtab_encoder_encode_initializer_p (part->encoder, 378 vnode)) 379 fprintf (symtab->dump_file, "(initializer included)"); 380 } 381 } 382 } 383 fprintf (symtab->dump_file, "\n"); 384 } 385 gcc_checking_assert (lto_symtab_encoder_size (part->encoder) || !i); 386 387 temp_priority.safe_push (part->insns); 388 temp_filenames.safe_push (xstrdup (temp_filename)); 389 } 390 memory_block_pool::trim (0); 391 392 for (int set = 0; set < MAX (lto_parallelism, 1); set++) 393 { 394 stream_out_partitions (temp_filename, blen, set * sets_per_worker, 395 MIN ((set + 1) * sets_per_worker, n_sets), 396 set == MAX (lto_parallelism, 1) - 1); 397 } 398 399 ltrans_output_list_stream = fopen (ltrans_output_list, "w"); 400 if (ltrans_output_list_stream == NULL) 401 fatal_error (input_location, 402 "opening LTRANS output list %s: %m", ltrans_output_list); 403 for (i = 0; i < n_sets; i++) 404 { 405 unsigned int len = strlen (temp_filenames[i]); 406 if (fprintf (ltrans_output_list_stream, "%i\n", temp_priority[i]) < 0 407 || fwrite (temp_filenames[i], 1, len, ltrans_output_list_stream) < len 408 || fwrite ("\n", 1, 1, ltrans_output_list_stream) < 1) 409 fatal_error (input_location, "writing to LTRANS output list %s: %m", 410 ltrans_output_list); 411 free (temp_filenames[i]); 412 } 413 414 lto_stats.num_output_files += n_sets; 415 416 /* Close the LTRANS output list. */ 417 if (fclose (ltrans_output_list_stream)) 418 fatal_error (input_location, 419 "closing LTRANS output list %s: %m", ltrans_output_list); 420 421 free_ltrans_partitions (); 422 free (temp_filename); 423 424 timevar_pop (TV_WHOPR_WPA_IO); 425} 426 427/* Perform whole program analysis (WPA) on the callgraph and write out the 428 optimization plan. */ 429 430static void 431do_whole_program_analysis (void) 432{ 433 symtab_node *node; 434 435 lto_parallelism = 1; 436 437 /* TODO: jobserver communication is not supported, yet. */ 438 if (!strcmp (flag_wpa, "jobserver")) 439 lto_parallelism = param_max_lto_streaming_parallelism; 440 else 441 { 442 lto_parallelism = atoi (flag_wpa); 443 if (lto_parallelism <= 0) 444 lto_parallelism = 0; 445 if (lto_parallelism >= param_max_lto_streaming_parallelism) 446 lto_parallelism = param_max_lto_streaming_parallelism; 447 } 448 449 timevar_start (TV_PHASE_OPT_GEN); 450 451 /* Note that since we are in WPA mode, materialize_cgraph will not 452 actually read in all the function bodies. It only materializes 453 the decls and cgraph nodes so that analysis can be performed. */ 454 materialize_cgraph (); 455 456 /* Reading in the cgraph uses different timers, start timing WPA now. */ 457 timevar_push (TV_WHOPR_WPA); 458 459 if (pre_ipa_mem_report) 460 dump_memory_report ("Memory consumption before IPA"); 461 462 symtab->function_flags_ready = true; 463 464 if (symtab->dump_file) 465 symtab->dump (symtab->dump_file); 466 bitmap_obstack_initialize (NULL); 467 symtab->state = IPA_SSA; 468 469 execute_ipa_pass_list (g->get_passes ()->all_regular_ipa_passes); 470 471 /* When WPA analysis raises errors, do not bother to output anything. */ 472 if (seen_error ()) 473 return; 474 475 /* We are about to launch the final LTRANS phase, stop the WPA timer. */ 476 timevar_pop (TV_WHOPR_WPA); 477 478 /* We are no longer going to stream in anything. Free some memory. */ 479 lto_free_file_name_hash (); 480 481 482 timevar_push (TV_WHOPR_PARTITIONING); 483 484 gcc_assert (!dump_file); 485 dump_file = dump_begin (partition_dump_id, NULL); 486 487 if (dump_file) 488 symtab->dump (dump_file); 489 490 symtab_node::checking_verify_symtab_nodes (); 491 bitmap_obstack_release (NULL); 492 if (flag_lto_partition == LTO_PARTITION_1TO1) 493 lto_1_to_1_map (); 494 else if (flag_lto_partition == LTO_PARTITION_MAX) 495 lto_max_map (); 496 else if (flag_lto_partition == LTO_PARTITION_ONE) 497 lto_balanced_map (1, INT_MAX); 498 else if (flag_lto_partition == LTO_PARTITION_BALANCED) 499 lto_balanced_map (param_lto_partitions, 500 param_max_partition_size); 501 else 502 gcc_unreachable (); 503 504 /* Size summaries are needed for balanced partitioning. Free them now so 505 the memory can be used for streamer caches. */ 506 ipa_free_size_summary (); 507 508 /* AUX pointers are used by partitioning code to bookkeep number of 509 partitions symbol is in. This is no longer needed. */ 510 FOR_EACH_SYMBOL (node) 511 node->aux = NULL; 512 513 lto_stats.num_cgraph_partitions += ltrans_partitions.length (); 514 515 /* Find out statics that need to be promoted 516 to globals with hidden visibility because they are accessed from multiple 517 partitions. */ 518 lto_promote_cross_file_statics (); 519 if (dump_file) 520 dump_end (partition_dump_id, dump_file); 521 dump_file = NULL; 522 timevar_pop (TV_WHOPR_PARTITIONING); 523 524 timevar_stop (TV_PHASE_OPT_GEN); 525 526 /* Collect a last time - in lto_wpa_write_files we may end up forking 527 with the idea that this doesn't increase memory usage. So we 528 absoultely do not want to collect after that. */ 529 ggc_collect (); 530 531 timevar_start (TV_PHASE_STREAM_OUT); 532 if (!quiet_flag) 533 { 534 fprintf (stderr, "\nStreaming out"); 535 fflush (stderr); 536 } 537 lto_wpa_write_files (); 538 if (!quiet_flag) 539 fprintf (stderr, "\n"); 540 timevar_stop (TV_PHASE_STREAM_OUT); 541 542 if (post_ipa_mem_report) 543 dump_memory_report ("Memory consumption after IPA"); 544 545 /* Show the LTO report before launching LTRANS. */ 546 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) 547 print_lto_report_1 (); 548 if (mem_report_wpa) 549 dump_memory_report ("Final"); 550} 551 552/* Create artificial pointers for "omp declare target link" vars. */ 553 554static void 555offload_handle_link_vars (void) 556{ 557#ifdef ACCEL_COMPILER 558 varpool_node *var; 559 FOR_EACH_VARIABLE (var) 560 if (lookup_attribute ("omp declare target link", 561 DECL_ATTRIBUTES (var->decl))) 562 { 563 tree type = build_pointer_type (TREE_TYPE (var->decl)); 564 tree link_ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, 565 clone_function_name (var->decl, 566 "linkptr"), type); 567 TREE_USED (link_ptr_var) = 1; 568 TREE_STATIC (link_ptr_var) = 1; 569 TREE_PUBLIC (link_ptr_var) = TREE_PUBLIC (var->decl); 570 DECL_ARTIFICIAL (link_ptr_var) = 1; 571 SET_DECL_ASSEMBLER_NAME (link_ptr_var, DECL_NAME (link_ptr_var)); 572 SET_DECL_VALUE_EXPR (var->decl, build_simple_mem_ref (link_ptr_var)); 573 DECL_HAS_VALUE_EXPR_P (var->decl) = 1; 574 } 575#endif 576} 577 578unsigned int 579lto_option_lang_mask (void) 580{ 581 return CL_LTO; 582} 583 584/* Main entry point for the GIMPLE front end. This front end has 585 three main personalities: 586 587 - LTO (-flto). All the object files on the command line are 588 loaded in memory and processed as a single translation unit. 589 This is the traditional link-time optimization behavior. 590 591 - WPA (-fwpa). Only the callgraph and summary information for 592 files in the command file are loaded. A single callgraph 593 (without function bodies) is instantiated for the whole set of 594 files. IPA passes are only allowed to analyze the call graph 595 and make transformation decisions. The callgraph is 596 partitioned, each partition is written to a new object file 597 together with the transformation decisions. 598 599 - LTRANS (-fltrans). Similar to -flto but it prevents the IPA 600 summary files from running again. Since WPA computed summary 601 information and decided what transformations to apply, LTRANS 602 simply applies them. */ 603 604void 605lto_main (void) 606{ 607 /* LTO is called as a front end, even though it is not a front end. 608 Because it is called as a front end, TV_PHASE_PARSING and 609 TV_PARSE_GLOBAL are active, and we need to turn them off while 610 doing LTO. Later we turn them back on so they are active up in 611 toplev.c. */ 612 timevar_pop (TV_PARSE_GLOBAL); 613 timevar_stop (TV_PHASE_PARSING); 614 615 timevar_start (TV_PHASE_SETUP); 616 617 /* Initialize the LTO front end. */ 618 lto_fe_init (); 619 620 timevar_stop (TV_PHASE_SETUP); 621 timevar_start (TV_PHASE_STREAM_IN); 622 623 /* Read all the symbols and call graph from all the files in the 624 command line. */ 625 read_cgraph_and_symbols (num_in_fnames, in_fnames); 626 627 timevar_stop (TV_PHASE_STREAM_IN); 628 629 if (!seen_error ()) 630 { 631 offload_handle_link_vars (); 632 633 /* If WPA is enabled analyze the whole call graph and create an 634 optimization plan. Otherwise, read in all the function 635 bodies and continue with optimization. */ 636 if (flag_wpa) 637 do_whole_program_analysis (); 638 else 639 { 640 timevar_start (TV_PHASE_OPT_GEN); 641 642 materialize_cgraph (); 643 if (!flag_ltrans) 644 lto_promote_statics_nonwpa (); 645 646 /* Annotate the CU DIE and mark the early debug phase as finished. */ 647 debuginfo_early_start (); 648 debug_hooks->early_finish ("<artificial>"); 649 debuginfo_early_stop (); 650 651 /* Let the middle end know that we have read and merged all of 652 the input files. */ 653 symtab->compile (); 654 655 timevar_stop (TV_PHASE_OPT_GEN); 656 657 /* FIXME lto, if the processes spawned by WPA fail, we miss 658 the chance to print WPA's report, so WPA will call 659 print_lto_report before launching LTRANS. If LTRANS was 660 launched directly by the driver we would not need to do 661 this. */ 662 if (flag_lto_report || (flag_wpa && flag_lto_report_wpa)) 663 print_lto_report_1 (); 664 } 665 } 666 667 /* Here we make LTO pretend to be a parser. */ 668 timevar_start (TV_PHASE_PARSING); 669 timevar_push (TV_PARSE_GLOBAL); 670} 671