1//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the PassManagerBuilder class, which is used to set up a
10// "standard" optimization sequence suitable for languages like C and C++.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/PassManagerBuilder.h"
15#include "llvm-c/Transforms/PassManagerBuilder.h"
16#include "llvm/ADT/STLExtras.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/Analysis/GlobalsModRef.h"
19#include "llvm/Analysis/ScopedNoAliasAA.h"
20#include "llvm/Analysis/TargetLibraryInfo.h"
21#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
22#include "llvm/IR/LegacyPassManager.h"
23#include "llvm/Support/CommandLine.h"
24#include "llvm/Support/ManagedStatic.h"
25#include "llvm/Target/CGPassBuilderOption.h"
26#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
27#include "llvm/Transforms/IPO.h"
28#include "llvm/Transforms/IPO/Attributor.h"
29#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
30#include "llvm/Transforms/IPO/FunctionAttrs.h"
31#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
32#include "llvm/Transforms/InstCombine/InstCombine.h"
33#include "llvm/Transforms/Instrumentation.h"
34#include "llvm/Transforms/Scalar.h"
35#include "llvm/Transforms/Scalar/GVN.h"
36#include "llvm/Transforms/Scalar/LICM.h"
37#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
38#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
39#include "llvm/Transforms/Utils.h"
40#include "llvm/Transforms/Vectorize.h"
41
42using namespace llvm;
43
44PassManagerBuilder::PassManagerBuilder() {
45    OptLevel = 2;
46    SizeLevel = 0;
47    LibraryInfo = nullptr;
48    Inliner = nullptr;
49    DisableUnrollLoops = false;
50    SLPVectorize = false;
51    LoopVectorize = true;
52    LoopsInterleaved = true;
53    LicmMssaOptCap = SetLicmMssaOptCap;
54    LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
55    DisableGVNLoadPRE = false;
56    ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
57    VerifyInput = false;
58    VerifyOutput = false;
59    MergeFunctions = false;
60    DivergentTarget = false;
61    CallGraphProfile = true;
62}
63
64PassManagerBuilder::~PassManagerBuilder() {
65  delete LibraryInfo;
66  delete Inliner;
67}
68
69void PassManagerBuilder::addInitialAliasAnalysisPasses(
70    legacy::PassManagerBase &PM) const {
71  // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
72  // BasicAliasAnalysis wins if they disagree. This is intended to help
73  // support "obvious" type-punning idioms.
74  PM.add(createTypeBasedAAWrapperPass());
75  PM.add(createScopedNoAliasAAWrapperPass());
76}
77
78void PassManagerBuilder::populateFunctionPassManager(
79    legacy::FunctionPassManager &FPM) {
80  // Add LibraryInfo if we have some.
81  if (LibraryInfo)
82    FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
83
84  if (OptLevel == 0) return;
85
86  addInitialAliasAnalysisPasses(FPM);
87
88  // Lower llvm.expect to metadata before attempting transforms.
89  // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
90  FPM.add(createLowerExpectIntrinsicPass());
91  FPM.add(createCFGSimplificationPass());
92  FPM.add(createSROAPass());
93  FPM.add(createEarlyCSEPass());
94}
95
96void PassManagerBuilder::addFunctionSimplificationPasses(
97    legacy::PassManagerBase &MPM) {
98  // Start of function pass.
99  // Break up aggregate allocas, using SSAUpdater.
100  assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
101  MPM.add(createSROAPass());
102  MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
103
104  if (OptLevel > 1) {
105    // Speculative execution if the target has divergent branches; otherwise nop.
106    MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
107
108    MPM.add(createJumpThreadingPass());         // Thread jumps.
109    MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
110  }
111  MPM.add(
112      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
113          true))); // Merge & remove BBs
114  // Combine silly seq's
115  MPM.add(createInstructionCombiningPass());
116  if (SizeLevel == 0)
117    MPM.add(createLibCallsShrinkWrapPass());
118
119  // TODO: Investigate the cost/benefit of tail call elimination on debugging.
120  if (OptLevel > 1)
121    MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
122  MPM.add(
123      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
124          true)));                            // Merge & remove BBs
125  MPM.add(createReassociatePass());           // Reassociate expressions
126
127  // Begin the loop pass pipeline.
128
129  // The simple loop unswitch pass relies on separate cleanup passes. Schedule
130  // them first so when we re-process a loop they run before other loop
131  // passes.
132  MPM.add(createLoopInstSimplifyPass());
133  MPM.add(createLoopSimplifyCFGPass());
134
135  // Try to remove as much code from the loop header as possible,
136  // to reduce amount of IR that will have to be duplicated. However,
137  // do not perform speculative hoisting the first time as LICM
138  // will destroy metadata that may not need to be destroyed if run
139  // after loop rotation.
140  // TODO: Investigate promotion cap for O1.
141  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
142                         /*AllowSpeculation=*/false));
143  // Rotate Loop - disable header duplication at -Oz
144  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
145  // TODO: Investigate promotion cap for O1.
146  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
147                         /*AllowSpeculation=*/true));
148  MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3));
149  // FIXME: We break the loop pass pipeline here in order to do full
150  // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
151  // need for this.
152  MPM.add(createCFGSimplificationPass(
153      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
154  MPM.add(createInstructionCombiningPass());
155  // We resume loop passes creating a second loop pipeline here.
156  MPM.add(createLoopIdiomPass());             // Recognize idioms like memset.
157  MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars
158  MPM.add(createLoopDeletionPass());          // Delete dead loops
159
160  // Unroll small loops and perform peeling.
161  MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
162                                     ForgetAllSCEVInLoopUnroll));
163  // This ends the loop pass pipelines.
164
165  // Break up allocas that may now be splittable after loop unrolling.
166  MPM.add(createSROAPass());
167
168  if (OptLevel > 1) {
169    MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
170    MPM.add(createGVNPass(DisableGVNLoadPRE));  // Remove redundancies
171  }
172  MPM.add(createSCCPPass());                  // Constant prop with SCCP
173
174  // Delete dead bit computations (instcombine runs after to fold away the dead
175  // computations, and then ADCE will run later to exploit any new DCE
176  // opportunities that creates).
177  MPM.add(createBitTrackingDCEPass());        // Delete dead bit computations
178
179  // Run instcombine after redundancy elimination to exploit opportunities
180  // opened up by them.
181  MPM.add(createInstructionCombiningPass());
182  if (OptLevel > 1) {
183    MPM.add(createJumpThreadingPass());         // Thread jumps
184    MPM.add(createCorrelatedValuePropagationPass());
185  }
186  MPM.add(createAggressiveDCEPass()); // Delete dead instructions
187
188  MPM.add(createMemCpyOptPass());               // Remove memcpy / form memset
189  // TODO: Investigate if this is too expensive at O1.
190  if (OptLevel > 1) {
191    MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
192    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
193                           /*AllowSpeculation=*/true));
194  }
195
196  // Merge & remove BBs and sink & hoist common instructions.
197  MPM.add(createCFGSimplificationPass(
198      SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
199  // Clean up after everything.
200  MPM.add(createInstructionCombiningPass());
201}
202
203/// FIXME: Should LTO cause any differences to this set of passes?
204void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
205                                         bool IsFullLTO) {
206  PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
207
208  if (IsFullLTO) {
209    // The vectorizer may have significantly shortened a loop body; unroll
210    // again. Unroll small loops to hide loop backedge latency and saturate any
211    // parallel execution resources of an out-of-order processor. We also then
212    // need to clean up redundancies and loop invariant code.
213    // FIXME: It would be really good to use a loop-integrated instruction
214    // combiner for cleanup here so that the unrolling and LICM can be pipelined
215    // across the loop nests.
216    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
217                                ForgetAllSCEVInLoopUnroll));
218    PM.add(createWarnMissedTransformationsPass());
219  }
220
221  if (!IsFullLTO) {
222    // Eliminate loads by forwarding stores from the previous iteration to loads
223    // of the current iteration.
224    PM.add(createLoopLoadEliminationPass());
225  }
226  // Cleanup after the loop optimization passes.
227  PM.add(createInstructionCombiningPass());
228
229  // Now that we've formed fast to execute loop structures, we do further
230  // optimizations. These are run afterward as they might block doing complex
231  // analyses and transforms such as what are needed for loop vectorization.
232
233  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
234  // GVN, loop transforms, and others have already run, so it's now better to
235  // convert to more optimized IR using more aggressive simplify CFG options.
236  // The extra sinking transform can create larger basic blocks, so do this
237  // before SLP vectorization.
238  PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
239                                         .forwardSwitchCondToPhi(true)
240                                         .convertSwitchRangeToICmp(true)
241                                         .convertSwitchToLookupTable(true)
242                                         .needCanonicalLoops(false)
243                                         .hoistCommonInsts(true)
244                                         .sinkCommonInsts(true)));
245
246  if (IsFullLTO) {
247    PM.add(createSCCPPass());                 // Propagate exposed constants
248    PM.add(createInstructionCombiningPass()); // Clean up again
249    PM.add(createBitTrackingDCEPass());
250  }
251
252  // Optimize parallel scalar instruction chains into SIMD instructions.
253  if (SLPVectorize) {
254    PM.add(createSLPVectorizerPass());
255  }
256
257  // Enhance/cleanup vector code.
258  PM.add(createVectorCombinePass());
259
260  if (!IsFullLTO) {
261    PM.add(createInstructionCombiningPass());
262
263    // Unroll small loops
264    PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
265                                ForgetAllSCEVInLoopUnroll));
266
267    if (!DisableUnrollLoops) {
268      // LoopUnroll may generate some redundency to cleanup.
269      PM.add(createInstructionCombiningPass());
270
271      // Runtime unrolling will introduce runtime check in loop prologue. If the
272      // unrolled loop is a inner loop, then the prologue will be inside the
273      // outer loop. LICM pass can help to promote the runtime check out if the
274      // checked value is loop invariant.
275      PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
276                            /*AllowSpeculation=*/true));
277    }
278
279    PM.add(createWarnMissedTransformationsPass());
280  }
281
282  // After vectorization and unrolling, assume intrinsics may tell us more
283  // about pointer alignments.
284  PM.add(createAlignmentFromAssumptionsPass());
285
286  if (IsFullLTO)
287    PM.add(createInstructionCombiningPass());
288}
289
290void PassManagerBuilder::populateModulePassManager(
291    legacy::PassManagerBase &MPM) {
292  MPM.add(createAnnotation2MetadataLegacyPass());
293
294  // Allow forcing function attributes as a debugging and tuning aid.
295  MPM.add(createForceFunctionAttrsLegacyPass());
296
297  // If all optimizations are disabled, just run the always-inline pass and,
298  // if enabled, the function merging pass.
299  if (OptLevel == 0) {
300    if (Inliner) {
301      MPM.add(Inliner);
302      Inliner = nullptr;
303    }
304
305    // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
306    // creates a CGSCC pass manager, but we don't want to add extensions into
307    // that pass manager. To prevent this we insert a no-op module pass to reset
308    // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
309    // builds. The function merging pass is
310    if (MergeFunctions)
311      MPM.add(createMergeFunctionsPass());
312    return;
313  }
314
315  // Add LibraryInfo if we have some.
316  if (LibraryInfo)
317    MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
318
319  addInitialAliasAnalysisPasses(MPM);
320
321  // Infer attributes about declarations if possible.
322  MPM.add(createInferFunctionAttrsLegacyPass());
323
324  if (OptLevel > 2)
325    MPM.add(createCallSiteSplittingPass());
326
327  MPM.add(createIPSCCPPass());          // IP SCCP
328  MPM.add(createCalledValuePropagationPass());
329
330  MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
331  // Promote any localized global vars.
332  MPM.add(createPromoteMemoryToRegisterPass());
333
334  MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
335
336  MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
337  MPM.add(
338      createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
339          true))); // Clean up after IPCP & DAE
340
341  // We add a module alias analysis pass here. In part due to bugs in the
342  // analysis infrastructure this "works" in that the analysis stays alive
343  // for the entire SCC pass run below.
344  MPM.add(createGlobalsAAWrapperPass());
345
346  // Start of CallGraph SCC passes.
347  bool RunInliner = false;
348  if (Inliner) {
349    MPM.add(Inliner);
350    Inliner = nullptr;
351    RunInliner = true;
352  }
353
354  MPM.add(createPostOrderFunctionAttrsLegacyPass());
355
356  addFunctionSimplificationPasses(MPM);
357
358  // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
359  // pass manager that we are specifically trying to avoid. To prevent this
360  // we must insert a no-op module pass to reset the pass manager.
361  MPM.add(createBarrierNoopPass());
362
363  if (OptLevel > 1)
364    // Remove avail extern fns and globals definitions if we aren't
365    // compiling an object file for later LTO. For LTO we want to preserve
366    // these so they are eligible for inlining at link-time. Note if they
367    // are unreferenced they will be removed by GlobalDCE later, so
368    // this only impacts referenced available externally globals.
369    // Eventually they will be suppressed during codegen, but eliminating
370    // here enables more opportunity for GlobalDCE as it may make
371    // globals referenced by available external functions dead
372    // and saves running remaining passes on the eliminated functions.
373    MPM.add(createEliminateAvailableExternallyPass());
374
375  MPM.add(createReversePostOrderFunctionAttrsPass());
376
377  // The inliner performs some kind of dead code elimination as it goes,
378  // but there are cases that are not really caught by it. We might
379  // at some point consider teaching the inliner about them, but it
380  // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
381  // benefits generally outweight the cost, making the whole pipeline
382  // faster.
383  if (RunInliner) {
384    MPM.add(createGlobalOptimizerPass());
385    MPM.add(createGlobalDCEPass());
386  }
387
388  // We add a fresh GlobalsModRef run at this point. This is particularly
389  // useful as the above will have inlined, DCE'ed, and function-attr
390  // propagated everything. We should at this point have a reasonably minimal
391  // and richly annotated call graph. By computing aliasing and mod/ref
392  // information for all local globals here, the late loop passes and notably
393  // the vectorizer will be able to use them to help recognize vectorizable
394  // memory operations.
395  //
396  // Note that this relies on a bug in the pass manager which preserves
397  // a module analysis into a function pass pipeline (and throughout it) so
398  // long as the first function pass doesn't invalidate the module analysis.
399  // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
400  // this to work. Fortunately, it is trivial to preserve AliasAnalysis
401  // (doing nothing preserves it as it is required to be conservatively
402  // correct in the face of IR changes).
403  MPM.add(createGlobalsAAWrapperPass());
404
405  MPM.add(createFloat2IntPass());
406  MPM.add(createLowerConstantIntrinsicsPass());
407
408  // Re-rotate loops in all our loop nests. These may have fallout out of
409  // rotated form due to GVN or other transformations, and the vectorizer relies
410  // on the rotated form. Disable header duplication at -Oz.
411  MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
412
413  // Distribute loops to allow partial vectorization.  I.e. isolate dependences
414  // into separate loop that would otherwise inhibit vectorization.  This is
415  // currently only performed for loops marked with the metadata
416  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
417  MPM.add(createLoopDistributePass());
418
419  addVectorPasses(MPM, /* IsFullLTO */ false);
420
421  // FIXME: We shouldn't bother with this anymore.
422  MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
423
424  // GlobalOpt already deletes dead functions and globals, at -O2 try a
425  // late pass of GlobalDCE.  It is capable of deleting dead cycles.
426  if (OptLevel > 1) {
427    MPM.add(createGlobalDCEPass());         // Remove dead fns and globals.
428    MPM.add(createConstantMergePass());     // Merge dup global constants
429  }
430
431  if (MergeFunctions)
432    MPM.add(createMergeFunctionsPass());
433
434  // LoopSink pass sinks instructions hoisted by LICM, which serves as a
435  // canonicalization pass that enables other optimizations. As a result,
436  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
437  // result too early.
438  MPM.add(createLoopSinkPass());
439  // Get rid of LCSSA nodes.
440  MPM.add(createInstSimplifyLegacyPass());
441
442  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
443  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
444  // flattening of blocks.
445  MPM.add(createDivRemPairsPass());
446
447  // LoopSink (and other loop passes since the last simplifyCFG) might have
448  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
449  MPM.add(createCFGSimplificationPass(
450      SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
451}
452
453LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
454  PassManagerBuilder *PMB = new PassManagerBuilder();
455  return wrap(PMB);
456}
457
458void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
459  PassManagerBuilder *Builder = unwrap(PMB);
460  delete Builder;
461}
462
463void
464LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
465                                  unsigned OptLevel) {
466  PassManagerBuilder *Builder = unwrap(PMB);
467  Builder->OptLevel = OptLevel;
468}
469
470void
471LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
472                                   unsigned SizeLevel) {
473  PassManagerBuilder *Builder = unwrap(PMB);
474  Builder->SizeLevel = SizeLevel;
475}
476
477void
478LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
479                                            LLVMBool Value) {
480  // NOTE: The DisableUnitAtATime switch has been removed.
481}
482
483void
484LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
485                                            LLVMBool Value) {
486  PassManagerBuilder *Builder = unwrap(PMB);
487  Builder->DisableUnrollLoops = Value;
488}
489
490void
491LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
492                                                 LLVMBool Value) {
493  // NOTE: The simplify-libcalls pass has been removed.
494}
495
496void
497LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
498                                              unsigned Threshold) {
499  PassManagerBuilder *Builder = unwrap(PMB);
500  Builder->Inliner = createFunctionInliningPass(Threshold);
501}
502
503void
504LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
505                                                  LLVMPassManagerRef PM) {
506  PassManagerBuilder *Builder = unwrap(PMB);
507  legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
508  Builder->populateFunctionPassManager(*FPM);
509}
510
511void
512LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
513                                                LLVMPassManagerRef PM) {
514  PassManagerBuilder *Builder = unwrap(PMB);
515  legacy::PassManagerBase *MPM = unwrap(PM);
516  Builder->populateModulePassManager(*MPM);
517}
518