1//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the PassManagerBuilder class, which is used to set up a 10// "standard" optimization sequence suitable for languages like C and C++. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Transforms/IPO/PassManagerBuilder.h" 15#include "llvm-c/Transforms/PassManagerBuilder.h" 16#include "llvm/ADT/STLExtras.h" 17#include "llvm/ADT/SmallVector.h" 18#include "llvm/Analysis/GlobalsModRef.h" 19#include "llvm/Analysis/ScopedNoAliasAA.h" 20#include "llvm/Analysis/TargetLibraryInfo.h" 21#include "llvm/Analysis/TypeBasedAliasAnalysis.h" 22#include "llvm/IR/LegacyPassManager.h" 23#include "llvm/Support/CommandLine.h" 24#include "llvm/Support/ManagedStatic.h" 25#include "llvm/Target/CGPassBuilderOption.h" 26#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" 27#include "llvm/Transforms/IPO.h" 28#include "llvm/Transforms/IPO/Attributor.h" 29#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" 30#include "llvm/Transforms/IPO/FunctionAttrs.h" 31#include "llvm/Transforms/IPO/InferFunctionAttrs.h" 32#include "llvm/Transforms/InstCombine/InstCombine.h" 33#include "llvm/Transforms/Instrumentation.h" 34#include "llvm/Transforms/Scalar.h" 35#include "llvm/Transforms/Scalar/GVN.h" 36#include "llvm/Transforms/Scalar/LICM.h" 37#include "llvm/Transforms/Scalar/LoopUnrollPass.h" 38#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" 39#include "llvm/Transforms/Utils.h" 40#include "llvm/Transforms/Vectorize.h" 41 42using namespace llvm; 43 44PassManagerBuilder::PassManagerBuilder() { 45 OptLevel = 2; 46 SizeLevel = 0; 47 LibraryInfo = nullptr; 48 Inliner = nullptr; 49 DisableUnrollLoops = false; 50 SLPVectorize = false; 51 LoopVectorize = true; 52 LoopsInterleaved = true; 53 LicmMssaOptCap = SetLicmMssaOptCap; 54 LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; 55 DisableGVNLoadPRE = false; 56 ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; 57 VerifyInput = false; 58 VerifyOutput = false; 59 MergeFunctions = false; 60 DivergentTarget = false; 61 CallGraphProfile = true; 62} 63 64PassManagerBuilder::~PassManagerBuilder() { 65 delete LibraryInfo; 66 delete Inliner; 67} 68 69void PassManagerBuilder::addInitialAliasAnalysisPasses( 70 legacy::PassManagerBase &PM) const { 71 // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that 72 // BasicAliasAnalysis wins if they disagree. This is intended to help 73 // support "obvious" type-punning idioms. 74 PM.add(createTypeBasedAAWrapperPass()); 75 PM.add(createScopedNoAliasAAWrapperPass()); 76} 77 78void PassManagerBuilder::populateFunctionPassManager( 79 legacy::FunctionPassManager &FPM) { 80 // Add LibraryInfo if we have some. 81 if (LibraryInfo) 82 FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 83 84 if (OptLevel == 0) return; 85 86 addInitialAliasAnalysisPasses(FPM); 87 88 // Lower llvm.expect to metadata before attempting transforms. 89 // Compare/branch metadata may alter the behavior of passes like SimplifyCFG. 90 FPM.add(createLowerExpectIntrinsicPass()); 91 FPM.add(createCFGSimplificationPass()); 92 FPM.add(createSROAPass()); 93 FPM.add(createEarlyCSEPass()); 94} 95 96void PassManagerBuilder::addFunctionSimplificationPasses( 97 legacy::PassManagerBase &MPM) { 98 // Start of function pass. 99 // Break up aggregate allocas, using SSAUpdater. 100 assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!"); 101 MPM.add(createSROAPass()); 102 MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies 103 104 if (OptLevel > 1) { 105 // Speculative execution if the target has divergent branches; otherwise nop. 106 MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); 107 108 MPM.add(createJumpThreadingPass()); // Thread jumps. 109 MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals 110 } 111 MPM.add( 112 createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( 113 true))); // Merge & remove BBs 114 // Combine silly seq's 115 MPM.add(createInstructionCombiningPass()); 116 if (SizeLevel == 0) 117 MPM.add(createLibCallsShrinkWrapPass()); 118 119 // TODO: Investigate the cost/benefit of tail call elimination on debugging. 120 if (OptLevel > 1) 121 MPM.add(createTailCallEliminationPass()); // Eliminate tail calls 122 MPM.add( 123 createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( 124 true))); // Merge & remove BBs 125 MPM.add(createReassociatePass()); // Reassociate expressions 126 127 // Begin the loop pass pipeline. 128 129 // The simple loop unswitch pass relies on separate cleanup passes. Schedule 130 // them first so when we re-process a loop they run before other loop 131 // passes. 132 MPM.add(createLoopInstSimplifyPass()); 133 MPM.add(createLoopSimplifyCFGPass()); 134 135 // Try to remove as much code from the loop header as possible, 136 // to reduce amount of IR that will have to be duplicated. However, 137 // do not perform speculative hoisting the first time as LICM 138 // will destroy metadata that may not need to be destroyed if run 139 // after loop rotation. 140 // TODO: Investigate promotion cap for O1. 141 MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 142 /*AllowSpeculation=*/false)); 143 // Rotate Loop - disable header duplication at -Oz 144 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); 145 // TODO: Investigate promotion cap for O1. 146 MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 147 /*AllowSpeculation=*/true)); 148 MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3)); 149 // FIXME: We break the loop pass pipeline here in order to do full 150 // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the 151 // need for this. 152 MPM.add(createCFGSimplificationPass( 153 SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 154 MPM.add(createInstructionCombiningPass()); 155 // We resume loop passes creating a second loop pipeline here. 156 MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. 157 MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars 158 MPM.add(createLoopDeletionPass()); // Delete dead loops 159 160 // Unroll small loops and perform peeling. 161 MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops, 162 ForgetAllSCEVInLoopUnroll)); 163 // This ends the loop pass pipelines. 164 165 // Break up allocas that may now be splittable after loop unrolling. 166 MPM.add(createSROAPass()); 167 168 if (OptLevel > 1) { 169 MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds 170 MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies 171 } 172 MPM.add(createSCCPPass()); // Constant prop with SCCP 173 174 // Delete dead bit computations (instcombine runs after to fold away the dead 175 // computations, and then ADCE will run later to exploit any new DCE 176 // opportunities that creates). 177 MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations 178 179 // Run instcombine after redundancy elimination to exploit opportunities 180 // opened up by them. 181 MPM.add(createInstructionCombiningPass()); 182 if (OptLevel > 1) { 183 MPM.add(createJumpThreadingPass()); // Thread jumps 184 MPM.add(createCorrelatedValuePropagationPass()); 185 } 186 MPM.add(createAggressiveDCEPass()); // Delete dead instructions 187 188 MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset 189 // TODO: Investigate if this is too expensive at O1. 190 if (OptLevel > 1) { 191 MPM.add(createDeadStoreEliminationPass()); // Delete dead stores 192 MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 193 /*AllowSpeculation=*/true)); 194 } 195 196 // Merge & remove BBs and sink & hoist common instructions. 197 MPM.add(createCFGSimplificationPass( 198 SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); 199 // Clean up after everything. 200 MPM.add(createInstructionCombiningPass()); 201} 202 203/// FIXME: Should LTO cause any differences to this set of passes? 204void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, 205 bool IsFullLTO) { 206 PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize)); 207 208 if (IsFullLTO) { 209 // The vectorizer may have significantly shortened a loop body; unroll 210 // again. Unroll small loops to hide loop backedge latency and saturate any 211 // parallel execution resources of an out-of-order processor. We also then 212 // need to clean up redundancies and loop invariant code. 213 // FIXME: It would be really good to use a loop-integrated instruction 214 // combiner for cleanup here so that the unrolling and LICM can be pipelined 215 // across the loop nests. 216 PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, 217 ForgetAllSCEVInLoopUnroll)); 218 PM.add(createWarnMissedTransformationsPass()); 219 } 220 221 if (!IsFullLTO) { 222 // Eliminate loads by forwarding stores from the previous iteration to loads 223 // of the current iteration. 224 PM.add(createLoopLoadEliminationPass()); 225 } 226 // Cleanup after the loop optimization passes. 227 PM.add(createInstructionCombiningPass()); 228 229 // Now that we've formed fast to execute loop structures, we do further 230 // optimizations. These are run afterward as they might block doing complex 231 // analyses and transforms such as what are needed for loop vectorization. 232 233 // Cleanup after loop vectorization, etc. Simplification passes like CVP and 234 // GVN, loop transforms, and others have already run, so it's now better to 235 // convert to more optimized IR using more aggressive simplify CFG options. 236 // The extra sinking transform can create larger basic blocks, so do this 237 // before SLP vectorization. 238 PM.add(createCFGSimplificationPass(SimplifyCFGOptions() 239 .forwardSwitchCondToPhi(true) 240 .convertSwitchRangeToICmp(true) 241 .convertSwitchToLookupTable(true) 242 .needCanonicalLoops(false) 243 .hoistCommonInsts(true) 244 .sinkCommonInsts(true))); 245 246 if (IsFullLTO) { 247 PM.add(createSCCPPass()); // Propagate exposed constants 248 PM.add(createInstructionCombiningPass()); // Clean up again 249 PM.add(createBitTrackingDCEPass()); 250 } 251 252 // Optimize parallel scalar instruction chains into SIMD instructions. 253 if (SLPVectorize) { 254 PM.add(createSLPVectorizerPass()); 255 } 256 257 // Enhance/cleanup vector code. 258 PM.add(createVectorCombinePass()); 259 260 if (!IsFullLTO) { 261 PM.add(createInstructionCombiningPass()); 262 263 // Unroll small loops 264 PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops, 265 ForgetAllSCEVInLoopUnroll)); 266 267 if (!DisableUnrollLoops) { 268 // LoopUnroll may generate some redundency to cleanup. 269 PM.add(createInstructionCombiningPass()); 270 271 // Runtime unrolling will introduce runtime check in loop prologue. If the 272 // unrolled loop is a inner loop, then the prologue will be inside the 273 // outer loop. LICM pass can help to promote the runtime check out if the 274 // checked value is loop invariant. 275 PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap, 276 /*AllowSpeculation=*/true)); 277 } 278 279 PM.add(createWarnMissedTransformationsPass()); 280 } 281 282 // After vectorization and unrolling, assume intrinsics may tell us more 283 // about pointer alignments. 284 PM.add(createAlignmentFromAssumptionsPass()); 285 286 if (IsFullLTO) 287 PM.add(createInstructionCombiningPass()); 288} 289 290void PassManagerBuilder::populateModulePassManager( 291 legacy::PassManagerBase &MPM) { 292 MPM.add(createAnnotation2MetadataLegacyPass()); 293 294 // Allow forcing function attributes as a debugging and tuning aid. 295 MPM.add(createForceFunctionAttrsLegacyPass()); 296 297 // If all optimizations are disabled, just run the always-inline pass and, 298 // if enabled, the function merging pass. 299 if (OptLevel == 0) { 300 if (Inliner) { 301 MPM.add(Inliner); 302 Inliner = nullptr; 303 } 304 305 // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly 306 // creates a CGSCC pass manager, but we don't want to add extensions into 307 // that pass manager. To prevent this we insert a no-op module pass to reset 308 // the pass manager to get the same behavior as EP_OptimizerLast in non-O0 309 // builds. The function merging pass is 310 if (MergeFunctions) 311 MPM.add(createMergeFunctionsPass()); 312 return; 313 } 314 315 // Add LibraryInfo if we have some. 316 if (LibraryInfo) 317 MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); 318 319 addInitialAliasAnalysisPasses(MPM); 320 321 // Infer attributes about declarations if possible. 322 MPM.add(createInferFunctionAttrsLegacyPass()); 323 324 if (OptLevel > 2) 325 MPM.add(createCallSiteSplittingPass()); 326 327 MPM.add(createIPSCCPPass()); // IP SCCP 328 MPM.add(createCalledValuePropagationPass()); 329 330 MPM.add(createGlobalOptimizerPass()); // Optimize out global vars 331 // Promote any localized global vars. 332 MPM.add(createPromoteMemoryToRegisterPass()); 333 334 MPM.add(createDeadArgEliminationPass()); // Dead argument elimination 335 336 MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE 337 MPM.add( 338 createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp( 339 true))); // Clean up after IPCP & DAE 340 341 // We add a module alias analysis pass here. In part due to bugs in the 342 // analysis infrastructure this "works" in that the analysis stays alive 343 // for the entire SCC pass run below. 344 MPM.add(createGlobalsAAWrapperPass()); 345 346 // Start of CallGraph SCC passes. 347 bool RunInliner = false; 348 if (Inliner) { 349 MPM.add(Inliner); 350 Inliner = nullptr; 351 RunInliner = true; 352 } 353 354 MPM.add(createPostOrderFunctionAttrsLegacyPass()); 355 356 addFunctionSimplificationPasses(MPM); 357 358 // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC 359 // pass manager that we are specifically trying to avoid. To prevent this 360 // we must insert a no-op module pass to reset the pass manager. 361 MPM.add(createBarrierNoopPass()); 362 363 if (OptLevel > 1) 364 // Remove avail extern fns and globals definitions if we aren't 365 // compiling an object file for later LTO. For LTO we want to preserve 366 // these so they are eligible for inlining at link-time. Note if they 367 // are unreferenced they will be removed by GlobalDCE later, so 368 // this only impacts referenced available externally globals. 369 // Eventually they will be suppressed during codegen, but eliminating 370 // here enables more opportunity for GlobalDCE as it may make 371 // globals referenced by available external functions dead 372 // and saves running remaining passes on the eliminated functions. 373 MPM.add(createEliminateAvailableExternallyPass()); 374 375 MPM.add(createReversePostOrderFunctionAttrsPass()); 376 377 // The inliner performs some kind of dead code elimination as it goes, 378 // but there are cases that are not really caught by it. We might 379 // at some point consider teaching the inliner about them, but it 380 // is OK for now to run GlobalOpt + GlobalDCE in tandem as their 381 // benefits generally outweight the cost, making the whole pipeline 382 // faster. 383 if (RunInliner) { 384 MPM.add(createGlobalOptimizerPass()); 385 MPM.add(createGlobalDCEPass()); 386 } 387 388 // We add a fresh GlobalsModRef run at this point. This is particularly 389 // useful as the above will have inlined, DCE'ed, and function-attr 390 // propagated everything. We should at this point have a reasonably minimal 391 // and richly annotated call graph. By computing aliasing and mod/ref 392 // information for all local globals here, the late loop passes and notably 393 // the vectorizer will be able to use them to help recognize vectorizable 394 // memory operations. 395 // 396 // Note that this relies on a bug in the pass manager which preserves 397 // a module analysis into a function pass pipeline (and throughout it) so 398 // long as the first function pass doesn't invalidate the module analysis. 399 // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for 400 // this to work. Fortunately, it is trivial to preserve AliasAnalysis 401 // (doing nothing preserves it as it is required to be conservatively 402 // correct in the face of IR changes). 403 MPM.add(createGlobalsAAWrapperPass()); 404 405 MPM.add(createFloat2IntPass()); 406 MPM.add(createLowerConstantIntrinsicsPass()); 407 408 // Re-rotate loops in all our loop nests. These may have fallout out of 409 // rotated form due to GVN or other transformations, and the vectorizer relies 410 // on the rotated form. Disable header duplication at -Oz. 411 MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false)); 412 413 // Distribute loops to allow partial vectorization. I.e. isolate dependences 414 // into separate loop that would otherwise inhibit vectorization. This is 415 // currently only performed for loops marked with the metadata 416 // llvm.loop.distribute=true or when -enable-loop-distribute is specified. 417 MPM.add(createLoopDistributePass()); 418 419 addVectorPasses(MPM, /* IsFullLTO */ false); 420 421 // FIXME: We shouldn't bother with this anymore. 422 MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes 423 424 // GlobalOpt already deletes dead functions and globals, at -O2 try a 425 // late pass of GlobalDCE. It is capable of deleting dead cycles. 426 if (OptLevel > 1) { 427 MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. 428 MPM.add(createConstantMergePass()); // Merge dup global constants 429 } 430 431 if (MergeFunctions) 432 MPM.add(createMergeFunctionsPass()); 433 434 // LoopSink pass sinks instructions hoisted by LICM, which serves as a 435 // canonicalization pass that enables other optimizations. As a result, 436 // LoopSink pass needs to be a very late IR pass to avoid undoing LICM 437 // result too early. 438 MPM.add(createLoopSinkPass()); 439 // Get rid of LCSSA nodes. 440 MPM.add(createInstSimplifyLegacyPass()); 441 442 // This hoists/decomposes div/rem ops. It should run after other sink/hoist 443 // passes to avoid re-sinking, but before SimplifyCFG because it can allow 444 // flattening of blocks. 445 MPM.add(createDivRemPairsPass()); 446 447 // LoopSink (and other loop passes since the last simplifyCFG) might have 448 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. 449 MPM.add(createCFGSimplificationPass( 450 SimplifyCFGOptions().convertSwitchRangeToICmp(true))); 451} 452 453LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() { 454 PassManagerBuilder *PMB = new PassManagerBuilder(); 455 return wrap(PMB); 456} 457 458void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) { 459 PassManagerBuilder *Builder = unwrap(PMB); 460 delete Builder; 461} 462 463void 464LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, 465 unsigned OptLevel) { 466 PassManagerBuilder *Builder = unwrap(PMB); 467 Builder->OptLevel = OptLevel; 468} 469 470void 471LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, 472 unsigned SizeLevel) { 473 PassManagerBuilder *Builder = unwrap(PMB); 474 Builder->SizeLevel = SizeLevel; 475} 476 477void 478LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, 479 LLVMBool Value) { 480 // NOTE: The DisableUnitAtATime switch has been removed. 481} 482 483void 484LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, 485 LLVMBool Value) { 486 PassManagerBuilder *Builder = unwrap(PMB); 487 Builder->DisableUnrollLoops = Value; 488} 489 490void 491LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, 492 LLVMBool Value) { 493 // NOTE: The simplify-libcalls pass has been removed. 494} 495 496void 497LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, 498 unsigned Threshold) { 499 PassManagerBuilder *Builder = unwrap(PMB); 500 Builder->Inliner = createFunctionInliningPass(Threshold); 501} 502 503void 504LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, 505 LLVMPassManagerRef PM) { 506 PassManagerBuilder *Builder = unwrap(PMB); 507 legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM); 508 Builder->populateFunctionPassManager(*FPM); 509} 510 511void 512LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, 513 LLVMPassManagerRef PM) { 514 PassManagerBuilder *Builder = unwrap(PMB); 515 legacy::PassManagerBase *MPM = unwrap(PM); 516 Builder->populateModulePassManager(*MPM); 517} 518