1//===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This pass transforms loops that contain branches on loop-invariant conditions 10// to multiple loops. For example, it turns the left into the right code: 11// 12// for (...) if (lic) 13// A for (...) 14// if (lic) A; B; C 15// B else 16// C for (...) 17// A; C 18// 19// This can increase the size of the code exponentially (doubling it every time 20// a loop is unswitched) so we only unswitch if the resultant code will be 21// smaller than a threshold. 22// 23// This pass expects LICM to be run before it to hoist invariant conditions out 24// of the loop, to make the unswitching opportunity obvious. 25// 26//===----------------------------------------------------------------------===// 27 28#include "llvm/ADT/DenseMap.h" 29#include "llvm/ADT/SmallPtrSet.h" 30#include "llvm/ADT/SmallVector.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/Analysis/AssumptionCache.h" 33#include "llvm/Analysis/CodeMetrics.h" 34#include "llvm/Analysis/InstructionSimplify.h" 35#include "llvm/Analysis/LegacyDivergenceAnalysis.h" 36#include "llvm/Analysis/LoopInfo.h" 37#include "llvm/Analysis/LoopIterator.h" 38#include "llvm/Analysis/LoopPass.h" 39#include "llvm/Analysis/MemorySSA.h" 40#include "llvm/Analysis/MemorySSAUpdater.h" 41#include "llvm/Analysis/MustExecute.h" 42#include "llvm/Analysis/ScalarEvolution.h" 43#include "llvm/Analysis/TargetTransformInfo.h" 44#include "llvm/IR/Attributes.h" 45#include "llvm/IR/BasicBlock.h" 46#include "llvm/IR/Constant.h" 47#include "llvm/IR/Constants.h" 48#include "llvm/IR/DerivedTypes.h" 49#include "llvm/IR/Dominators.h" 50#include "llvm/IR/Function.h" 51#include "llvm/IR/IRBuilder.h" 52#include "llvm/IR/InstrTypes.h" 53#include "llvm/IR/Instruction.h" 54#include "llvm/IR/Instructions.h" 55#include "llvm/IR/IntrinsicInst.h" 56#include "llvm/IR/Intrinsics.h" 57#include "llvm/IR/Module.h" 58#include "llvm/IR/Type.h" 59#include "llvm/IR/User.h" 60#include "llvm/IR/Value.h" 61#include "llvm/IR/ValueHandle.h" 62#include "llvm/InitializePasses.h" 63#include "llvm/Pass.h" 64#include "llvm/Support/Casting.h" 65#include "llvm/Support/CommandLine.h" 66#include "llvm/Support/Debug.h" 67#include "llvm/Support/raw_ostream.h" 68#include "llvm/Transforms/Scalar.h" 69#include "llvm/Transforms/Scalar/LoopPassManager.h" 70#include "llvm/Transforms/Utils/BasicBlockUtils.h" 71#include "llvm/Transforms/Utils/Cloning.h" 72#include "llvm/Transforms/Utils/Local.h" 73#include "llvm/Transforms/Utils/LoopUtils.h" 74#include "llvm/Transforms/Utils/ValueMapper.h" 75#include <algorithm> 76#include <cassert> 77#include <map> 78#include <set> 79#include <tuple> 80#include <utility> 81#include <vector> 82 83using namespace llvm; 84 85#define DEBUG_TYPE "loop-unswitch" 86 87STATISTIC(NumBranches, "Number of branches unswitched"); 88STATISTIC(NumSwitches, "Number of switches unswitched"); 89STATISTIC(NumGuards, "Number of guards unswitched"); 90STATISTIC(NumSelects , "Number of selects unswitched"); 91STATISTIC(NumTrivial , "Number of unswitches that are trivial"); 92STATISTIC(NumSimplify, "Number of simplifications of unswitched code"); 93STATISTIC(TotalInsts, "Total number of instructions analyzed"); 94 95// The specific value of 100 here was chosen based only on intuition and a 96// few specific examples. 97static cl::opt<unsigned> 98Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), 99 cl::init(100), cl::Hidden); 100 101namespace { 102 103 class LUAnalysisCache { 104 using UnswitchedValsMap = 105 DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>; 106 using UnswitchedValsIt = UnswitchedValsMap::iterator; 107 108 struct LoopProperties { 109 unsigned CanBeUnswitchedCount; 110 unsigned WasUnswitchedCount; 111 unsigned SizeEstimation; 112 UnswitchedValsMap UnswitchedVals; 113 }; 114 115 // Here we use std::map instead of DenseMap, since we need to keep valid 116 // LoopProperties pointer for current loop for better performance. 117 using LoopPropsMap = std::map<const Loop *, LoopProperties>; 118 using LoopPropsMapIt = LoopPropsMap::iterator; 119 120 LoopPropsMap LoopsProperties; 121 UnswitchedValsMap *CurLoopInstructions = nullptr; 122 LoopProperties *CurrentLoopProperties = nullptr; 123 124 // A loop unswitching with an estimated cost above this threshold 125 // is not performed. MaxSize is turned into unswitching quota for 126 // the current loop, and reduced correspondingly, though note that 127 // the quota is returned by releaseMemory() when the loop has been 128 // processed, so that MaxSize will return to its previous 129 // value. So in most cases MaxSize will equal the Threshold flag 130 // when a new loop is processed. An exception to that is that 131 // MaxSize will have a smaller value while processing nested loops 132 // that were introduced due to loop unswitching of an outer loop. 133 // 134 // FIXME: The way that MaxSize works is subtle and depends on the 135 // pass manager processing loops and calling releaseMemory() in a 136 // specific order. It would be good to find a more straightforward 137 // way of doing what MaxSize does. 138 unsigned MaxSize; 139 140 public: 141 LUAnalysisCache() : MaxSize(Threshold) {} 142 143 // Analyze loop. Check its size, calculate is it possible to unswitch 144 // it. Returns true if we can unswitch this loop. 145 bool countLoop(const Loop *L, const TargetTransformInfo &TTI, 146 AssumptionCache *AC); 147 148 // Clean all data related to given loop. 149 void forgetLoop(const Loop *L); 150 151 // Mark case value as unswitched. 152 // Since SI instruction can be partly unswitched, in order to avoid 153 // extra unswitching in cloned loops keep track all unswitched values. 154 void setUnswitched(const SwitchInst *SI, const Value *V); 155 156 // Check was this case value unswitched before or not. 157 bool isUnswitched(const SwitchInst *SI, const Value *V); 158 159 // Returns true if another unswitching could be done within the cost 160 // threshold. 161 bool costAllowsUnswitching(); 162 163 // Clone all loop-unswitch related loop properties. 164 // Redistribute unswitching quotas. 165 // Note, that new loop data is stored inside the VMap. 166 void cloneData(const Loop *NewLoop, const Loop *OldLoop, 167 const ValueToValueMapTy &VMap); 168 }; 169 170 class LoopUnswitch : public LoopPass { 171 LoopInfo *LI; // Loop information 172 LPPassManager *LPM; 173 AssumptionCache *AC; 174 175 // Used to check if second loop needs processing after 176 // rewriteLoopBodyWithConditionConstant rewrites first loop. 177 std::vector<Loop*> LoopProcessWorklist; 178 179 LUAnalysisCache BranchesInfo; 180 181 bool OptimizeForSize; 182 bool RedoLoop = false; 183 184 Loop *CurrentLoop = nullptr; 185 DominatorTree *DT = nullptr; 186 MemorySSA *MSSA = nullptr; 187 std::unique_ptr<MemorySSAUpdater> MSSAU; 188 BasicBlock *LoopHeader = nullptr; 189 BasicBlock *LoopPreheader = nullptr; 190 191 bool SanitizeMemory; 192 SimpleLoopSafetyInfo SafetyInfo; 193 194 // LoopBlocks contains all of the basic blocks of the loop, including the 195 // preheader of the loop, the body of the loop, and the exit blocks of the 196 // loop, in that order. 197 std::vector<BasicBlock*> LoopBlocks; 198 // NewBlocks contained cloned copy of basic blocks from LoopBlocks. 199 std::vector<BasicBlock*> NewBlocks; 200 201 bool HasBranchDivergence; 202 203 public: 204 static char ID; // Pass ID, replacement for typeid 205 206 explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) 207 : LoopPass(ID), OptimizeForSize(Os), 208 HasBranchDivergence(HasBranchDivergence) { 209 initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); 210 } 211 212 bool runOnLoop(Loop *L, LPPassManager &LPM) override; 213 bool processCurrentLoop(); 214 bool isUnreachableDueToPreviousUnswitching(BasicBlock *); 215 216 /// This transformation requires natural loop information & requires that 217 /// loop preheaders be inserted into the CFG. 218 /// 219 void getAnalysisUsage(AnalysisUsage &AU) const override { 220 AU.addRequired<AssumptionCacheTracker>(); 221 AU.addRequired<TargetTransformInfoWrapperPass>(); 222 if (EnableMSSALoopDependency) { 223 AU.addRequired<MemorySSAWrapperPass>(); 224 AU.addPreserved<MemorySSAWrapperPass>(); 225 } 226 if (HasBranchDivergence) 227 AU.addRequired<LegacyDivergenceAnalysis>(); 228 getLoopAnalysisUsage(AU); 229 } 230 231 private: 232 void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); } 233 234 void initLoopData() { 235 LoopHeader = CurrentLoop->getHeader(); 236 LoopPreheader = CurrentLoop->getLoopPreheader(); 237 } 238 239 /// Split all of the edges from inside the loop to their exit blocks. 240 /// Update the appropriate Phi nodes as we do so. 241 void splitExitEdges(Loop *L, 242 const SmallVectorImpl<BasicBlock *> &ExitBlocks); 243 244 bool tryTrivialLoopUnswitch(bool &Changed); 245 246 bool unswitchIfProfitable(Value *LoopCond, Constant *Val, 247 Instruction *TI = nullptr); 248 void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 249 BasicBlock *ExitBlock, Instruction *TI); 250 void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, 251 Instruction *TI); 252 253 void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 254 Constant *Val, bool IsEqual); 255 256 void emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 257 BasicBlock *TrueDest, 258 BasicBlock *FalseDest, 259 BranchInst *OldBranch, Instruction *TI); 260 261 void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); 262 263 /// Given that the Invariant is not equal to Val. Simplify instructions 264 /// in the loop. 265 Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant, 266 Constant *Val); 267 }; 268 269} // end anonymous namespace 270 271// Analyze loop. Check its size, calculate is it possible to unswitch 272// it. Returns true if we can unswitch this loop. 273bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, 274 AssumptionCache *AC) { 275 LoopPropsMapIt PropsIt; 276 bool Inserted; 277 std::tie(PropsIt, Inserted) = 278 LoopsProperties.insert(std::make_pair(L, LoopProperties())); 279 280 LoopProperties &Props = PropsIt->second; 281 282 if (Inserted) { 283 // New loop. 284 285 // Limit the number of instructions to avoid causing significant code 286 // expansion, and the number of basic blocks, to avoid loops with 287 // large numbers of branches which cause loop unswitching to go crazy. 288 // This is a very ad-hoc heuristic. 289 290 SmallPtrSet<const Value *, 32> EphValues; 291 CodeMetrics::collectEphemeralValues(L, AC, EphValues); 292 293 // FIXME: This is overly conservative because it does not take into 294 // consideration code simplification opportunities and code that can 295 // be shared by the resultant unswitched loops. 296 CodeMetrics Metrics; 297 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; 298 ++I) 299 Metrics.analyzeBasicBlock(*I, TTI, EphValues); 300 301 Props.SizeEstimation = Metrics.NumInsts; 302 Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); 303 Props.WasUnswitchedCount = 0; 304 MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; 305 306 if (Metrics.notDuplicatable) { 307 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName() 308 << ", contents cannot be " 309 << "duplicated!\n"); 310 return false; 311 } 312 } 313 314 // Be careful. This links are good only before new loop addition. 315 CurrentLoopProperties = &Props; 316 CurLoopInstructions = &Props.UnswitchedVals; 317 318 return true; 319} 320 321// Clean all data related to given loop. 322void LUAnalysisCache::forgetLoop(const Loop *L) { 323 LoopPropsMapIt LIt = LoopsProperties.find(L); 324 325 if (LIt != LoopsProperties.end()) { 326 LoopProperties &Props = LIt->second; 327 MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * 328 Props.SizeEstimation; 329 LoopsProperties.erase(LIt); 330 } 331 332 CurrentLoopProperties = nullptr; 333 CurLoopInstructions = nullptr; 334} 335 336// Mark case value as unswitched. 337// Since SI instruction can be partly unswitched, in order to avoid 338// extra unswitching in cloned loops keep track all unswitched values. 339void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { 340 (*CurLoopInstructions)[SI].insert(V); 341} 342 343// Check was this case value unswitched before or not. 344bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { 345 return (*CurLoopInstructions)[SI].count(V); 346} 347 348bool LUAnalysisCache::costAllowsUnswitching() { 349 return CurrentLoopProperties->CanBeUnswitchedCount > 0; 350} 351 352// Clone all loop-unswitch related loop properties. 353// Redistribute unswitching quotas. 354// Note, that new loop data is stored inside the VMap. 355void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, 356 const ValueToValueMapTy &VMap) { 357 LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; 358 LoopProperties &OldLoopProps = *CurrentLoopProperties; 359 UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; 360 361 // Reallocate "can-be-unswitched quota" 362 363 --OldLoopProps.CanBeUnswitchedCount; 364 ++OldLoopProps.WasUnswitchedCount; 365 NewLoopProps.WasUnswitchedCount = 0; 366 unsigned Quota = OldLoopProps.CanBeUnswitchedCount; 367 NewLoopProps.CanBeUnswitchedCount = Quota / 2; 368 OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; 369 370 NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; 371 372 // Clone unswitched values info: 373 // for new loop switches we clone info about values that was 374 // already unswitched and has redundant successors. 375 for (UnswitchedValsIt I = Insts.begin(); I != Insts.end(); ++I) { 376 const SwitchInst *OldInst = I->first; 377 Value *NewI = VMap.lookup(OldInst); 378 const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI); 379 assert(NewInst && "All instructions that are in SrcBB must be in VMap."); 380 381 NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; 382 } 383} 384 385char LoopUnswitch::ID = 0; 386 387INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops", 388 false, false) 389INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 390INITIALIZE_PASS_DEPENDENCY(LoopPass) 391INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 392INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis) 393INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) 394INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops", 395 false, false) 396 397Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) { 398 return new LoopUnswitch(Os, HasBranchDivergence); 399} 400 401/// Operator chain lattice. 402enum OperatorChain { 403 OC_OpChainNone, ///< There is no operator. 404 OC_OpChainOr, ///< There are only ORs. 405 OC_OpChainAnd, ///< There are only ANDs. 406 OC_OpChainMixed ///< There are ANDs and ORs. 407}; 408 409/// Cond is a condition that occurs in L. If it is invariant in the loop, or has 410/// an invariant piece, return the invariant. Otherwise, return null. 411// 412/// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a 413/// mixed operator chain, as we can not reliably find a value which will 414/// simplify the operator chain. If the chain is AND-only or OR-only, we can use 415/// 0 or ~0 to simplify the chain. 416/// 417/// NOTE: In case a partial LIV and a mixed operator chain, we may be able to 418/// simplify the condition itself to a loop variant condition, but at the 419/// cost of creating an entirely new loop. 420static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 421 OperatorChain &ParentChain, 422 DenseMap<Value *, Value *> &Cache, 423 MemorySSAUpdater *MSSAU) { 424 auto CacheIt = Cache.find(Cond); 425 if (CacheIt != Cache.end()) 426 return CacheIt->second; 427 428 // We started analyze new instruction, increment scanned instructions counter. 429 ++TotalInsts; 430 431 // We can never unswitch on vector conditions. 432 if (Cond->getType()->isVectorTy()) 433 return nullptr; 434 435 // Constants should be folded, not unswitched on! 436 if (isa<Constant>(Cond)) return nullptr; 437 438 // TODO: Handle: br (VARIANT|INVARIANT). 439 440 // Hoist simple values out. 441 if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { 442 Cache[Cond] = Cond; 443 return Cond; 444 } 445 446 // Walk up the operator chain to find partial invariant conditions. 447 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) 448 if (BO->getOpcode() == Instruction::And || 449 BO->getOpcode() == Instruction::Or) { 450 // Given the previous operator, compute the current operator chain status. 451 OperatorChain NewChain; 452 switch (ParentChain) { 453 case OC_OpChainNone: 454 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 455 OC_OpChainOr; 456 break; 457 case OC_OpChainOr: 458 NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr : 459 OC_OpChainMixed; 460 break; 461 case OC_OpChainAnd: 462 NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : 463 OC_OpChainMixed; 464 break; 465 case OC_OpChainMixed: 466 NewChain = OC_OpChainMixed; 467 break; 468 } 469 470 // If we reach a Mixed state, we do not want to keep walking up as we can not 471 // reliably find a value that will simplify the chain. With this check, we 472 // will return null on the first sight of mixed chain and the caller will 473 // either backtrack to find partial LIV in other operand or return null. 474 if (NewChain != OC_OpChainMixed) { 475 // Update the current operator chain type before we search up the chain. 476 ParentChain = NewChain; 477 // If either the left or right side is invariant, we can unswitch on this, 478 // which will cause the branch to go away in one loop and the condition to 479 // simplify in the other one. 480 if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed, 481 ParentChain, Cache, MSSAU)) { 482 Cache[Cond] = LHS; 483 return LHS; 484 } 485 // We did not manage to find a partial LIV in operand(0). Backtrack and try 486 // operand(1). 487 ParentChain = NewChain; 488 if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed, 489 ParentChain, Cache, MSSAU)) { 490 Cache[Cond] = RHS; 491 return RHS; 492 } 493 } 494 } 495 496 Cache[Cond] = nullptr; 497 return nullptr; 498} 499 500/// Cond is a condition that occurs in L. If it is invariant in the loop, or has 501/// an invariant piece, return the invariant along with the operator chain type. 502/// Otherwise, return null. 503static std::pair<Value *, OperatorChain> 504findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, 505 MemorySSAUpdater *MSSAU) { 506 DenseMap<Value *, Value *> Cache; 507 OperatorChain OpChain = OC_OpChainNone; 508 Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); 509 510 // In case we do find a LIV, it can not be obtained by walking up a mixed 511 // operator chain. 512 assert((!FCond || OpChain != OC_OpChainMixed) && 513 "Do not expect a partial LIV with mixed operator chain"); 514 return {FCond, OpChain}; 515} 516 517bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { 518 if (skipLoop(L)) 519 return false; 520 521 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 522 *L->getHeader()->getParent()); 523 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 524 LPM = &LPMRef; 525 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 526 if (EnableMSSALoopDependency) { 527 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); 528 MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); 529 assert(DT && "Cannot update MemorySSA without a valid DomTree."); 530 } 531 CurrentLoop = L; 532 Function *F = CurrentLoop->getHeader()->getParent(); 533 534 SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory); 535 if (SanitizeMemory) 536 SafetyInfo.computeLoopSafetyInfo(L); 537 538 if (MSSA && VerifyMemorySSA) 539 MSSA->verifyMemorySSA(); 540 541 bool Changed = false; 542 do { 543 assert(CurrentLoop->isLCSSAForm(*DT)); 544 if (MSSA && VerifyMemorySSA) 545 MSSA->verifyMemorySSA(); 546 RedoLoop = false; 547 Changed |= processCurrentLoop(); 548 } while (RedoLoop); 549 550 if (MSSA && VerifyMemorySSA) 551 MSSA->verifyMemorySSA(); 552 553 return Changed; 554} 555 556// Return true if the BasicBlock BB is unreachable from the loop header. 557// Return false, otherwise. 558bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) { 559 auto *Node = DT->getNode(BB)->getIDom(); 560 BasicBlock *DomBB = Node->getBlock(); 561 while (CurrentLoop->contains(DomBB)) { 562 BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator()); 563 564 Node = DT->getNode(DomBB)->getIDom(); 565 DomBB = Node->getBlock(); 566 567 if (!BInst || !BInst->isConditional()) 568 continue; 569 570 Value *Cond = BInst->getCondition(); 571 if (!isa<ConstantInt>(Cond)) 572 continue; 573 574 BasicBlock *UnreachableSucc = 575 Cond == ConstantInt::getTrue(Cond->getContext()) 576 ? BInst->getSuccessor(1) 577 : BInst->getSuccessor(0); 578 579 if (DT->dominates(UnreachableSucc, BB)) 580 return true; 581 } 582 return false; 583} 584 585/// FIXME: Remove this workaround when freeze related patches are done. 586/// LoopUnswitch and Equality propagation in GVN have discrepancy about 587/// whether branch on undef/poison has undefine behavior. Here it is to 588/// rule out some common cases that we found such discrepancy already 589/// causing problems. Detail could be found in PR31652. Note if the 590/// func returns true, it is unsafe. But if it is false, it doesn't mean 591/// it is necessarily safe. 592static bool equalityPropUnSafe(Value &LoopCond) { 593 ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond); 594 if (!CI || !CI->isEquality()) 595 return false; 596 597 Value *LHS = CI->getOperand(0); 598 Value *RHS = CI->getOperand(1); 599 if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) 600 return true; 601 602 auto HasUndefInPHI = [](PHINode &PN) { 603 for (Value *Opd : PN.incoming_values()) { 604 if (isa<UndefValue>(Opd)) 605 return true; 606 } 607 return false; 608 }; 609 PHINode *LPHI = dyn_cast<PHINode>(LHS); 610 PHINode *RPHI = dyn_cast<PHINode>(RHS); 611 if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI))) 612 return true; 613 614 auto HasUndefInSelect = [](SelectInst &SI) { 615 if (isa<UndefValue>(SI.getTrueValue()) || 616 isa<UndefValue>(SI.getFalseValue())) 617 return true; 618 return false; 619 }; 620 SelectInst *LSI = dyn_cast<SelectInst>(LHS); 621 SelectInst *RSI = dyn_cast<SelectInst>(RHS); 622 if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI))) 623 return true; 624 return false; 625} 626 627/// Do actual work and unswitch loop if possible and profitable. 628bool LoopUnswitch::processCurrentLoop() { 629 bool Changed = false; 630 631 initLoopData(); 632 633 // If LoopSimplify was unable to form a preheader, don't do any unswitching. 634 if (!LoopPreheader) 635 return false; 636 637 // Loops with indirectbr cannot be cloned. 638 if (!CurrentLoop->isSafeToClone()) 639 return false; 640 641 // Without dedicated exits, splitting the exit edge may fail. 642 if (!CurrentLoop->hasDedicatedExits()) 643 return false; 644 645 LLVMContext &Context = LoopHeader->getContext(); 646 647 // Analyze loop cost, and stop unswitching if loop content can not be duplicated. 648 if (!BranchesInfo.countLoop( 649 CurrentLoop, 650 getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 651 *CurrentLoop->getHeader()->getParent()), 652 AC)) 653 return false; 654 655 // Try trivial unswitch first before loop over other basic blocks in the loop. 656 if (tryTrivialLoopUnswitch(Changed)) { 657 return true; 658 } 659 660 // Do not do non-trivial unswitch while optimizing for size. 661 // FIXME: Use Function::hasOptSize(). 662 if (OptimizeForSize || 663 LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) 664 return false; 665 666 // Run through the instructions in the loop, keeping track of three things: 667 // 668 // - That we do not unswitch loops containing convergent operations, as we 669 // might be making them control dependent on the unswitch value when they 670 // were not before. 671 // FIXME: This could be refined to only bail if the convergent operation is 672 // not already control-dependent on the unswitch value. 673 // 674 // - That basic blocks in the loop contain invokes whose predecessor edges we 675 // cannot split. 676 // 677 // - The set of guard intrinsics encountered (these are non terminator 678 // instructions that are also profitable to be unswitched). 679 680 SmallVector<IntrinsicInst *, 4> Guards; 681 682 for (const auto BB : CurrentLoop->blocks()) { 683 for (auto &I : *BB) { 684 auto *CB = dyn_cast<CallBase>(&I); 685 if (!CB) 686 continue; 687 if (CB->isConvergent()) 688 return false; 689 if (auto *II = dyn_cast<InvokeInst>(&I)) 690 if (!II->getUnwindDest()->canSplitPredecessors()) 691 return false; 692 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 693 if (II->getIntrinsicID() == Intrinsic::experimental_guard) 694 Guards.push_back(II); 695 } 696 } 697 698 for (IntrinsicInst *Guard : Guards) { 699 Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop, 700 Changed, MSSAU.get()) 701 .first; 702 if (LoopCond && 703 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 704 // NB! Unswitching (if successful) could have erased some of the 705 // instructions in Guards leaving dangling pointers there. This is fine 706 // because we're returning now, and won't look at Guards again. 707 ++NumGuards; 708 return true; 709 } 710 } 711 712 // Loop over all of the basic blocks in the loop. If we find an interior 713 // block that is branching on a loop-invariant condition, we can unswitch this 714 // loop. 715 for (Loop::block_iterator I = CurrentLoop->block_begin(), 716 E = CurrentLoop->block_end(); 717 I != E; ++I) { 718 Instruction *TI = (*I)->getTerminator(); 719 720 // Unswitching on a potentially uninitialized predicate is not 721 // MSan-friendly. Limit this to the cases when the original predicate is 722 // guaranteed to execute, to avoid creating a use-of-uninitialized-value 723 // in the code that did not have one. 724 // This is a workaround for the discrepancy between LLVM IR and MSan 725 // semantics. See PR28054 for more details. 726 if (SanitizeMemory && 727 !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop)) 728 continue; 729 730 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { 731 // Some branches may be rendered unreachable because of previous 732 // unswitching. 733 // Unswitch only those branches that are reachable. 734 if (isUnreachableDueToPreviousUnswitching(*I)) 735 continue; 736 737 // If this isn't branching on an invariant condition, we can't unswitch 738 // it. 739 if (BI->isConditional()) { 740 // See if this, or some part of it, is loop invariant. If so, we can 741 // unswitch on it if we desire. 742 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 743 Changed, MSSAU.get()) 744 .first; 745 if (LoopCond && !equalityPropUnSafe(*LoopCond) && 746 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { 747 ++NumBranches; 748 return true; 749 } 750 } 751 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { 752 Value *SC = SI->getCondition(); 753 Value *LoopCond; 754 OperatorChain OpChain; 755 std::tie(LoopCond, OpChain) = 756 findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get()); 757 758 unsigned NumCases = SI->getNumCases(); 759 if (LoopCond && NumCases) { 760 // Find a value to unswitch on: 761 // FIXME: this should chose the most expensive case! 762 // FIXME: scan for a case with a non-critical edge? 763 Constant *UnswitchVal = nullptr; 764 // Find a case value such that at least one case value is unswitched 765 // out. 766 if (OpChain == OC_OpChainAnd) { 767 // If the chain only has ANDs and the switch has a case value of 0. 768 // Dropping in a 0 to the chain will unswitch out the 0-casevalue. 769 auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType())); 770 if (BranchesInfo.isUnswitched(SI, AllZero)) 771 continue; 772 // We are unswitching 0 out. 773 UnswitchVal = AllZero; 774 } else if (OpChain == OC_OpChainOr) { 775 // If the chain only has ORs and the switch has a case value of ~0. 776 // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue. 777 auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType())); 778 if (BranchesInfo.isUnswitched(SI, AllOne)) 779 continue; 780 // We are unswitching ~0 out. 781 UnswitchVal = AllOne; 782 } else { 783 assert(OpChain == OC_OpChainNone && 784 "Expect to unswitch on trivial chain"); 785 // Do not process same value again and again. 786 // At this point we have some cases already unswitched and 787 // some not yet unswitched. Let's find the first not yet unswitched one. 788 for (auto Case : SI->cases()) { 789 Constant *UnswitchValCandidate = Case.getCaseValue(); 790 if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { 791 UnswitchVal = UnswitchValCandidate; 792 break; 793 } 794 } 795 } 796 797 if (!UnswitchVal) 798 continue; 799 800 if (unswitchIfProfitable(LoopCond, UnswitchVal)) { 801 ++NumSwitches; 802 // In case of a full LIV, UnswitchVal is the value we unswitched out. 803 // In case of a partial LIV, we only unswitch when its an AND-chain 804 // or OR-chain. In both cases switch input value simplifies to 805 // UnswitchVal. 806 BranchesInfo.setUnswitched(SI, UnswitchVal); 807 return true; 808 } 809 } 810 } 811 812 // Scan the instructions to check for unswitchable values. 813 for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); 814 BBI != E; ++BBI) 815 if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { 816 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 817 Changed, MSSAU.get()) 818 .first; 819 if (LoopCond && 820 unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { 821 ++NumSelects; 822 return true; 823 } 824 } 825 } 826 return Changed; 827} 828 829/// Check to see if all paths from BB exit the loop with no side effects 830/// (including infinite loops). 831/// 832/// If true, we return true and set ExitBB to the block we 833/// exit through. 834/// 835static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, 836 BasicBlock *&ExitBB, 837 std::set<BasicBlock*> &Visited) { 838 if (!Visited.insert(BB).second) { 839 // Already visited. Without more analysis, this could indicate an infinite 840 // loop. 841 return false; 842 } 843 if (!L->contains(BB)) { 844 // Otherwise, this is a loop exit, this is fine so long as this is the 845 // first exit. 846 if (ExitBB) return false; 847 ExitBB = BB; 848 return true; 849 } 850 851 // Otherwise, this is an unvisited intra-loop node. Check all successors. 852 for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) { 853 // Check to see if the successor is a trivial loop exit. 854 if (!isTrivialLoopExitBlockHelper(L, *SI, ExitBB, Visited)) 855 return false; 856 } 857 858 // Okay, everything after this looks good, check to make sure that this block 859 // doesn't include any side effects. 860 for (Instruction &I : *BB) 861 if (I.mayHaveSideEffects()) 862 return false; 863 864 return true; 865} 866 867/// Return true if the specified block unconditionally leads to an exit from 868/// the specified loop, and has no side-effects in the process. If so, return 869/// the block that is exited to, otherwise return null. 870static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { 871 std::set<BasicBlock*> Visited; 872 Visited.insert(L->getHeader()); // Branches to header make infinite loops. 873 BasicBlock *ExitBB = nullptr; 874 if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) 875 return ExitBB; 876 return nullptr; 877} 878 879/// We have found that we can unswitch CurrentLoop when LoopCond == Val to 880/// simplify the loop. If we decide that this is profitable, 881/// unswitch the loop, reprocess the pieces, then return true. 882bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, 883 Instruction *TI) { 884 // Check to see if it would be profitable to unswitch current loop. 885 if (!BranchesInfo.costAllowsUnswitching()) { 886 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 887 << CurrentLoop->getHeader()->getName() 888 << " at non-trivial condition '" << *Val 889 << "' == " << *LoopCond << "\n" 890 << ". Cost too high.\n"); 891 return false; 892 } 893 if (HasBranchDivergence && 894 getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { 895 LLVM_DEBUG(dbgs() << "NOT unswitching loop %" 896 << CurrentLoop->getHeader()->getName() 897 << " at non-trivial condition '" << *Val 898 << "' == " << *LoopCond << "\n" 899 << ". Condition is divergent.\n"); 900 return false; 901 } 902 903 unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI); 904 return true; 905} 906 907/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, 908/// otherwise branch to FalseDest. Insert the code immediately before OldBranch 909/// and remove (but not erase!) it from the function. 910void LoopUnswitch::emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, 911 BasicBlock *TrueDest, 912 BasicBlock *FalseDest, 913 BranchInst *OldBranch, 914 Instruction *TI) { 915 assert(OldBranch->isUnconditional() && "Preheader is not split correctly"); 916 assert(TrueDest != FalseDest && "Branch targets should be different"); 917 // Insert a conditional branch on LIC to the two preheaders. The original 918 // code is the true version and the new code is the false version. 919 Value *BranchVal = LIC; 920 bool Swapped = false; 921 if (!isa<ConstantInt>(Val) || 922 Val->getType() != Type::getInt1Ty(LIC->getContext())) 923 BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); 924 else if (Val != ConstantInt::getTrue(Val->getContext())) { 925 // We want to enter the new loop when the condition is true. 926 std::swap(TrueDest, FalseDest); 927 Swapped = true; 928 } 929 930 // Old branch will be removed, so save its parent and successor to update the 931 // DomTree. 932 auto *OldBranchSucc = OldBranch->getSuccessor(0); 933 auto *OldBranchParent = OldBranch->getParent(); 934 935 // Insert the new branch. 936 BranchInst *BI = 937 IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI); 938 if (Swapped) 939 BI->swapProfMetadata(); 940 941 // Remove the old branch so there is only one branch at the end. This is 942 // needed to perform DomTree's internal DFS walk on the function's CFG. 943 OldBranch->removeFromParent(); 944 945 // Inform the DT about the new branch. 946 if (DT) { 947 // First, add both successors. 948 SmallVector<DominatorTree::UpdateType, 3> Updates; 949 if (TrueDest != OldBranchSucc) 950 Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest}); 951 if (FalseDest != OldBranchSucc) 952 Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest}); 953 // If both of the new successors are different from the old one, inform the 954 // DT that the edge was deleted. 955 if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { 956 Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); 957 } 958 DT->applyUpdates(Updates); 959 960 if (MSSAU) 961 MSSAU->applyUpdates(Updates, *DT); 962 } 963 964 // If either edge is critical, split it. This helps preserve LoopSimplify 965 // form for enclosing loops. 966 auto Options = 967 CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA(); 968 SplitCriticalEdge(BI, 0, Options); 969 SplitCriticalEdge(BI, 1, Options); 970} 971 972/// Given a loop that has a trivial unswitchable condition in it (a cond branch 973/// from its header block to its latch block, where the path through the loop 974/// that doesn't execute its body has no side-effects), unswitch it. This 975/// doesn't involve any code duplication, just moving the conditional branch 976/// outside of the loop and updating loop info. 977void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, 978 BasicBlock *ExitBlock, 979 Instruction *TI) { 980 LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %" 981 << LoopHeader->getName() << " [" << L->getBlocks().size() 982 << " blocks] in Function " 983 << L->getHeader()->getParent()->getName() 984 << " on cond: " << *Val << " == " << *Cond << "\n"); 985 // We are going to make essential changes to CFG. This may invalidate cached 986 // information for L or one of its parent loops in SCEV. 987 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 988 SEWP->getSE().forgetTopmostLoop(L); 989 990 // First step, split the preheader, so that we know that there is a safe place 991 // to insert the conditional branch. We will change LoopPreheader to have a 992 // conditional branch on Cond. 993 BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 994 995 // Now that we have a place to insert the conditional branch, create a place 996 // to branch to: this is the exit block out of the loop that we should 997 // short-circuit to. 998 999 // Split this block now, so that the loop maintains its exit block, and so 1000 // that the jump from the preheader can execute the contents of the exit block 1001 // without actually branching to it (the exit block should be dominated by the 1002 // loop header, not the preheader). 1003 assert(!L->contains(ExitBlock) && "Exit block is in the loop?"); 1004 BasicBlock *NewExit = 1005 SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get()); 1006 1007 // Okay, now we have a position to branch from and a position to branch to, 1008 // insert the new conditional branch. 1009 auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator()); 1010 assert(OldBranch && "Failed to split the preheader"); 1011 emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI); 1012 1013 // emitPreheaderBranchOnCondition removed the OldBranch from the function. 1014 // Delete it, as it is no longer needed. 1015 delete OldBranch; 1016 1017 // We need to reprocess this loop, it could be unswitched again. 1018 RedoLoop = true; 1019 1020 // Now that we know that the loop is never entered when this condition is a 1021 // particular value, rewrite the loop with this info. We know that this will 1022 // at least eliminate the old branch. 1023 rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false); 1024 1025 ++NumTrivial; 1026} 1027 1028/// Check if the first non-constant condition starting from the loop header is 1029/// a trivial unswitch condition: that is, a condition controls whether or not 1030/// the loop does anything at all. If it is a trivial condition, unswitching 1031/// produces no code duplications (equivalently, it produces a simpler loop and 1032/// a new empty loop, which gets deleted). Therefore always unswitch trivial 1033/// condition. 1034bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) { 1035 BasicBlock *CurrentBB = CurrentLoop->getHeader(); 1036 Instruction *CurrentTerm = CurrentBB->getTerminator(); 1037 LLVMContext &Context = CurrentBB->getContext(); 1038 1039 // If loop header has only one reachable successor (currently via an 1040 // unconditional branch or constant foldable conditional branch, but 1041 // should also consider adding constant foldable switch instruction in 1042 // future), we should keep looking for trivial condition candidates in 1043 // the successor as well. An alternative is to constant fold conditions 1044 // and merge successors into loop header (then we only need to check header's 1045 // terminator). The reason for not doing this in LoopUnswitch pass is that 1046 // it could potentially break LoopPassManager's invariants. Folding dead 1047 // branches could either eliminate the current loop or make other loops 1048 // unreachable. LCSSA form might also not be preserved after deleting 1049 // branches. The following code keeps traversing loop header's successors 1050 // until it finds the trivial condition candidate (condition that is not a 1051 // constant). Since unswitching generates branches with constant conditions, 1052 // this scenario could be very common in practice. 1053 SmallPtrSet<BasicBlock*, 8> Visited; 1054 1055 while (true) { 1056 // If we exit loop or reach a previous visited block, then 1057 // we can not reach any trivial condition candidates (unfoldable 1058 // branch instructions or switch instructions) and no unswitch 1059 // can happen. Exit and return false. 1060 if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second) 1061 return false; 1062 1063 // Check if this loop will execute any side-effecting instructions (e.g. 1064 // stores, calls, volatile loads) in the part of the loop that the code 1065 // *would* execute. Check the header first. 1066 for (Instruction &I : *CurrentBB) 1067 if (I.mayHaveSideEffects()) 1068 return false; 1069 1070 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1071 if (BI->isUnconditional()) { 1072 CurrentBB = BI->getSuccessor(0); 1073 } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { 1074 CurrentBB = BI->getSuccessor(0); 1075 } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { 1076 CurrentBB = BI->getSuccessor(1); 1077 } else { 1078 // Found a trivial condition candidate: non-foldable conditional branch. 1079 break; 1080 } 1081 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1082 // At this point, any constant-foldable instructions should have probably 1083 // been folded. 1084 ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); 1085 if (!Cond) 1086 break; 1087 // Find the target block we are definitely going to. 1088 CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); 1089 } else { 1090 // We do not understand these terminator instructions. 1091 break; 1092 } 1093 1094 CurrentTerm = CurrentBB->getTerminator(); 1095 } 1096 1097 // CondVal is the condition that controls the trivial condition. 1098 // LoopExitBB is the BasicBlock that loop exits when meets trivial condition. 1099 Constant *CondVal = nullptr; 1100 BasicBlock *LoopExitBB = nullptr; 1101 1102 if (BranchInst *BI = dyn_cast<BranchInst>(CurrentTerm)) { 1103 // If this isn't branching on an invariant condition, we can't unswitch it. 1104 if (!BI->isConditional()) 1105 return false; 1106 1107 Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, 1108 Changed, MSSAU.get()) 1109 .first; 1110 1111 // Unswitch only if the trivial condition itself is an LIV (not 1112 // partial LIV which could occur in and/or) 1113 if (!LoopCond || LoopCond != BI->getCondition()) 1114 return false; 1115 1116 // Check to see if a successor of the branch is guaranteed to 1117 // exit through a unique exit block without having any 1118 // side-effects. If so, determine the value of Cond that causes 1119 // it to do this. 1120 if ((LoopExitBB = 1121 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) { 1122 CondVal = ConstantInt::getTrue(Context); 1123 } else if ((LoopExitBB = 1124 isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) { 1125 CondVal = ConstantInt::getFalse(Context); 1126 } 1127 1128 // If we didn't find a single unique LoopExit block, or if the loop exit 1129 // block contains phi nodes, this isn't trivial. 1130 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1131 return false; // Can't handle this. 1132 1133 if (equalityPropUnSafe(*LoopCond)) 1134 return false; 1135 1136 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1137 CurrentTerm); 1138 ++NumBranches; 1139 return true; 1140 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(CurrentTerm)) { 1141 // If this isn't switching on an invariant condition, we can't unswitch it. 1142 Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, 1143 Changed, MSSAU.get()) 1144 .first; 1145 1146 // Unswitch only if the trivial condition itself is an LIV (not 1147 // partial LIV which could occur in and/or) 1148 if (!LoopCond || LoopCond != SI->getCondition()) 1149 return false; 1150 1151 // Check to see if a successor of the switch is guaranteed to go to the 1152 // latch block or exit through a one exit block without having any 1153 // side-effects. If so, determine the value of Cond that causes it to do 1154 // this. 1155 // Note that we can't trivially unswitch on the default case or 1156 // on already unswitched cases. 1157 for (auto Case : SI->cases()) { 1158 BasicBlock *LoopExitCandidate; 1159 if ((LoopExitCandidate = 1160 isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) { 1161 // Okay, we found a trivial case, remember the value that is trivial. 1162 ConstantInt *CaseVal = Case.getCaseValue(); 1163 1164 // Check that it was not unswitched before, since already unswitched 1165 // trivial vals are looks trivial too. 1166 if (BranchesInfo.isUnswitched(SI, CaseVal)) 1167 continue; 1168 LoopExitBB = LoopExitCandidate; 1169 CondVal = CaseVal; 1170 break; 1171 } 1172 } 1173 1174 // If we didn't find a single unique LoopExit block, or if the loop exit 1175 // block contains phi nodes, this isn't trivial. 1176 if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) 1177 return false; // Can't handle this. 1178 1179 unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, 1180 nullptr); 1181 1182 // We are only unswitching full LIV. 1183 BranchesInfo.setUnswitched(SI, CondVal); 1184 ++NumSwitches; 1185 return true; 1186 } 1187 return false; 1188} 1189 1190/// Split all of the edges from inside the loop to their exit blocks. 1191/// Update the appropriate Phi nodes as we do so. 1192void LoopUnswitch::splitExitEdges( 1193 Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { 1194 1195 for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) { 1196 BasicBlock *ExitBlock = ExitBlocks[I]; 1197 SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), 1198 pred_end(ExitBlock)); 1199 1200 // Although SplitBlockPredecessors doesn't preserve loop-simplify in 1201 // general, if we call it on all predecessors of all exits then it does. 1202 SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(), 1203 /*PreserveLCSSA*/ true); 1204 } 1205} 1206 1207/// We determined that the loop is profitable to unswitch when LIC equal Val. 1208/// Split it into loop versions and test the condition outside of either loop. 1209/// Return the loops created as Out1/Out2. 1210void LoopUnswitch::unswitchNontrivialCondition(Value *LIC, Constant *Val, 1211 Loop *L, Instruction *TI) { 1212 Function *F = LoopHeader->getParent(); 1213 LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %" 1214 << LoopHeader->getName() << " [" << L->getBlocks().size() 1215 << " blocks] in Function " << F->getName() << " when '" 1216 << *Val << "' == " << *LIC << "\n"); 1217 1218 // We are going to make essential changes to CFG. This may invalidate cached 1219 // information for L or one of its parent loops in SCEV. 1220 if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) 1221 SEWP->getSE().forgetTopmostLoop(L); 1222 1223 LoopBlocks.clear(); 1224 NewBlocks.clear(); 1225 1226 if (MSSAU && VerifyMemorySSA) 1227 MSSA->verifyMemorySSA(); 1228 1229 // First step, split the preheader and exit blocks, and add these blocks to 1230 // the LoopBlocks list. 1231 BasicBlock *NewPreheader = 1232 SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); 1233 LoopBlocks.push_back(NewPreheader); 1234 1235 // We want the loop to come after the preheader, but before the exit blocks. 1236 LoopBlocks.insert(LoopBlocks.end(), L->block_begin(), L->block_end()); 1237 1238 SmallVector<BasicBlock*, 8> ExitBlocks; 1239 L->getUniqueExitBlocks(ExitBlocks); 1240 1241 // Split all of the edges from inside the loop to their exit blocks. Update 1242 // the appropriate Phi nodes as we do so. 1243 splitExitEdges(L, ExitBlocks); 1244 1245 // The exit blocks may have been changed due to edge splitting, recompute. 1246 ExitBlocks.clear(); 1247 L->getUniqueExitBlocks(ExitBlocks); 1248 1249 // Add exit blocks to the loop blocks. 1250 LoopBlocks.insert(LoopBlocks.end(), ExitBlocks.begin(), ExitBlocks.end()); 1251 1252 // Next step, clone all of the basic blocks that make up the loop (including 1253 // the loop preheader and exit blocks), keeping track of the mapping between 1254 // the instructions and blocks. 1255 NewBlocks.reserve(LoopBlocks.size()); 1256 ValueToValueMapTy VMap; 1257 for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) { 1258 BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F); 1259 1260 NewBlocks.push_back(NewBB); 1261 VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping. 1262 } 1263 1264 // Splice the newly inserted blocks into the function right before the 1265 // original preheader. 1266 F->getBasicBlockList().splice(NewPreheader->getIterator(), 1267 F->getBasicBlockList(), 1268 NewBlocks[0]->getIterator(), F->end()); 1269 1270 // Now we create the new Loop object for the versioned loop. 1271 Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM); 1272 1273 // Recalculate unswitching quota, inherit simplified switches info for NewBB, 1274 // Probably clone more loop-unswitch related loop properties. 1275 BranchesInfo.cloneData(NewLoop, L, VMap); 1276 1277 Loop *ParentLoop = L->getParentLoop(); 1278 if (ParentLoop) { 1279 // Make sure to add the cloned preheader and exit blocks to the parent loop 1280 // as well. 1281 ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); 1282 } 1283 1284 for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) { 1285 BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]); 1286 // The new exit block should be in the same loop as the old one. 1287 if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI])) 1288 ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); 1289 1290 assert(NewExit->getTerminator()->getNumSuccessors() == 1 && 1291 "Exit block should have been split to have one successor!"); 1292 BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); 1293 1294 // If the successor of the exit block had PHI nodes, add an entry for 1295 // NewExit. 1296 for (PHINode &PN : ExitSucc->phis()) { 1297 Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]); 1298 ValueToValueMapTy::iterator It = VMap.find(V); 1299 if (It != VMap.end()) V = It->second; 1300 PN.addIncoming(V, NewExit); 1301 } 1302 1303 if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { 1304 PHINode *PN = PHINode::Create(LPad->getType(), 0, "", 1305 &*ExitSucc->getFirstInsertionPt()); 1306 1307 for (pred_iterator I = pred_begin(ExitSucc), E = pred_end(ExitSucc); 1308 I != E; ++I) { 1309 BasicBlock *BB = *I; 1310 LandingPadInst *LPI = BB->getLandingPadInst(); 1311 LPI->replaceAllUsesWith(PN); 1312 PN->addIncoming(LPI, BB); 1313 } 1314 } 1315 } 1316 1317 // Rewrite the code to refer to itself. 1318 for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) { 1319 for (Instruction &I : *NewBlocks[NBI]) { 1320 RemapInstruction(&I, VMap, 1321 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1322 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 1323 if (II->getIntrinsicID() == Intrinsic::assume) 1324 AC->registerAssumption(II); 1325 } 1326 } 1327 1328 // Rewrite the original preheader to select between versions of the loop. 1329 BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator()); 1330 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] && 1331 "Preheader splitting did not work correctly!"); 1332 1333 if (MSSAU) { 1334 // Update MemorySSA after cloning, and before splitting to unreachables, 1335 // since that invalidates the 1:1 mapping of clones in VMap. 1336 LoopBlocksRPO LBRPO(L); 1337 LBRPO.perform(LI); 1338 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap); 1339 } 1340 1341 // Emit the new branch that selects between the two versions of this loop. 1342 emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, 1343 TI); 1344 if (MSSAU) { 1345 // Update MemoryPhis in Exit blocks. 1346 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); 1347 if (VerifyMemorySSA) 1348 MSSA->verifyMemorySSA(); 1349 } 1350 1351 // The OldBr was replaced by a new one and removed (but not erased) by 1352 // emitPreheaderBranchOnCondition. It is no longer needed, so delete it. 1353 delete OldBR; 1354 1355 LoopProcessWorklist.push_back(NewLoop); 1356 RedoLoop = true; 1357 1358 // Keep a WeakTrackingVH holding onto LIC. If the first call to 1359 // RewriteLoopBody 1360 // deletes the instruction (for example by simplifying a PHI that feeds into 1361 // the condition that we're unswitching on), we don't rewrite the second 1362 // iteration. 1363 WeakTrackingVH LICHandle(LIC); 1364 1365 // Now we rewrite the original code to know that the condition is true and the 1366 // new code to know that the condition is false. 1367 rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); 1368 1369 // It's possible that simplifying one loop could cause the other to be 1370 // changed to another value or a constant. If its a constant, don't simplify 1371 // it. 1372 if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && 1373 LICHandle && !isa<Constant>(LICHandle)) 1374 rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, 1375 /*IsEqual=*/true); 1376 1377 if (MSSA && VerifyMemorySSA) 1378 MSSA->verifyMemorySSA(); 1379} 1380 1381/// Remove all instances of I from the worklist vector specified. 1382static void removeFromWorklist(Instruction *I, 1383 std::vector<Instruction *> &Worklist) { 1384 1385 Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), I), 1386 Worklist.end()); 1387} 1388 1389/// When we find that I really equals V, remove I from the 1390/// program, replacing all uses with V and update the worklist. 1391static void replaceUsesOfWith(Instruction *I, Value *V, 1392 std::vector<Instruction *> &Worklist, Loop *L, 1393 LPPassManager *LPM, MemorySSAUpdater *MSSAU) { 1394 LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n"); 1395 1396 // Add uses to the worklist, which may be dead now. 1397 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1398 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1399 Worklist.push_back(Use); 1400 1401 // Add users to the worklist which may be simplified now. 1402 for (User *U : I->users()) 1403 Worklist.push_back(cast<Instruction>(U)); 1404 removeFromWorklist(I, Worklist); 1405 I->replaceAllUsesWith(V); 1406 if (!I->mayHaveSideEffects()) { 1407 if (MSSAU) 1408 MSSAU->removeMemoryAccess(I); 1409 I->eraseFromParent(); 1410 } 1411 ++NumSimplify; 1412} 1413 1414/// We know either that the value LIC has the value specified by Val in the 1415/// specified loop, or we know it does NOT have that value. 1416/// Rewrite any uses of LIC or of properties correlated to it. 1417void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, 1418 Constant *Val, 1419 bool IsEqual) { 1420 assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); 1421 1422 // FIXME: Support correlated properties, like: 1423 // for (...) 1424 // if (li1 < li2) 1425 // ... 1426 // if (li1 > li2) 1427 // ... 1428 1429 // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, 1430 // selects, switches. 1431 std::vector<Instruction*> Worklist; 1432 LLVMContext &Context = Val->getContext(); 1433 1434 // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC 1435 // in the loop with the appropriate one directly. 1436 if (IsEqual || (isa<ConstantInt>(Val) && 1437 Val->getType()->isIntegerTy(1))) { 1438 Value *Replacement; 1439 if (IsEqual) 1440 Replacement = Val; 1441 else 1442 Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), 1443 !cast<ConstantInt>(Val)->getZExtValue()); 1444 1445 for (User *U : LIC->users()) { 1446 Instruction *UI = dyn_cast<Instruction>(U); 1447 if (!UI || !L->contains(UI)) 1448 continue; 1449 Worklist.push_back(UI); 1450 } 1451 1452 for (Instruction *UI : Worklist) 1453 UI->replaceUsesOfWith(LIC, Replacement); 1454 1455 simplifyCode(Worklist, L); 1456 return; 1457 } 1458 1459 // Otherwise, we don't know the precise value of LIC, but we do know that it 1460 // is certainly NOT "Val". As such, simplify any uses in the loop that we 1461 // can. This case occurs when we unswitch switch statements. 1462 for (User *U : LIC->users()) { 1463 Instruction *UI = dyn_cast<Instruction>(U); 1464 if (!UI || !L->contains(UI)) 1465 continue; 1466 1467 // At this point, we know LIC is definitely not Val. Try to use some simple 1468 // logic to simplify the user w.r.t. to the context. 1469 if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) { 1470 if (LI->replacementPreservesLCSSAForm(UI, Replacement)) { 1471 // This in-loop instruction has been simplified w.r.t. its context, 1472 // i.e. LIC != Val, make sure we propagate its replacement value to 1473 // all its users. 1474 // 1475 // We can not yet delete UI, the LIC user, yet, because that would invalidate 1476 // the LIC->users() iterator !. However, we can make this instruction 1477 // dead by replacing all its users and push it onto the worklist so that 1478 // it can be properly deleted and its operands simplified. 1479 UI->replaceAllUsesWith(Replacement); 1480 } 1481 } 1482 1483 // This is a LIC user, push it into the worklist so that simplifyCode can 1484 // attempt to simplify it. 1485 Worklist.push_back(UI); 1486 1487 // If we know that LIC is not Val, use this info to simplify code. 1488 SwitchInst *SI = dyn_cast<SwitchInst>(UI); 1489 if (!SI || !isa<ConstantInt>(Val)) continue; 1490 1491 // NOTE: if a case value for the switch is unswitched out, we record it 1492 // after the unswitch finishes. We can not record it here as the switch 1493 // is not a direct user of the partial LIV. 1494 SwitchInst::CaseHandle DeadCase = 1495 *SI->findCaseValue(cast<ConstantInt>(Val)); 1496 // Default case is live for multiple values. 1497 if (DeadCase == *SI->case_default()) 1498 continue; 1499 1500 // Found a dead case value. Don't remove PHI nodes in the 1501 // successor if they become single-entry, those PHI nodes may 1502 // be in the Users list. 1503 1504 BasicBlock *Switch = SI->getParent(); 1505 BasicBlock *SISucc = DeadCase.getCaseSuccessor(); 1506 BasicBlock *Latch = L->getLoopLatch(); 1507 1508 if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. 1509 // If the DeadCase successor dominates the loop latch, then the 1510 // transformation isn't safe since it will delete the sole predecessor edge 1511 // to the latch. 1512 if (Latch && DT->dominates(SISucc, Latch)) 1513 continue; 1514 1515 // FIXME: This is a hack. We need to keep the successor around 1516 // and hooked up so as to preserve the loop structure, because 1517 // trying to update it is complicated. So instead we preserve the 1518 // loop structure and put the block on a dead code path. 1519 SplitEdge(Switch, SISucc, DT, LI, MSSAU.get()); 1520 // Compute the successors instead of relying on the return value 1521 // of SplitEdge, since it may have split the switch successor 1522 // after PHI nodes. 1523 BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); 1524 BasicBlock *OldSISucc = *succ_begin(NewSISucc); 1525 // Create an "unreachable" destination. 1526 BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", 1527 Switch->getParent(), 1528 OldSISucc); 1529 new UnreachableInst(Context, Abort); 1530 // Force the new case destination to branch to the "unreachable" 1531 // block while maintaining a (dead) CFG edge to the old block. 1532 NewSISucc->getTerminator()->eraseFromParent(); 1533 BranchInst::Create(Abort, OldSISucc, 1534 ConstantInt::getTrue(Context), NewSISucc); 1535 // Release the PHI operands for this edge. 1536 for (PHINode &PN : NewSISucc->phis()) 1537 PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType())); 1538 // Tell the domtree about the new block. We don't fully update the 1539 // domtree here -- instead we force it to do a full recomputation 1540 // after the pass is complete -- but we do need to inform it of 1541 // new blocks. 1542 DT->addNewBlock(Abort, NewSISucc); 1543 } 1544 1545 simplifyCode(Worklist, L); 1546} 1547 1548/// Now that we have simplified some instructions in the loop, walk over it and 1549/// constant prop, dce, and fold control flow where possible. Note that this is 1550/// effectively a very simple loop-structure-aware optimizer. During processing 1551/// of this loop, L could very well be deleted, so it must not be used. 1552/// 1553/// FIXME: When the loop optimizer is more mature, separate this out to a new 1554/// pass. 1555/// 1556void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) { 1557 const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 1558 while (!Worklist.empty()) { 1559 Instruction *I = Worklist.back(); 1560 Worklist.pop_back(); 1561 1562 // Simple DCE. 1563 if (isInstructionTriviallyDead(I)) { 1564 LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n"); 1565 1566 // Add uses to the worklist, which may be dead now. 1567 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) 1568 if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) 1569 Worklist.push_back(Use); 1570 removeFromWorklist(I, Worklist); 1571 if (MSSAU) 1572 MSSAU->removeMemoryAccess(I); 1573 I->eraseFromParent(); 1574 ++NumSimplify; 1575 continue; 1576 } 1577 1578 // See if instruction simplification can hack this up. This is common for 1579 // things like "select false, X, Y" after unswitching made the condition be 1580 // 'false'. TODO: update the domtree properly so we can pass it here. 1581 if (Value *V = SimplifyInstruction(I, DL)) 1582 if (LI->replacementPreservesLCSSAForm(I, V)) { 1583 replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get()); 1584 continue; 1585 } 1586 1587 // Special case hacks that appear commonly in unswitched code. 1588 if (BranchInst *BI = dyn_cast<BranchInst>(I)) { 1589 if (BI->isUnconditional()) { 1590 // If BI's parent is the only pred of the successor, fold the two blocks 1591 // together. 1592 BasicBlock *Pred = BI->getParent(); 1593 (void)Pred; 1594 BasicBlock *Succ = BI->getSuccessor(0); 1595 BasicBlock *SinglePred = Succ->getSinglePredecessor(); 1596 if (!SinglePred) continue; // Nothing to do. 1597 assert(SinglePred == Pred && "CFG broken"); 1598 1599 // Make the LPM and Worklist updates specific to LoopUnswitch. 1600 removeFromWorklist(BI, Worklist); 1601 auto SuccIt = Succ->begin(); 1602 while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) { 1603 for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) 1604 if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It))) 1605 Worklist.push_back(Use); 1606 for (User *U : PN->users()) 1607 Worklist.push_back(cast<Instruction>(U)); 1608 removeFromWorklist(PN, Worklist); 1609 ++NumSimplify; 1610 } 1611 // Merge the block and make the remaining analyses updates. 1612 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 1613 MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); 1614 ++NumSimplify; 1615 continue; 1616 } 1617 1618 continue; 1619 } 1620 } 1621} 1622 1623/// Simple simplifications we can do given the information that Cond is 1624/// definitely not equal to Val. 1625Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst, 1626 Value *Invariant, 1627 Constant *Val) { 1628 // icmp eq cond, val -> false 1629 ICmpInst *CI = dyn_cast<ICmpInst>(Inst); 1630 if (CI && CI->isEquality()) { 1631 Value *Op0 = CI->getOperand(0); 1632 Value *Op1 = CI->getOperand(1); 1633 if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) { 1634 LLVMContext &Ctx = Inst->getContext(); 1635 if (CI->getPredicate() == CmpInst::ICMP_EQ) 1636 return ConstantInt::getFalse(Ctx); 1637 else 1638 return ConstantInt::getTrue(Ctx); 1639 } 1640 } 1641 1642 // FIXME: there may be other opportunities, e.g. comparison with floating 1643 // point, or Invariant - Val != 0, etc. 1644 return nullptr; 1645} 1646