LoopVersioningLICM.cpp revision 360784
1148330Snetchild//===- LoopVersioningLICM.cpp - LICM Loop Versioning ----------------------===// 2148330Snetchild// 3148330Snetchild// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4148330Snetchild// See https://llvm.org/LICENSE.txt for license information. 5148330Snetchild// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6148330Snetchild// 7148330Snetchild//===----------------------------------------------------------------------===// 8148330Snetchild// 9148330Snetchild// When alias analysis is uncertain about the aliasing between any two accesses, 10148330Snetchild// it will return MayAlias. This uncertainty from alias analysis restricts LICM 11148330Snetchild// from proceeding further. In cases where alias analysis is uncertain we might 12148330Snetchild// use loop versioning as an alternative. 13148330Snetchild// 14148543Snetchild// Loop Versioning will create a version of the loop with aggressive aliasing 15148543Snetchild// assumptions in addition to the original with conservative (default) aliasing 16148330Snetchild// assumptions. The version of the loop making aggressive aliasing assumptions 17189585Sthompsa// will have all the memory accesses marked as no-alias. These two versions of 18189585Sthompsa// loop will be preceded by a memory runtime check. This runtime check consists 19189607Sthompsa// of bound checks for all unique memory accessed in loop, and it ensures the 20189607Sthompsa// lack of memory aliasing. The result of the runtime check determines which of 21189607Sthompsa// the loop versions is executed: If the runtime check detects any memory 22189585Sthompsa// aliasing, then the original loop is executed. Otherwise, the version with 23189092Sed// aggressive aliasing assumptions is used. 24189092Sed// 25188948Sthompsa// Following are the top level steps: 26189000Sthompsa// 27189000Sthompsa// a) Perform LoopVersioningLICM's feasibility check. 28189000Sthompsa// b) If loop is a candidate for versioning then create a memory bound check, 29189000Sthompsa// by considering all the memory accesses in loop body. 30189000Sthompsa// c) Clone original loop and set all memory accesses as no-alias in new loop. 31189000Sthompsa// d) Set original loop & versioned loop as a branch target of the runtime check 32189000Sthompsa// result. 33189000Sthompsa// 34189000Sthompsa// It transforms loop as shown below: 35189000Sthompsa// 36189000Sthompsa// +----------------+ 37189000Sthompsa// |Runtime Memcheck| 38189000Sthompsa// +----------------+ 39189000Sthompsa// | 40188948Sthompsa// +----------+----------------+----------+ 41188948Sthompsa// | | 42188948Sthompsa// +---------+----------+ +-----------+----------+ 43188948Sthompsa// |Orig Loop Preheader | |Cloned Loop Preheader | 44188948Sthompsa// +--------------------+ +----------------------+ 45188948Sthompsa// | | 46188948Sthompsa// +--------------------+ +----------------------+ 47188948Sthompsa// |Orig Loop Body | |Cloned Loop Body | 48188948Sthompsa// +--------------------+ +----------------------+ 49188948Sthompsa// | | 50188948Sthompsa// +--------------------+ +----------------------+ 51188948Sthompsa// |Orig Loop Exit Block| |Cloned Loop Exit Block| 52188948Sthompsa// +--------------------+ +-----------+----------+ 53188948Sthompsa// | | 54188948Sthompsa// +----------+--------------+-----------+ 55188948Sthompsa// | 56188948Sthompsa// +-----+----+ 57188948Sthompsa// |Join Block| 58188948Sthompsa// +----------+ 59188948Sthompsa// 60188948Sthompsa//===----------------------------------------------------------------------===// 61188948Sthompsa 62188948Sthompsa#include "llvm/ADT/SmallVector.h" 63188948Sthompsa#include "llvm/ADT/StringRef.h" 64188948Sthompsa#include "llvm/Analysis/AliasAnalysis.h" 65188948Sthompsa#include "llvm/Analysis/AliasSetTracker.h" 66188948Sthompsa#include "llvm/Analysis/GlobalsModRef.h" 67188948Sthompsa#include "llvm/Analysis/LoopAccessAnalysis.h" 68188948Sthompsa#include "llvm/Analysis/LoopInfo.h" 69188948Sthompsa#include "llvm/Analysis/LoopPass.h" 70188948Sthompsa#include "llvm/Analysis/OptimizationRemarkEmitter.h" 71188948Sthompsa#include "llvm/Analysis/ScalarEvolution.h" 72188948Sthompsa#include "llvm/IR/CallSite.h" 73188948Sthompsa#include "llvm/IR/Constants.h" 74188948Sthompsa#include "llvm/IR/Dominators.h" 75188948Sthompsa#include "llvm/IR/Instruction.h" 76188948Sthompsa#include "llvm/IR/Instructions.h" 77188948Sthompsa#include "llvm/IR/LLVMContext.h" 78188948Sthompsa#include "llvm/IR/MDBuilder.h" 79188948Sthompsa#include "llvm/IR/Metadata.h" 80188948Sthompsa#include "llvm/IR/Type.h" 81188948Sthompsa#include "llvm/IR/Value.h" 82188652Sed#include "llvm/InitializePasses.h" 83188652Sed#include "llvm/Pass.h" 84188652Sed#include "llvm/Support/Casting.h" 85188652Sed#include "llvm/Support/CommandLine.h" 86188102Sgabor#include "llvm/Support/Debug.h" 87188102Sgabor#include "llvm/Support/raw_ostream.h" 88187694Santoine#include "llvm/Transforms/Scalar.h" 89187694Santoine#include "llvm/Transforms/Utils.h" 90187694Santoine#include "llvm/Transforms/Utils/LoopUtils.h" 91187694Santoine#include "llvm/Transforms/Utils/LoopVersioning.h" 92187694Santoine#include <cassert> 93186716Santoine#include <memory> 94186716Santoine 95186437Sbzusing namespace llvm; 96186437Sbz 97185472Santoine#define DEBUG_TYPE "loop-versioning-licm" 98185472Santoine 99185472Santoinestatic const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable"; 100185472Santoine 101185472Santoine/// Threshold minimum allowed percentage for possible 102183442Sed/// invariant instructions in a loop. 103183442Sedstatic cl::opt<float> 104183442Sed LVInvarThreshold("licm-versioning-invariant-threshold", 105183442Sed cl::desc("LoopVersioningLICM's minimum allowed percentage" 106183442Sed "of possible invariant instructions per loop"), 107183442Sed cl::init(25), cl::Hidden); 108183113Sattilio 109183235Santoine/// Threshold for maximum allowed loop nest/depth 110183235Santoinestatic cl::opt<unsigned> LVLoopDepthThreshold( 111183026Santoine "licm-versioning-max-depth-threshold", 112183026Santoine cl::desc( 113182105Sed "LoopVersioningLICM's threshold for maximum allowed loop nest/depth"), 114182105Sed cl::init(2), cl::Hidden); 115182518Santoine 116182518Santoinenamespace { 117182518Santoine 118182518Santoinestruct LoopVersioningLICM : public LoopPass { 119182518Santoine static char ID; 120182518Santoine 121182518Santoine LoopVersioningLICM() 122182518Santoine : LoopPass(ID), LoopDepthThreshold(LVLoopDepthThreshold), 123182518Santoine InvariantThreshold(LVInvarThreshold) { 124182518Santoine initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry()); 125182518Santoine } 126182518Santoine 127182518Santoine bool runOnLoop(Loop *L, LPPassManager &LPM) override; 128182518Santoine 129182518Santoine void getAnalysisUsage(AnalysisUsage &AU) const override { 130182518Santoine AU.setPreservesCFG(); 131182518Santoine AU.addRequired<AAResultsWrapperPass>(); 132182518Santoine AU.addRequired<DominatorTreeWrapperPass>(); 133181905Sed AU.addRequiredID(LCSSAID); 134181905Sed AU.addRequired<LoopAccessLegacyAnalysis>(); 135181905Sed AU.addRequired<LoopInfoWrapperPass>(); 136180800Sed AU.addRequiredID(LoopSimplifyID); 137180800Sed AU.addRequired<ScalarEvolutionWrapperPass>(); 138180614Smarcel AU.addPreserved<AAResultsWrapperPass>(); 139180614Smarcel AU.addPreserved<GlobalsAAWrapperPass>(); 140180614Smarcel AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 141180614Smarcel } 142180614Smarcel 143180614Smarcel StringRef getPassName() const override { return "Loop Versioning for LICM"; } 144180331Smarcel 145180331Smarcel void reset() { 146180331Smarcel AA = nullptr; 147180331Smarcel SE = nullptr; 148180331Smarcel LAA = nullptr; 149180267Sjhb CurLoop = nullptr; 150180267Sjhb LoadAndStoreCounter = 0; 151180267Sjhb InvariantCounter = 0; 152180265Sjhb IsReadOnlyLoop = true; 153180265Sjhb ORE = nullptr; 154180265Sjhb CurAST.reset(); 155180259Sjhb } 156180259Sjhb 157180259Sjhb class AutoResetter { 158180259Sjhb public: 159180259Sjhb AutoResetter(LoopVersioningLICM &LVLICM) : LVLICM(LVLICM) {} 160180257Sjhb ~AutoResetter() { LVLICM.reset(); } 161180257Sjhb 162180257Sjhb private: 163180259Sjhb LoopVersioningLICM &LVLICM; 164180257Sjhb }; 165180257Sjhb 166180248Smarcelprivate: 167180248Smarcel // Current AliasAnalysis information 168180248Smarcel AliasAnalysis *AA = nullptr; 169180248Smarcel 170180248Smarcel // Current ScalarEvolution 171180230Smarcel ScalarEvolution *SE = nullptr; 172180230Smarcel 173180230Smarcel // Current LoopAccessAnalysis 174180230Smarcel LoopAccessLegacyAnalysis *LAA = nullptr; 175180230Smarcel 176180230Smarcel // Current Loop's LoopAccessInfo 177180230Smarcel const LoopAccessInfo *LAI = nullptr; 178180230Smarcel 179180159Sdanger // The current loop we are working on. 180180159Sdanger Loop *CurLoop = nullptr; 181180159Sdanger 182180496Santoine // AliasSet information for the current loop. 183180496Santoine std::unique_ptr<AliasSetTracker> CurAST; 184180496Santoine 185180496Santoine // Maximum loop nest threshold 186179784Sed unsigned LoopDepthThreshold; 187179784Sed 188179784Sed // Minimum invariant threshold 189179784Sed float InvariantThreshold; 190179784Sed 191179692Smarcel // Counter to track num of load & store 192179692Smarcel unsigned LoadAndStoreCounter = 0; 193179692Smarcel 194179315Sbz // Counter to track num of invariant 195179315Sbz unsigned InvariantCounter = 0; 196179315Sbz 197179315Sbz // Read only loop marker. 198179315Sbz bool IsReadOnlyLoop = true; 199179315Sbz 200179315Sbz // OptimizationRemarkEmitter 201179315Sbz OptimizationRemarkEmitter *ORE; 202179315Sbz 203179315Sbz bool isLegalForVersioning(); 204179315Sbz bool legalLoopStructure(); 205179315Sbz bool legalLoopInstructions(); 206179315Sbz bool legalLoopMemoryAccesses(); 207179315Sbz bool isLoopAlreadyVisited(); 208179315Sbz void setNoAliasToLoop(Loop *VerLoop); 209179315Sbz bool instructionSafeForVersioning(Instruction *I); 210179315Sbz}; 211179315Sbz 212179315Sbz} // end anonymous namespace 213179315Sbz 214179315Sbz/// Check loop structure and confirms it's good for LoopVersioningLICM. 215179315Sbzbool LoopVersioningLICM::legalLoopStructure() { 216179315Sbz // Loop must be in loop simplify form. 217179315Sbz if (!CurLoop->isLoopSimplifyForm()) { 218179315Sbz LLVM_DEBUG(dbgs() << " loop is not in loop-simplify form.\n"); 219179315Sbz return false; 220179315Sbz } 221179315Sbz // Loop should be innermost loop, if not return false. 222179315Sbz if (!CurLoop->getSubLoops().empty()) { 223179315Sbz LLVM_DEBUG(dbgs() << " loop is not innermost\n"); 224179315Sbz return false; 225179315Sbz } 226179315Sbz // Loop should have a single backedge, if not return false. 227179315Sbz if (CurLoop->getNumBackEdges() != 1) { 228179315Sbz LLVM_DEBUG(dbgs() << " loop has multiple backedges\n"); 229179315Sbz return false; 230179315Sbz } 231179315Sbz // Loop must have a single exiting block, if not return false. 232179315Sbz if (!CurLoop->getExitingBlock()) { 233179315Sbz LLVM_DEBUG(dbgs() << " loop has multiple exiting block\n"); 234179315Sbz return false; 235179315Sbz } 236179315Sbz // We only handle bottom-tested loop, i.e. loop in which the condition is 237179315Sbz // checked at the end of each iteration. With that we can assume that all 238179315Sbz // instructions in the loop are executed the same number of times. 239179315Sbz if (CurLoop->getExitingBlock() != CurLoop->getLoopLatch()) { 240179315Sbz LLVM_DEBUG(dbgs() << " loop is not bottom tested\n"); 241179315Sbz return false; 242179315Sbz } 243179315Sbz // Parallel loops must not have aliasing loop-invariant memory accesses. 244179315Sbz // Hence we don't need to version anything in this case. 245179315Sbz if (CurLoop->isAnnotatedParallel()) { 246179315Sbz LLVM_DEBUG(dbgs() << " Parallel loop is not worth versioning\n"); 247179315Sbz return false; 248179315Sbz } 249179315Sbz // Loop depth more then LoopDepthThreshold are not allowed 250179315Sbz if (CurLoop->getLoopDepth() > LoopDepthThreshold) { 251179315Sbz LLVM_DEBUG(dbgs() << " loop depth is more then threshold\n"); 252179315Sbz return false; 253179315Sbz } 254179315Sbz // We need to be able to compute the loop trip count in order 255179315Sbz // to generate the bound checks. 256179315Sbz const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop); 257179315Sbz if (ExitCount == SE->getCouldNotCompute()) { 258179315Sbz LLVM_DEBUG(dbgs() << " loop does not has trip count\n"); 259179315Sbz return false; 260179315Sbz } 261179315Sbz return true; 262179315Sbz} 263179315Sbz 264179315Sbz/// Check memory accesses in loop and confirms it's good for 265179315Sbz/// LoopVersioningLICM. 266179315Sbzbool LoopVersioningLICM::legalLoopMemoryAccesses() { 267179315Sbz bool HasMayAlias = false; 268179315Sbz bool TypeSafety = false; 269179315Sbz bool HasMod = false; 270179315Sbz // Memory check: 271179315Sbz // Transform phase will generate a versioned loop and also a runtime check to 272179315Sbz // ensure the pointers are independent and they don���t alias. 273179315Sbz // In version variant of loop, alias meta data asserts that all access are 274179315Sbz // mutually independent. 275179315Sbz // 276179315Sbz // Pointers aliasing in alias domain are avoided because with multiple 277179315Sbz // aliasing domains we may not be able to hoist potential loop invariant 278179315Sbz // access out of the loop. 279179315Sbz // 280179315Sbz // Iterate over alias tracker sets, and confirm AliasSets doesn't have any 281179315Sbz // must alias set. 282179315Sbz for (const auto &I : *CurAST) { 283179315Sbz const AliasSet &AS = I; 284179315Sbz // Skip Forward Alias Sets, as this should be ignored as part of 285179315Sbz // the AliasSetTracker object. 286179315Sbz if (AS.isForwardingAliasSet()) 287179315Sbz continue; 288179315Sbz // With MustAlias its not worth adding runtime bound check. 289179315Sbz if (AS.isMustAlias()) 290179315Sbz return false; 291179315Sbz Value *SomePtr = AS.begin()->getValue(); 292179315Sbz bool TypeCheck = true; 293179315Sbz // Check for Mod & MayAlias 294179315Sbz HasMayAlias |= AS.isMayAlias(); 295179315Sbz HasMod |= AS.isMod(); 296179315Sbz for (const auto &A : AS) { 297179315Sbz Value *Ptr = A.getValue(); 298179315Sbz // Alias tracker should have pointers of same data type. 299179315Sbz TypeCheck = (TypeCheck && (SomePtr->getType() == Ptr->getType())); 300179315Sbz } 301179315Sbz // At least one alias tracker should have pointers of same data type. 302179315Sbz TypeSafety |= TypeCheck; 303179315Sbz } 304179315Sbz // Ensure types should be of same type. 305179315Sbz if (!TypeSafety) { 306179315Sbz LLVM_DEBUG(dbgs() << " Alias tracker type safety failed!\n"); 307179315Sbz return false; 308179361Santoine } 309179361Santoine // Ensure loop body shouldn't be read only. 310179361Santoine if (!HasMod) { 311179361Santoine LLVM_DEBUG(dbgs() << " No memory modified in loop body\n"); 312179361Santoine return false; 313179361Santoine } 314179361Santoine // Make sure alias set has may alias case. 315179361Santoine // If there no alias memory ambiguity, return false. 316179361Santoine if (!HasMayAlias) { 317179361Santoine LLVM_DEBUG(dbgs() << " No ambiguity in memory access.\n"); 318179361Santoine return false; 319179361Santoine } 320179361Santoine return true; 321179361Santoine} 322179361Santoine 323179361Santoine/// Check loop instructions safe for Loop versioning. 324179361Santoine/// It returns true if it's safe else returns false. 325179361Santoine/// Consider following: 326179361Santoine/// 1) Check all load store in loop body are non atomic & non volatile. 327179361Santoine/// 2) Check function call safety, by ensuring its not accessing memory. 328178924Santoine/// 3) Loop body shouldn't have any may throw instruction. 329178924Santoine/// 4) Loop body shouldn't have any convergent or noduplicate instructions. 330178924Santoinebool LoopVersioningLICM::instructionSafeForVersioning(Instruction *I) { 331178924Santoine assert(I != nullptr && "Null instruction found!"); 332178924Santoine // Check function call safety 333177831Sflz if (auto *Call = dyn_cast<CallBase>(I)) { 334177831Sflz if (Call->isConvergent() || Call->cannotDuplicate()) { 335177831Sflz LLVM_DEBUG(dbgs() << " Convergent call site found.\n"); 336177831Sflz return false; 337177831Sflz } 338178331Santoine 339178331Santoine if (!AA->doesNotAccessMemory(Call)) { 340178331Santoine LLVM_DEBUG(dbgs() << " Unsafe call site found.\n"); 341178331Santoine return false; 342178331Santoine } 343178331Santoine } 344178331Santoine 345178331Santoine // Avoid loops with possiblity of throw 346178331Santoine if (I->mayThrow()) { 347178331Santoine LLVM_DEBUG(dbgs() << " May throw instruction found in loop body\n"); 348178331Santoine return false; 349178331Santoine } 350178331Santoine // If current instruction is load instructions 351178331Santoine // make sure it's a simple load (non atomic & non volatile) 352178331Santoine if (I->mayReadFromMemory()) { 353178331Santoine LoadInst *Ld = dyn_cast<LoadInst>(I); 354178924Santoine if (!Ld || !Ld->isSimple()) { 355178924Santoine LLVM_DEBUG(dbgs() << " Found a non-simple load.\n"); 356178924Santoine return false; 357178924Santoine } 358176422Sthompsa LoadAndStoreCounter++; 359176422Sthompsa Value *Ptr = Ld->getPointerOperand(); 360175690Sbrueffer // Check loop invariant. 361175690Sbrueffer if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) 362175690Sbrueffer InvariantCounter++; 363175576Sattilio } 364175576Sattilio // If current instruction is store instruction 365175227Sjhb // make sure it's a simple store (non atomic & non volatile) 366175227Sjhb else if (I->mayWriteToMemory()) { 367175227Sjhb StoreInst *St = dyn_cast<StoreInst>(I); 368174426Sdougb if (!St || !St->isSimple()) { 369174426Sdougb LLVM_DEBUG(dbgs() << " Found a non-simple store.\n"); 370174426Sdougb return false; 371177153Sbrueffer } 372177153Sbrueffer LoadAndStoreCounter++; 373174092Sbrooks Value *Ptr = St->getPointerOperand(); 374174092Sbrooks // Check loop invariant. 375174092Sbrooks if (SE->isLoopInvariant(SE->getSCEV(Ptr), CurLoop)) 376174092Sbrooks InvariantCounter++; 377176956Santoine 378176956Santoine IsReadOnlyLoop = false; 379176956Santoine } 380176956Santoine return true; 381174092Sbrooks} 382174061Sjb 383174061Sjb/// Check loop instructions and confirms it's good for 384175227Sjhb/// LoopVersioningLICM. 385175227Sjhbbool LoopVersioningLICM::legalLoopInstructions() { 386173466Simp // Resetting counters. 387173466Simp LoadAndStoreCounter = 0; 388173662Smarcel InvariantCounter = 0; 389173662Smarcel IsReadOnlyLoop = true; 390173662Smarcel using namespace ore; 391173662Smarcel // Iterate over loop blocks and instructions of each block and check 392173662Smarcel // instruction safety. 393173662Smarcel for (auto *Block : CurLoop->getBlocks()) 394175227Sjhb for (auto &Inst : *Block) { 395175227Sjhb // If instruction is unsafe just return false. 396172983Smtm if (!instructionSafeForVersioning(&Inst)) { 397172983Smtm ORE->emit([&]() { 398172390Sbushman return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopInst", &Inst) 399173176Sbushman << " Unsafe Loop Instruction"; 400172390Sbushman }); 401172390Sbushman return false; 402172570Sru } 403172570Sru } 404171786Smarcel // Get LoopAccessInfo from current loop. 405171786Smarcel LAI = &LAA->getInfo(CurLoop); 406171786Smarcel // Check LoopAccessInfo for need of runtime check. 407171786Smarcel if (LAI->getRuntimePointerChecking()->getChecks().empty()) { 408171696Sbz LLVM_DEBUG(dbgs() << " LAA: Runtime check not found !!\n"); 409171696Sbz return false; 410179368Sbz } 411171461Srwatson // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold 412171461Srwatson if (LAI->getNumRuntimePointerChecks() > 413171461Srwatson VectorizerParams::RuntimeMemoryCheckThreshold) { 414171461Srwatson LLVM_DEBUG( 415171461Srwatson dbgs() << " LAA: Runtime checks are more than threshold !!\n"); 416171461Srwatson ORE->emit([&]() { 417171461Srwatson return OptimizationRemarkMissed(DEBUG_TYPE, "RuntimeCheck", 418171461Srwatson CurLoop->getStartLoc(), 419171461Srwatson CurLoop->getHeader()) 420171461Srwatson << "Number of runtime checks " 421171461Srwatson << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks()) 422171461Srwatson << " exceeds threshold " 423171461Srwatson << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold); 424171461Srwatson }); 425171461Srwatson return false; 426171461Srwatson } 427171461Srwatson // Loop should have at least one invariant load or store instruction. 428171461Srwatson if (!InvariantCounter) { 429171461Srwatson LLVM_DEBUG(dbgs() << " Invariant not found !!\n"); 430171461Srwatson return false; 431171461Srwatson } 432171461Srwatson // Read only loop not allowed. 433171461Srwatson if (IsReadOnlyLoop) { 434171461Srwatson LLVM_DEBUG(dbgs() << " Found a read-only loop!\n"); 435171461Srwatson return false; 436171461Srwatson } 437171461Srwatson // Profitablity check: 438171461Srwatson // Check invariant threshold, should be in limit. 439171461Srwatson if (InvariantCounter * 100 < InvariantThreshold * LoadAndStoreCounter) { 440171461Srwatson LLVM_DEBUG( 441171461Srwatson dbgs() 442171461Srwatson << " Invariant load & store are less then defined threshold\n"); 443171461Srwatson LLVM_DEBUG(dbgs() << " Invariant loads & stores: " 444171461Srwatson << ((InvariantCounter * 100) / LoadAndStoreCounter) 445171461Srwatson << "%\n"); 446171461Srwatson LLVM_DEBUG(dbgs() << " Invariant loads & store threshold: " 447171461Srwatson << InvariantThreshold << "%\n"); 448171461Srwatson ORE->emit([&]() { 449171461Srwatson return OptimizationRemarkMissed(DEBUG_TYPE, "InvariantThreshold", 450171461Srwatson CurLoop->getStartLoc(), 451171461Srwatson CurLoop->getHeader()) 452171461Srwatson << "Invariant load & store " 453171461Srwatson << NV("LoadAndStoreCounter", 454171461Srwatson ((InvariantCounter * 100) / LoadAndStoreCounter)) 455171461Srwatson << " are less then defined threshold " 456171461Srwatson << NV("Threshold", InvariantThreshold); 457171461Srwatson }); 458171461Srwatson return false; 459171461Srwatson } 460171461Srwatson return true; 461171461Srwatson} 462171461Srwatson 463171461Srwatson/// It checks loop is already visited or not. 464171461Srwatson/// check loop meta data, if loop revisited return true 465171461Srwatson/// else false. 466171461Srwatsonbool LoopVersioningLICM::isLoopAlreadyVisited() { 467171461Srwatson // Check LoopVersioningLICM metadata into loop 468171461Srwatson if (findStringMetadataForLoop(CurLoop, LICMVersioningMetaData)) { 469171461Srwatson return true; 470171461Srwatson } 471171461Srwatson return false; 472171461Srwatson} 473171461Srwatson 474171461Srwatson/// Checks legality for LoopVersioningLICM by considering following: 475171461Srwatson/// a) loop structure legality b) loop instruction legality 476171461Srwatson/// c) loop memory access legality. 477171461Srwatson/// Return true if legal else returns false. 478171461Srwatsonbool LoopVersioningLICM::isLegalForVersioning() { 479171461Srwatson using namespace ore; 480171461Srwatson LLVM_DEBUG(dbgs() << "Loop: " << *CurLoop); 481171461Srwatson // Make sure not re-visiting same loop again. 482171461Srwatson if (isLoopAlreadyVisited()) { 483176956Santoine LLVM_DEBUG( 484176956Santoine dbgs() << " Revisiting loop in LoopVersioningLICM not allowed.\n\n"); 485176956Santoine return false; 486176956Santoine } 487176956Santoine // Check loop structure leagality. 488176956Santoine if (!legalLoopStructure()) { 489171274Sbz LLVM_DEBUG( 490171274Sbz dbgs() << " Loop structure not suitable for LoopVersioningLICM\n\n"); 491171274Sbz ORE->emit([&]() { 492171274Sbz return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopStruct", 493171274Sbz CurLoop->getStartLoc(), 494171274Sbz CurLoop->getHeader()) 495171274Sbz << " Unsafe Loop structure"; 496171274Sbz }); 497171274Sbz return false; 498179368Sbz } 499171205Sbz // Check loop instruction leagality. 500171205Sbz if (!legalLoopInstructions()) { 501171205Sbz LLVM_DEBUG( 502171205Sbz dbgs() 503171205Sbz << " Loop instructions not suitable for LoopVersioningLICM\n\n"); 504171175Smlaier return false; 505171175Smlaier } 506171137Sbz // Check loop memory access leagality. 507171137Sbz if (!legalLoopMemoryAccesses()) { 508171137Sbz LLVM_DEBUG( 509171137Sbz dbgs() 510171137Sbz << " Loop memory access not suitable for LoopVersioningLICM\n\n"); 511171137Sbz ORE->emit([&]() { 512171137Sbz return OptimizationRemarkMissed(DEBUG_TYPE, "IllegalLoopMemoryAccess", 513171137Sbz CurLoop->getStartLoc(), 514171137Sbz CurLoop->getHeader()) 515171137Sbz << " Unsafe Loop memory access"; 516171137Sbz }); 517171137Sbz return false; 518171137Sbz } 519171137Sbz // Loop versioning is feasible, return true. 520171137Sbz LLVM_DEBUG(dbgs() << " Loop Versioning found to be beneficial\n\n"); 521171137Sbz ORE->emit([&]() { 522171137Sbz return OptimizationRemark(DEBUG_TYPE, "IsLegalForVersioning", 523171137Sbz CurLoop->getStartLoc(), CurLoop->getHeader()) 524171137Sbz << " Versioned loop for LICM." 525171131Sthompsa << " Number of runtime checks we had to insert " 526171131Sthompsa << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks()); 527171143Sthompsa }); 528171023Srafan return true; 529171023Srafan} 530171023Srafan 531171023Srafan/// Update loop with aggressive aliasing assumptions. 532171023Srafan/// It marks no-alias to any pairs of memory operations by assuming 533171023Srafan/// loop should not have any must-alias memory accesses pairs. 534171388Sdougb/// During LoopVersioningLICM legality we ignore loops having must 535171388Sdougb/// aliasing memory accesses. 536171388Sdougbvoid LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) { 537171388Sdougb // Get latch terminator instruction. 538170926Srafan Instruction *I = VerLoop->getLoopLatch()->getTerminator(); 539170926Srafan // Create alias scope domain. 540170926Srafan MDBuilder MDB(I->getContext()); 541170926Srafan MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain("LVDomain"); 542170926Srafan StringRef Name = "LVAliasScope"; 543170926Srafan SmallVector<Metadata *, 4> Scopes, NoAliases; 544170926Srafan MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); 545170926Srafan // Iterate over each instruction of loop. 546170926Srafan // set no-alias for all load & store instructions. 547170926Srafan for (auto *Block : CurLoop->getBlocks()) { 548170926Srafan for (auto &Inst : *Block) { 549170926Srafan // Only interested in instruction that may modify or read memory. 550170926Srafan if (!Inst.mayReadFromMemory() && !Inst.mayWriteToMemory()) 551170926Srafan continue; 552170926Srafan Scopes.push_back(NewScope); 553170926Srafan NoAliases.push_back(NewScope); 554170926Srafan // Set no-alias for current instruction. 555170926Srafan Inst.setMetadata( 556170926Srafan LLVMContext::MD_noalias, 557170926Srafan MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_noalias), 558170926Srafan MDNode::get(Inst.getContext(), NoAliases))); 559170926Srafan // set alias-scope for current instruction. 560170926Srafan Inst.setMetadata( 561170926Srafan LLVMContext::MD_alias_scope, 562170926Srafan MDNode::concatenate(Inst.getMetadata(LLVMContext::MD_alias_scope), 563170926Srafan MDNode::get(Inst.getContext(), Scopes))); 564170926Srafan } 565170926Srafan } 566170926Srafan} 567170926Srafan 568170926Srafanbool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) { 569170926Srafan // This will automatically release all resources hold by the current 570170926Srafan // LoopVersioningLICM object. 571170926Srafan AutoResetter Resetter(*this); 572170926Srafan 573170926Srafan if (skipLoop(L)) 574170926Srafan return false; 575170926Srafan 576170926Srafan // Do not do the transformation if disabled by metadata. 577170926Srafan if (hasLICMVersioningTransformation(L) & TM_Disable) 578170926Srafan return false; 579176956Santoine 580176956Santoine // Get Analysis information. 581176956Santoine AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 582176956Santoine SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 583176956Santoine LAA = &getAnalysis<LoopAccessLegacyAnalysis>(); 584176956Santoine ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 585176956Santoine LAI = nullptr; 586176956Santoine // Set Current Loop 587176956Santoine CurLoop = L; 588176956Santoine CurAST.reset(new AliasSetTracker(*AA)); 589176956Santoine 590176956Santoine // Loop over the body of this loop, construct AST. 591176956Santoine LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 592176956Santoine for (auto *Block : L->getBlocks()) { 593176956Santoine if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop. 594176956Santoine CurAST->add(*Block); // Incorporate the specified basic block 595176956Santoine } 596176956Santoine 597176956Santoine bool Changed = false; 598176956Santoine 599176956Santoine // Check feasiblity of LoopVersioningLICM. 600176956Santoine // If versioning found to be feasible and beneficial then proceed 601176956Santoine // else simply return, by cleaning up memory. 602176956Santoine if (isLegalForVersioning()) { 603176956Santoine // Do loop versioning. 604176956Santoine // Create memcheck for memory accessed inside loop. 605176956Santoine // Clone original loop, and set blocks properly. 606176956Santoine DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 607176956Santoine LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true); 608176956Santoine LVer.versionLoop(); 609176956Santoine // Set Loop Versioning metaData for original loop. 610176956Santoine addStringMetadataToLoop(LVer.getNonVersionedLoop(), LICMVersioningMetaData); 611176956Santoine // Set Loop Versioning metaData for version loop. 612176956Santoine addStringMetadataToLoop(LVer.getVersionedLoop(), LICMVersioningMetaData); 613176956Santoine // Set "llvm.mem.parallel_loop_access" metaData to versioned loop. 614176956Santoine // FIXME: "llvm.mem.parallel_loop_access" annotates memory access 615171476Sdelphij // instructions, not loops. 616171476Sdelphij addStringMetadataToLoop(LVer.getVersionedLoop(), 617170312Sdelphij "llvm.mem.parallel_loop_access"); 618170312Sdelphij // Update version loop with aggressive aliasing assumption. 619170926Srafan setNoAliasToLoop(LVer.getVersionedLoop()); 620173816Sru Changed = true; 621169815Sdelphij } 622169815Sdelphij return Changed; 623169815Sdelphij} 624169815Sdelphij 625169815Sdelphijchar LoopVersioningLICM::ID = 0; 626169815Sdelphij 627169815SdelphijINITIALIZE_PASS_BEGIN(LoopVersioningLICM, "loop-versioning-licm", 628169815Sdelphij "Loop Versioning For LICM", false, false) 629169815SdelphijINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 630169815SdelphijINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 631169815SdelphijINITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 632169815SdelphijINITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) 633170204SruINITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis) 634169815SdelphijINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 635169815SdelphijINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 636169815SdelphijINITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 637169815SdelphijINITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 638170204SruINITIALIZE_PASS_END(LoopVersioningLICM, "loop-versioning-licm", 639169815Sdelphij "Loop Versioning For LICM", false, false) 640169815Sdelphij 641169815SdelphijPass *llvm::createLoopVersioningLICMPass() { return new LoopVersioningLICM(); } 642169815Sdelphij