1198892Srdivacky//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===// 2198892Srdivacky// 3198892Srdivacky// The LLVM Compiler Infrastructure 4198892Srdivacky// 5198892Srdivacky// This file is distributed under the University of Illinois Open Source 6198892Srdivacky// License. See LICENSE.TXT for details. 7198892Srdivacky// 8198892Srdivacky//===----------------------------------------------------------------------===// 9198892Srdivacky// 10198892Srdivacky// This pass implements a simple loop unroller. It works best when loops have 11198892Srdivacky// been canonicalized by the -indvars pass, allowing it to determine the trip 12198892Srdivacky// counts of loops easily. 13198892Srdivacky//===----------------------------------------------------------------------===// 14198892Srdivacky 15198892Srdivacky#define DEBUG_TYPE "loop-unroll" 16198892Srdivacky#include "llvm/Transforms/Scalar.h" 17249423Sdim#include "llvm/Analysis/CodeMetrics.h" 18198892Srdivacky#include "llvm/Analysis/LoopPass.h" 19212904Sdim#include "llvm/Analysis/ScalarEvolution.h" 20249423Sdim#include "llvm/Analysis/TargetTransformInfo.h" 21249423Sdim#include "llvm/IR/DataLayout.h" 22249423Sdim#include "llvm/IR/IntrinsicInst.h" 23198892Srdivacky#include "llvm/Support/CommandLine.h" 24198892Srdivacky#include "llvm/Support/Debug.h" 25198892Srdivacky#include "llvm/Support/raw_ostream.h" 26198892Srdivacky#include "llvm/Transforms/Utils/UnrollLoop.h" 27198892Srdivacky#include <climits> 28198892Srdivacky 29198892Srdivackyusing namespace llvm; 30198892Srdivacky 31198892Srdivackystatic cl::opt<unsigned> 32218893SdimUnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, 33198892Srdivacky cl::desc("The cut-off point for automatic loop unrolling")); 34198892Srdivacky 35198892Srdivackystatic cl::opt<unsigned> 36198892SrdivackyUnrollCount("unroll-count", cl::init(0), cl::Hidden, 37198892Srdivacky cl::desc("Use this unroll count for all loops, for testing purposes")); 38198892Srdivacky 39198892Srdivackystatic cl::opt<bool> 40198892SrdivackyUnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, 41198892Srdivacky cl::desc("Allows loops to be partially unrolled until " 42198892Srdivacky "-unroll-threshold loop size is reached.")); 43198892Srdivacky 44226633Sdimstatic cl::opt<bool> 45234353SdimUnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, 46234353Sdim cl::desc("Unroll loops with run-time trip counts")); 47226633Sdim 48198892Srdivackynamespace { 49198892Srdivacky class LoopUnroll : public LoopPass { 50198892Srdivacky public: 51198892Srdivacky static char ID; // Pass ID, replacement for typeid 52263508Sdim LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { 53221345Sdim CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); 54221345Sdim CurrentCount = (C == -1) ? UnrollCount : unsigned(C); 55221345Sdim CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; 56263508Sdim CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R; 57221345Sdim 58221345Sdim UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); 59263508Sdim UserAllowPartial = (P != -1) || 60263508Sdim (UnrollAllowPartial.getNumOccurrences() > 0); 61263508Sdim UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); 62263508Sdim UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); 63226633Sdim 64218893Sdim initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); 65218893Sdim } 66198892Srdivacky 67198892Srdivacky /// A magic value for use with the Threshold parameter to indicate 68198892Srdivacky /// that the loop unroll should be performed regardless of how much 69198892Srdivacky /// code expansion would result. 70198892Srdivacky static const unsigned NoThreshold = UINT_MAX; 71226633Sdim 72218893Sdim // Threshold to use when optsize is specified (and there is no 73218893Sdim // explicit -unroll-threshold). 74218893Sdim static const unsigned OptSizeUnrollThreshold = 50; 75226633Sdim 76234353Sdim // Default unroll count for loops with run-time trip count if 77234353Sdim // -unroll-count is not set 78234353Sdim static const unsigned UnrollRuntimeCount = 8; 79234353Sdim 80221345Sdim unsigned CurrentCount; 81218893Sdim unsigned CurrentThreshold; 82221345Sdim bool CurrentAllowPartial; 83263508Sdim bool CurrentRuntime; 84263508Sdim bool UserCount; // CurrentCount is user-specified. 85221345Sdim bool UserThreshold; // CurrentThreshold is user-specified. 86263508Sdim bool UserAllowPartial; // CurrentAllowPartial is user-specified. 87263508Sdim bool UserRuntime; // CurrentRuntime is user-specified. 88198892Srdivacky 89198892Srdivacky bool runOnLoop(Loop *L, LPPassManager &LPM); 90198892Srdivacky 91198892Srdivacky /// This transformation requires natural loop information & requires that 92198892Srdivacky /// loop preheaders be inserted into the CFG... 93198892Srdivacky /// 94198892Srdivacky virtual void getAnalysisUsage(AnalysisUsage &AU) const { 95212904Sdim AU.addRequired<LoopInfo>(); 96212904Sdim AU.addPreserved<LoopInfo>(); 97198892Srdivacky AU.addRequiredID(LoopSimplifyID); 98212904Sdim AU.addPreservedID(LoopSimplifyID); 99198892Srdivacky AU.addRequiredID(LCSSAID); 100198892Srdivacky AU.addPreservedID(LCSSAID); 101226633Sdim AU.addRequired<ScalarEvolution>(); 102212904Sdim AU.addPreserved<ScalarEvolution>(); 103249423Sdim AU.addRequired<TargetTransformInfo>(); 104198892Srdivacky // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. 105198892Srdivacky // If loop unroll does not preserve dom info then LCSSA pass on next 106198892Srdivacky // loop will receive invalid dom info. 107198892Srdivacky // For now, recreate dom info, if loop is unrolled. 108198892Srdivacky AU.addPreserved<DominatorTree>(); 109198892Srdivacky } 110198892Srdivacky }; 111198892Srdivacky} 112198892Srdivacky 113198892Srdivackychar LoopUnroll::ID = 0; 114218893SdimINITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 115249423SdimINITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 116218893SdimINITIALIZE_PASS_DEPENDENCY(LoopInfo) 117218893SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 118218893SdimINITIALIZE_PASS_DEPENDENCY(LCSSA) 119234353SdimINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 120218893SdimINITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) 121198892Srdivacky 122263508SdimPass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial, 123263508Sdim int Runtime) { 124263508Sdim return new LoopUnroll(Threshold, Count, AllowPartial, Runtime); 125221345Sdim} 126198892Srdivacky 127198892Srdivacky/// ApproximateLoopSize - Approximate the size of the loop. 128226633Sdimstatic unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, 129249423Sdim bool &NotDuplicatable, 130249423Sdim const TargetTransformInfo &TTI) { 131198892Srdivacky CodeMetrics Metrics; 132198892Srdivacky for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 133198892Srdivacky I != E; ++I) 134249423Sdim Metrics.analyzeBasicBlock(*I, TTI); 135218893Sdim NumCalls = Metrics.NumInlineCandidates; 136249423Sdim NotDuplicatable = Metrics.notDuplicatable; 137226633Sdim 138218893Sdim unsigned LoopSize = Metrics.NumInsts; 139226633Sdim 140218893Sdim // Don't allow an estimate of size zero. This would allows unrolling of loops 141218893Sdim // with huge iteration counts, which is a compile time problem even if it's 142218893Sdim // not a problem for code quality. 143218893Sdim if (LoopSize == 0) LoopSize = 1; 144226633Sdim 145218893Sdim return LoopSize; 146198892Srdivacky} 147198892Srdivacky 148198892Srdivackybool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { 149198892Srdivacky LoopInfo *LI = &getAnalysis<LoopInfo>(); 150226633Sdim ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); 151249423Sdim const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>(); 152198892Srdivacky 153198892Srdivacky BasicBlock *Header = L->getHeader(); 154202375Srdivacky DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() 155198892Srdivacky << "] Loop %" << Header->getName() << "\n"); 156198892Srdivacky (void)Header; 157226633Sdim 158263508Sdim TargetTransformInfo::UnrollingPreferences UP; 159263508Sdim UP.Threshold = CurrentThreshold; 160263508Sdim UP.OptSizeThreshold = OptSizeUnrollThreshold; 161263508Sdim UP.Count = CurrentCount; 162263508Sdim UP.Partial = CurrentAllowPartial; 163263508Sdim UP.Runtime = CurrentRuntime; 164263508Sdim TTI.getUnrollingPreferences(L, UP); 165263508Sdim 166218893Sdim // Determine the current unrolling threshold. While this is normally set 167218893Sdim // from UnrollThreshold, it is overridden to a smaller value if the current 168218893Sdim // function is marked as optimize-for-size, and the unroll threshold was 169218893Sdim // not user specified. 170263508Sdim unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; 171226633Sdim if (!UserThreshold && 172249423Sdim Header->getParent()->getAttributes(). 173249423Sdim hasAttribute(AttributeSet::FunctionIndex, 174249423Sdim Attribute::OptimizeForSize)) 175263508Sdim Threshold = UP.OptSizeThreshold; 176198892Srdivacky 177226633Sdim // Find trip count and trip multiple if count is not available 178226633Sdim unsigned TripCount = 0; 179226633Sdim unsigned TripMultiple = 1; 180234353Sdim // Find "latch trip count". UnrollLoop assumes that control cannot exit 181234353Sdim // via the loop latch on any iteration prior to TripCount. The loop may exit 182234353Sdim // early via an earlier branch. 183234353Sdim BasicBlock *LatchBlock = L->getLoopLatch(); 184234353Sdim if (LatchBlock) { 185234353Sdim TripCount = SE->getSmallConstantTripCount(L, LatchBlock); 186234353Sdim TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); 187226633Sdim } 188263508Sdim 189263508Sdim bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime; 190263508Sdim 191234353Sdim // Use a default unroll-count if the user doesn't specify a value 192234353Sdim // and the trip count is a run-time value. The default is different 193234353Sdim // for run-time or compile-time trip count loops. 194263508Sdim unsigned Count = UserCount ? CurrentCount : UP.Count; 195263508Sdim if (Runtime && Count == 0 && TripCount == 0) 196234353Sdim Count = UnrollRuntimeCount; 197234353Sdim 198198892Srdivacky if (Count == 0) { 199198892Srdivacky // Conservative heuristic: if we know the trip count, see if we can 200198892Srdivacky // completely unroll (subject to the threshold, checked below); otherwise 201198892Srdivacky // try to find greatest modulo of the trip count which is still under 202198892Srdivacky // threshold value. 203198892Srdivacky if (TripCount == 0) 204198892Srdivacky return false; 205198892Srdivacky Count = TripCount; 206198892Srdivacky } 207198892Srdivacky 208198892Srdivacky // Enforce the threshold. 209221345Sdim if (Threshold != NoThreshold) { 210218893Sdim unsigned NumInlineCandidates; 211249423Sdim bool notDuplicatable; 212249423Sdim unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, 213249423Sdim notDuplicatable, TTI); 214202375Srdivacky DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); 215249423Sdim if (notDuplicatable) { 216249423Sdim DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" 217249423Sdim << " instructions.\n"); 218249423Sdim return false; 219249423Sdim } 220218893Sdim if (NumInlineCandidates != 0) { 221218893Sdim DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); 222203954Srdivacky return false; 223203954Srdivacky } 224198892Srdivacky uint64_t Size = (uint64_t)LoopSize*Count; 225221345Sdim if (TripCount != 1 && Size > Threshold) { 226202375Srdivacky DEBUG(dbgs() << " Too large to fully unroll with count: " << Count 227221345Sdim << " because size: " << Size << ">" << Threshold << "\n"); 228263508Sdim bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; 229263508Sdim if (!AllowPartial && !(Runtime && TripCount == 0)) { 230202375Srdivacky DEBUG(dbgs() << " will not try to unroll partially because " 231198892Srdivacky << "-unroll-allow-partial not given\n"); 232198892Srdivacky return false; 233198892Srdivacky } 234234353Sdim if (TripCount) { 235234353Sdim // Reduce unroll count to be modulo of TripCount for partial unrolling 236234353Sdim Count = Threshold / LoopSize; 237234353Sdim while (Count != 0 && TripCount%Count != 0) 238234353Sdim Count--; 239198892Srdivacky } 240263508Sdim else if (Runtime) { 241234353Sdim // Reduce unroll count to be a lower power-of-two value 242234353Sdim while (Count != 0 && Size > Threshold) { 243234353Sdim Count >>= 1; 244234353Sdim Size = LoopSize*Count; 245234353Sdim } 246234353Sdim } 247198892Srdivacky if (Count < 2) { 248202375Srdivacky DEBUG(dbgs() << " could not unroll partially\n"); 249198892Srdivacky return false; 250198892Srdivacky } 251202375Srdivacky DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); 252198892Srdivacky } 253198892Srdivacky } 254198892Srdivacky 255198892Srdivacky // Unroll the loop. 256263508Sdim if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, &LPM)) 257198892Srdivacky return false; 258198892Srdivacky 259198892Srdivacky return true; 260198892Srdivacky} 261