//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "mem2reg"

STATISTIC(NumLocalPromoted, "Number of allocas promoted within one block");
STATISTIC(NumSingleStore,   "Number of allocas promoted with a single store");
STATISTIC(NumDeadAlloca,    "Number of dead allocas removed");
STATISTIC(NumPHIInsert,     "Number of PHI nodes inserted");

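// For example, an alloca like the following is promotable: every use is a
// direct, non-volatile load or store of the allocated type (IR sketch for
// illustration):
//
//   %x = alloca i32
//   store i32 4, ptr %x
//   %v = load i32, ptr %x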
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // Only allow direct and non-volatile loads and stores...
  for (const User *U : AI->users()) {
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Note that atomic loads can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (LI->isVolatile() || LI->getType() != AI->getAllocatedType())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getValueOperand() == AI ||
          SI->getValueOperand()->getType() != AI->getAllocatedType())
        return false; // Don't allow a store OF the AI, only INTO the AI.
      // Note that atomic stores can be transformed; atomic semantics do
      // not have any meaning for a local alloca.
      if (SI->isVolatile())
        return false;
    } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
        return false;
    } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
        return false;
    } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
      if (!GEPI->hasAllZeroIndices())
        return false;
      if (!onlyUsedByLifetimeMarkersOrDroppableInsts(GEPI))
        return false;
    } else if (const AddrSpaceCastInst *ASCI = dyn_cast<AddrSpaceCastInst>(U)) {
      if (!onlyUsedByLifetimeMarkers(ASCI))
        return false;
    } else {
      return false;
    }
  }

  return true;
}

namespace {

/// Helper for updating assignment tracking debug info when promoting allocas.
class AssignmentTrackingInfo {
  /// dbg.assign intrinsics linked to the alloca, with at most one per
  /// variable fragment (i.e. this may not be a comprehensive set if there
  /// are multiple dbg.assigns for one variable fragment).
  SmallVector<DbgVariableIntrinsic *> DbgAssigns;

public:
  void init(AllocaInst *AI) {
    SmallSet<DebugVariable, 2> Vars;
    for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(AI)) {
      if (Vars.insert(DebugVariable(DAI)).second)
        DbgAssigns.push_back(DAI);
    }
  }

  /// Update assignment tracking debug info for the to-be-deleted store
  /// \p ToDelete, which stores to this alloca.
  void updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB) const {
    // There's nothing to do if the alloca doesn't have any variables using
    // assignment tracking.
    if (DbgAssigns.empty()) {
      assert(at::getAssignmentMarkers(ToDelete).empty());
      return;
    }

    // Just leave dbg.assign intrinsics in place and remember that we've seen
    // one for each variable fragment.
    SmallSet<DebugVariable, 2> VarHasDbgAssignForStore;
    for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete))
      VarHasDbgAssignForStore.insert(DebugVariable(DAI));

    // It's possible for variables using assignment tracking to have no
    // dbg.assign linked to this store. These are variables in DbgAssigns that
    // are missing from VarHasDbgAssignForStore. Since there isn't a dbg.assign
    // to mark the assignment - and the store is going to be deleted - insert a
    // dbg.value to do that now. An untracked store may be either one that
    // cannot be represented using assignment tracking (non-const offset or
    // size) or one that is trackable but has had its DIAssignID attachment
    // dropped accidentally.
    for (auto *DAI : DbgAssigns) {
      if (VarHasDbgAssignForStore.contains(DebugVariable(DAI)))
        continue;
      ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB);
    }
  }

  /// Update assignment tracking debug info for the newly inserted PHI
  /// \p NewPhi.
  void updateForNewPhi(PHINode *NewPhi, DIBuilder &DIB) const {
    // Regardless of the position of dbg.assigns relative to stores, the
    // incoming values into a new PHI should be the same for the (imaginary)
    // debug-phi.
    for (auto *DAI : DbgAssigns)
      ConvertDebugDeclareToDebugValue(DAI, NewPhi, DIB);
  }

  void clear() { DbgAssigns.clear(); }
  bool empty() { return DbgAssigns.empty(); }
};

struct AllocaInfo {
  using DbgUserVec = SmallVector<DbgVariableIntrinsic *, 1>;

  SmallVector<BasicBlock *, 32> DefiningBlocks;
  SmallVector<BasicBlock *, 32> UsingBlocks;

  StoreInst *OnlyStore;
  BasicBlock *OnlyBlock;
  bool OnlyUsedInOneBlock;

  /// Debug users of the alloca - does not include dbg.assign intrinsics.
  DbgUserVec DbgUsers;
  /// Helper to update assignment tracking debug info.
  AssignmentTrackingInfo AssignmentTracking;

  void clear() {
    DefiningBlocks.clear();
    UsingBlocks.clear();
    OnlyStore = nullptr;
    OnlyBlock = nullptr;
    OnlyUsedInOneBlock = true;
    DbgUsers.clear();
    AssignmentTracking.clear();
  }

  /// Scan the uses of the specified alloca, filling in the AllocaInfo used
  /// by the rest of the pass to reason about the uses of this alloca.
  void AnalyzeAlloca(AllocaInst *AI) {
    clear();

    // As we scan the uses of the alloca instruction, keep track of stores,
    // and decide whether all of the loads and stores to the alloca are within
    // the same basic block.
    for (User *U : AI->users()) {
      Instruction *User = cast<Instruction>(U);

      if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
        // Remember the basic blocks which define new values for the alloca.
        DefiningBlocks.push_back(SI->getParent());
        OnlyStore = SI;
      } else {
        // Otherwise it must be a load instruction; keep track of variable
        // reads.
        LoadInst *LI = cast<LoadInst>(User);
        UsingBlocks.push_back(LI->getParent());
      }

      if (OnlyUsedInOneBlock) {
        if (!OnlyBlock)
          OnlyBlock = User->getParent();
        else if (OnlyBlock != User->getParent())
          OnlyUsedInOneBlock = false;
      }
    }
    DbgUserVec AllDbgUsers;
    findDbgUsers(AllDbgUsers, AI);
    std::copy_if(AllDbgUsers.begin(), AllDbgUsers.end(),
                 std::back_inserter(DbgUsers), [](DbgVariableIntrinsic *DII) {
                   return !isa<DbgAssignIntrinsic>(DII);
                 });
    AssignmentTracking.init(AI);
  }
};

/// Data package used by RenamePass().
struct RenamePassData {
  using ValVector = std::vector<Value *>;
  using LocationVector = std::vector<DebugLoc>;

  RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L)
      : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}

  BasicBlock *BB;
  BasicBlock *Pred;
  ValVector Values;
  LocationVector Locations;
};

/// This assigns and keeps a per-bb relative ordering of load/store
/// instructions in the block that directly load or store an alloca.
///
/// This functionality is important because it avoids scanning large basic
/// blocks multiple times when promoting many allocas in the same block.
class LargeBlockInfo {
  /// For each instruction that we track, keep the index of the
  /// instruction.
  ///
  /// The index starts out as the number of the instruction from the start of
  /// the block.
  DenseMap<const Instruction *, unsigned> InstNumbers;

public:

  /// This code only looks at accesses to allocas.
  static bool isInterestingInstruction(const Instruction *I) {
    return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
           (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
  }

  /// Get or calculate the index of the specified instruction.
  unsigned getInstructionIndex(const Instruction *I) {
    assert(isInterestingInstruction(I) &&
           "Not a load/store to/from an alloca?");

    // If we already have this instruction number, return it.
    DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
    if (It != InstNumbers.end())
      return It->second;

    // Scan the whole block to get the instruction.  This accumulates
    // information for every interesting instruction in the block, in order to
    // avoid gratuitous rescans.
    const BasicBlock *BB = I->getParent();
    unsigned InstNo = 0;
    for (const Instruction &BBI : *BB)
      if (isInterestingInstruction(&BBI))
        InstNumbers[&BBI] = InstNo++;
    It = InstNumbers.find(I);

    assert(It != InstNumbers.end() && "Didn't insert instruction?");
    return It->second;
  }

  void deleteValue(const Instruction *I) { InstNumbers.erase(I); }

  void clear() { InstNumbers.clear(); }
};

struct PromoteMem2Reg {
  /// The alloca instructions being promoted.
  std::vector<AllocaInst *> Allocas;

  DominatorTree &DT;
  DIBuilder DIB;

  /// A cache of @llvm.assume intrinsics used by simplifyInstruction.
  AssumptionCache *AC;

  const SimplifyQuery SQ;

  /// Reverse mapping of Allocas.
  DenseMap<AllocaInst *, unsigned> AllocaLookup;

  /// The PhiNodes we're adding.
  ///
  /// This map is used to simplify some Phi nodes as we iterate over it, so
  /// it should have deterministic iterators.  We could use a MapVector, but
  /// since we already maintain a map from BasicBlock* to a stable numbering
  /// (BBNumbers), the DenseMap is more efficient (also supports removal).
  DenseMap<std::pair<unsigned, unsigned>, PHINode *> NewPhiNodes;

  /// For each PHI node, keep track of which entry in Allocas it corresponds
  /// to.
  DenseMap<PHINode *, unsigned> PhiToAllocaMap;

  /// For each alloca, we keep track of the dbg.declare intrinsic that
  /// describes it, if any, so that we can convert it to a dbg.value
  /// intrinsic if the alloca gets promoted.
  SmallVector<AllocaInfo::DbgUserVec, 8> AllocaDbgUsers;

  /// For each alloca, keep an instance of a helper class that gives us an easy
  /// way to update assignment tracking debug info if the alloca is promoted.
  SmallVector<AssignmentTrackingInfo, 8> AllocaATInfo;

  /// The set of basic blocks the renamer has already visited.
  SmallPtrSet<BasicBlock *, 16> Visited;

  /// Contains a stable numbering of basic blocks to avoid non-deterministic
  /// behavior.
  DenseMap<BasicBlock *, unsigned> BBNumbers;

  /// Lazily compute the number of predecessors a block has.
  DenseMap<const BasicBlock *, unsigned> BBNumPreds;

public:
  PromoteMem2Reg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                 AssumptionCache *AC)
      : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
        DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
                   nullptr, &DT, AC) {}

  void run();

private:
  void RemoveFromAllocasList(unsigned &AllocaIdx) {
    Allocas[AllocaIdx] = Allocas.back();
    Allocas.pop_back();
    --AllocaIdx;
  }

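  /// Note that DenseMap value-initializes missing entries to 0, so in
  /// getNumPreds below 0 is reserved to mean "not computed yet" and the
  /// cached predecessor count is stored biased by one.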
  unsigned getNumPreds(const BasicBlock *BB) {
    unsigned &NP = BBNumPreds[BB];
    if (NP == 0)
      NP = pred_size(BB) + 1;
    return NP - 1;
  }

  void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                           const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                           SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);
  void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                  RenamePassData::ValVector &IncVals,
                  RenamePassData::LocationVector &IncLocs,
                  std::vector<RenamePassData> &Worklist);
  bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
};

} // end anonymous namespace

/// Given a LoadInst \p LI, this adds assume(LI != null) after it.
static void addAssumeNonNull(AssumptionCache *AC, LoadInst *LI) {
  Function *AssumeIntrinsic =
      Intrinsic::getDeclaration(LI->getModule(), Intrinsic::assume);
  ICmpInst *LoadNotNull = new ICmpInst(ICmpInst::ICMP_NE, LI,
                                       Constant::getNullValue(LI->getType()));
  LoadNotNull->insertAfter(LI);
  CallInst *CI = CallInst::Create(AssumeIntrinsic, {LoadNotNull});
  CI->insertAfter(LoadNotNull);
  AC->registerAssumption(cast<AssumeInst>(CI));
}
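// For illustration, the code above inserts a pattern like the following
// right after the load (IR sketch):
//
//   %notnull = icmp ne ptr %ld, null
//   call void @llvm.assume(i1 %notnull)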

static void convertMetadataToAssumes(LoadInst *LI, Value *Val,
                                     const DataLayout &DL, AssumptionCache *AC,
                                     const DominatorTree *DT) {
  // If the load was marked as nonnull we don't want to lose that information
  // when we erase this Load. So we preserve it with an assume. As !nonnull
  // returns poison while assume violations are immediate undefined behavior,
  // we can only do this if the value is known non-poison.
  if (AC && LI->getMetadata(LLVMContext::MD_nonnull) &&
      LI->getMetadata(LLVMContext::MD_noundef) &&
      !isKnownNonZero(Val, DL, 0, AC, LI, DT))
    addAssumeNonNull(AC, LI);
}

static void removeIntrinsicUsers(AllocaInst *AI) {
  // Knowing that this alloca is promotable, we know that it's safe to kill all
  // instructions except for load and store.

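  // For example, lifetime markers that use the alloca directly are erased
  // here (IR sketch):
  //
  //   call void @llvm.lifetime.start.p0(i64 4, ptr %ai)
  //   call void @llvm.lifetime.end.p0(i64 4, ptr %ai)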
  for (Use &U : llvm::make_early_inc_range(AI->uses())) {
    Instruction *I = cast<Instruction>(U.getUser());
    if (isa<LoadInst>(I) || isa<StoreInst>(I))
      continue;

    // Drop the use of AI in droppable instructions.
    if (I->isDroppable()) {
      I->dropDroppableUse(U);
      continue;
    }

    if (!I->getType()->isVoidTy()) {
      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
      // Follow the use/def chain to erase them now instead of leaving it for
      // dead code elimination later.
      for (Use &UU : llvm::make_early_inc_range(I->uses())) {
        Instruction *Inst = cast<Instruction>(UU.getUser());

        // Drop the use of I in droppable instructions.
        if (Inst->isDroppable()) {
          Inst->dropDroppableUse(UU);
          continue;
        }
        Inst->eraseFromParent();
      }
    }
    I->eraseFromParent();
  }
}

/// Rewrite as many loads as possible given a single store.
///
/// When there is only a single store, we can use the domtree to trivially
/// replace all of the dominated loads with the stored value. Do so, and return
/// true if this has successfully promoted the alloca entirely. If this returns
/// false, there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                     LargeBlockInfo &LBI, const DataLayout &DL,
                                     DominatorTree &DT, AssumptionCache *AC) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (User *U : make_early_inc_range(AI->users())) {
    Instruction *UserInst = cast<Instruction>(U);
    if (UserInst == OnlyStore)
      continue;
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the load is
    // dominated by the store.  If not, we use the rest of the mem2reg
    // machinery to insert the phi nodes as needed.
    if (!StoringGlobalVal) { // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }
      } else if (!DT.dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationship.  If the store doesn't dominate the use,
        // bail out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = PoisonValue::get(LI->getType());

    convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT);
    LI->replaceAllUsesWith(ReplVal);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Finally, after the scan, check to see if the store is all that is left.
  if (!Info.UsingBlocks.empty())
    return false; // If not, we'll have to fall back for the remainder.

  DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
  // Update assignment tracking info for the store we're going to delete.
  Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB);

  // Record debuginfo for the store and remove the declaration's
  // debuginfo.
  for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
    if (DII->isAddressOfVariable()) {
      ConvertDebugDeclareToDebugValue(DII, Info.OnlyStore, DIB);
      DII->eraseFromParent();
    } else if (DII->getExpression()->startsWithDeref()) {
      DII->eraseFromParent();
    }
  }

  // Remove dbg.assigns linked to the alloca as these are now redundant.
  at::deleteAssignmentMarkers(AI);

  // Remove the (now dead) store and alloca.
  Info.OnlyStore->eraseFromParent();
  LBI.deleteValue(Info.OnlyStore);

  AI->eraseFromParent();
  return true;
}

/// Many allocas are only used within a single basic block.  If this is the
/// case, avoid traversing the CFG and inserting a lot of potentially useless
/// PHI nodes by just performing a single linear pass over the basic block
/// using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// return false.  This is necessary in cases where, due to control flow, the
/// alloca is undefined only on some control flow paths.  e.g. code like
/// this is correct in LLVM IR:
///  // A is an alloca with no stores so far
///  for (...) {
///    int t = *A;
///    if (!first_iteration)
///      use(t);
///    *A = 42;
///  }
static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
                                     LargeBlockInfo &LBI,
                                     const DataLayout &DL,
                                     DominatorTree &DT,
                                     AssumptionCache *AC) {
  // The trickiest case to handle is large blocks, so this code is optimized
  // assuming that large blocks occur.  This does not significantly pessimize
  // the small-block case.  LargeBlockInfo is used to make it efficient to
  // get the index of various operations in the block.

  // Walk the use-def list of the alloca, getting the locations of all stores.
  using StoresByIndexTy = SmallVector<std::pair<unsigned, StoreInst *>, 64>;
  StoresByIndexTy StoresByIndex;

  for (User *U : AI->users())
    if (StoreInst *SI = dyn_cast<StoreInst>(U))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // Sort the stores by their index, making it efficient to do a lookup with a
  // binary search.
  llvm::sort(StoresByIndex, less_first());

  // Walk all of the loads from this alloca, replacing them with the nearest
  // store above them, if any.
  for (User *U : make_early_inc_range(AI->users())) {
    LoadInst *LI = dyn_cast<LoadInst>(U);
    if (!LI)
      continue;

    unsigned LoadIdx = LBI.getInstructionIndex(LI);

    // Find the nearest store that has a lower index than this load.
    StoresByIndexTy::iterator I = llvm::lower_bound(
        StoresByIndex,
        std::make_pair(LoadIdx, static_cast<StoreInst *>(nullptr)),
        less_first());
    Value *ReplVal;
    if (I == StoresByIndex.begin()) {
      if (StoresByIndex.empty())
        // If there are no stores, the load takes the undef value.
        ReplVal = UndefValue::get(LI->getType());
      else
        // There is no store before this load, bail out (load may be affected
        // by the following stores - see main comment).
        return false;
    } else {
      // Otherwise, there was a store before this load; the load takes its
      // value.
      ReplVal = std::prev(I)->second->getOperand(0);
    }

    convertMetadataToAssumes(LI, ReplVal, DL, AC, &DT);

    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = PoisonValue::get(LI->getType());

    LI->replaceAllUsesWith(ReplVal);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }

  // Remove the (now dead) stores and alloca.
  DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
  while (!AI->use_empty()) {
    StoreInst *SI = cast<StoreInst>(AI->user_back());
    // Update assignment tracking info for the store we're going to delete.
    Info.AssignmentTracking.updateForDeletedStore(SI, DIB);
    // Record debuginfo for the store before removing it.
    for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
      if (DII->isAddressOfVariable()) {
        ConvertDebugDeclareToDebugValue(DII, SI, DIB);
      }
    }
    SI->eraseFromParent();
    LBI.deleteValue(SI);
  }

  // Remove dbg.assigns linked to the alloca as these are now redundant.
  at::deleteAssignmentMarkers(AI);
  AI->eraseFromParent();

  // The alloca's debuginfo can be removed as well.
  for (DbgVariableIntrinsic *DII : Info.DbgUsers)
    if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
      DII->eraseFromParent();

  ++NumLocalPromoted;
  return true;
}

void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  AllocaDbgUsers.resize(Allocas.size());
  AllocaATInfo.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;
  ForwardIDFCalculator IDF(DT);

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function as the DominatorTree!");

    removeIntrinsicUsers(AI);

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);
        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock &&
        promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
      // The alloca has been processed, move on.
      RemoveFromAllocasList(AllocaNum);
      continue;
    }

    // If we haven't computed a numbering for the BBs in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (auto &BB : F)
        BBNumbers[&BB] = ID++;
    }

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (!Info.DbgUsers.empty())
      AllocaDbgUsers[AllocaNum] = Info.DbgUsers;
    if (!Info.AssignmentTracking.empty())
      AllocaATInfo[AllocaNum] = Info.AssignmentTracking;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // Unique the set of defining blocks for efficient lookup.
    SmallPtrSet<BasicBlock *, 32> DefBlocks(Info.DefiningBlocks.begin(),
                                            Info.DefiningBlocks.end());

    // Determine which blocks the value is live in.  These are blocks which lead
    // to uses.
    SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
    ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

    // At this point, we're committed to promoting the alloca using IDFs and
    // the standard SSA construction algorithm.  Determine which blocks need
    // phi nodes and see if we can optimize out some work by avoiding insertion
    // of dead phi nodes.
    IDF.setLiveInBlocks(LiveInBlocks);
    IDF.setDefiningBlocks(DefBlocks);
    SmallVector<BasicBlock *, 32> PHIBlocks;
    IDF.calculate(PHIBlocks);
    llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) {
      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
    });

    unsigned CurrentVersion = 0;
    for (BasicBlock *BB : PHIBlocks)
      QueuePhiNode(BB, AllocaNum, CurrentVersion);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();

  // Set the incoming values for the basic block to be undef values for all of
  // the allocas.  We do this in case there is a load of a value that has not
  // been stored yet; in that case, the load will get this undef value.
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // When handling debug info, treat all incoming values as if they have unknown
  // locations until proven otherwise.
  RenamePassData::LocationVector Locations(Allocas.size());

  // Walk all basic blocks in the function, performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary.
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values),
                                  std::move(Locations));
  do {
    RenamePassData RPD = std::move(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (Instruction *A : Allocas) {
    // Remove dbg.assigns linked to the alloca as these are now redundant.
    at::deleteAssignmentMarkers(A);
    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree. Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(PoisonValue::get(A->getType()));
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (auto &DbgUsers : AllocaDbgUsers) {
    for (auto *DII : DbgUsers)
      if (DII->isAddressOfVariable() || DII->getExpression()->startsWithDeref())
        DII->eraseFromParent();
  }

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    // Iterating over NewPhiNodes is deterministic, so it is safe to try to
    // simplify and RAUW them as we go.  If it was not, we could add uses to
    // the values we replace with in a non-deterministic order, thus creating
    // non-deterministic def->use chains.
    for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
             I = NewPhiNodes.begin(),
             E = NewPhiNodes.end();
         I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = simplifyInstruction(PN, SQ)) {
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
           I = NewPhiNodes.begin(),
           E = NewPhiNodes.end();
       I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock *, 16> Preds(predecessors(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    auto CompareBBNumbers = [this](BasicBlock *A, BasicBlock *B) {
      return BBNumbers.find(A)->second < BBNumbers.find(B)->second;
    };
    llvm::sort(Preds, CompareBBNumbers);

    // Now we loop through all BBs which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVectorImpl<BasicBlock *>::iterator EntIt = llvm::lower_bound(
          Preds, SomePHI->getIncomingBlock(i), CompareBBNumbers);
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i) &&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI node for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (BasicBlock *Pred : Preds)
        SomePHI->addIncoming(UndefVal, Pred);
    }
  }

  NewPhiNodes.clear();
}

/// Determine which blocks the value is live in.
///
/// These are blocks which lead to uses.  Knowing this allows us to avoid
/// inserting PHI nodes into blocks which don't lead to uses (thus, the
/// inserted phi nodes would be dead).
void PromoteMem2Reg::ComputeLiveInBlocks(
    AllocaInst *AI, AllocaInfo &Info,
    const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
    SmallPtrSetImpl<BasicBlock *> &LiveInBlocks) {
  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                    Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB))
      continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin();; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI)
          continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i;
        --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I))
        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        if (LI->getOperand(0) == AI)
          break;
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region where the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB).second)
      continue;

    // Since the value is live into BB, it is either defined in a predecessor
    // or live into it too.  Add the preds to the worklist unless they are
    // defining blocks.
    for (BasicBlock *P : predecessors(BB)) {
      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// Queue a phi-node to be added to a basic-block for a specific Alloca.
///
/// Returns true if there wasn't already a phi-node for that variable.
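///
/// For illustration, for an alloca named %x of type i32 this inserts, at the
/// top of \p BB, a node of the form (IR sketch):
///   %x.<Version> = phi i32 ...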
bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
                                  unsigned &Version) {
  // Look up the basic-block in question.
  PHINode *&PN = NewPhiNodes[std::make_pair(BBNumbers[BB], AllocaNo)];

  // If the BB already has a phi node added for the i'th alloca then we're done!
  if (PN)
    return false;

  // Create a PhiNode using the dereferenced type... and add the phi-node to the
  // BasicBlock.
  PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB),
                       Allocas[AllocaNo]->getName() + "." + Twine(Version++),
                       &BB->front());
  ++NumPHIInsert;
  PhiToAllocaMap[PN] = AllocaNo;
  return true;
}

/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to
/// create a merged location incorporating \p DL, or to set \p DL directly.
static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL,
                                           bool ApplyMergedLoc) {
  if (ApplyMergedLoc)
    PN->applyMergedLocation(PN->getDebugLoc(), DL);
  else
    PN->setDebugLoc(DL);
}

/// Recursively traverse the CFG of the function, renaming loads and
/// stores to the allocas which we are promoting.
///
/// IncomingVals indicates what value each Alloca contains on exit from the
/// predecessor block Pred.
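///
/// Note that the explicit \p Worklist together with the 'goto NextIteration'
/// below turns the conceptually recursive CFG walk into an iterative one: the
/// first successor is followed directly and the remaining successors are
/// queued, avoiding stack overflow on very deep CFGs.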
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                RenamePassData::LocationVector &IncomingLocs,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in the
  // block.
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed in
      // the IR before mem2reg was run.  We determine that APN is being inserted
      // because it is missing incoming edges.  All other PHI nodes being
      // inserted by this pass of mem2reg will have the same number of incoming
      // operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      unsigned NumEdges = llvm::count(successors(Pred), BB);
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Update the location of the phi node.
        updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo],
                                       APN->getNumIncomingValues() > 0);

        // Add N incoming values to the PHI node.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;
        AllocaATInfo[AllocaNo].updateForNewPhi(APN, DIB);
        for (DbgVariableIntrinsic *DII : AllocaDbgUsers[AllocaNo])
          if (DII->isAddressOfVariable())
            ConvertDebugDeclareToDebugValue(DII, APN, DIB);

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (!APN)
          break;

        // Verify that it is missing entries.  If not, it is not being inserted
        // by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.
  if (!Visited.insert(BB).second)
    return;

  for (BasicBlock::iterator II = BB->begin(); !II->isTerminator();) {
    Instruction *I = &*II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end())
        continue;

      Value *V = IncomingVals[AI->second];
      convertMetadataToAssumes(LI, V, SQ.DL, AC, &DT);

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      LI->eraseFromParent();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of
      // the value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest)
        continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // What value were we writing?
      unsigned AllocaNo = ai->second;
      IncomingVals[AllocaNo] = SI->getOperand(0);

      // Record debuginfo for the store before removing it.
      IncomingLocs[AllocaNo] = SI->getDebugLoc();
      AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB);
      for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
        if (DII->isAddressOfVariable())
          ConvertDebugDeclareToDebugValue(DII, SI, DIB);
      SI->eraseFromParent();
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E)
    return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock *, 8> VisitedSuccs;

  // Handle the first successor without using the worklist.
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I).second)
      Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);

  goto NextIteration;
}

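/// Promote the specified list of allocas into SSA registers, inserting PHI
/// nodes as appropriate.
///
/// A minimal usage sketch from a hypothetical caller (F, DT, and AC below are
/// illustrative names, not defined in this file):
///
///   SmallVector<AllocaInst *, 32> ToPromote;
///   for (Instruction &I : F.getEntryBlock())
///     if (auto *AI = dyn_cast<AllocaInst>(&I))
///       if (isAllocaPromotable(AI))
///         ToPromote.push_back(AI);
///   if (!ToPromote.empty())
///     PromoteMemToReg(ToPromote, DT, &AC);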
void llvm::PromoteMemToReg(ArrayRef<AllocaInst *> Allocas, DominatorTree &DT,
                           AssumptionCache *AC) {
  // If there is nothing to do, bail out...
  if (Allocas.empty())
    return;

  PromoteMem2Reg(Allocas, DT, AC).run();
}
