1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file contains classes used to discover if for a particular value
9// there from sue to definition that crosses a suspend block.
10//
11// Using the information discovered we form a Coroutine Frame structure to
12// contain those values. All uses of those values are replaced with appropriate
13// GEP + load from the coroutine frame. At the point of the definition we spill
14// the value into the coroutine frame.
15//
16// TODO: pack values tightly using liveness info.
17//===----------------------------------------------------------------------===//
18
19#include "CoroInternal.h"
20#include "llvm/ADT/BitVector.h"
21#include "llvm/Analysis/PtrUseVisitor.h"
22#include "llvm/Transforms/Utils/Local.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/InstIterator.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/MathExtras.h"
30#include "llvm/Support/circular_raw_ostream.h"
31#include "llvm/Transforms/Utils/BasicBlockUtils.h"
32#include "llvm/Transforms/Utils/PromoteMemToReg.h"
33
34using namespace llvm;
35
36// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
37// "coro-frame", which results in leaner debug spew.
38#define DEBUG_TYPE "coro-suspend-crossing"
39
40enum { SmallVectorThreshold = 32 };
41
42// Provides two way mapping between the blocks and numbers.
43namespace {
44class BlockToIndexMapping {
45  SmallVector<BasicBlock *, SmallVectorThreshold> V;
46
47public:
48  size_t size() const { return V.size(); }
49
50  BlockToIndexMapping(Function &F) {
51    for (BasicBlock &BB : F)
52      V.push_back(&BB);
53    llvm::sort(V);
54  }
55
56  size_t blockToIndex(BasicBlock *BB) const {
57    auto *I = llvm::lower_bound(V, BB);
58    assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
59    return I - V.begin();
60  }
61
62  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
63};
64} // end anonymous namespace
65
66// The SuspendCrossingInfo maintains data that allows to answer a question
67// whether given two BasicBlocks A and B there is a path from A to B that
68// passes through a suspend point.
69//
70// For every basic block 'i' it maintains a BlockData that consists of:
71//   Consumes:  a bit vector which contains a set of indices of blocks that can
72//              reach block 'i'
73//   Kills: a bit vector which contains a set of indices of blocks that can
74//          reach block 'i', but one of the path will cross a suspend point
75//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
76//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
77//
78namespace {
79struct SuspendCrossingInfo {
80  BlockToIndexMapping Mapping;
81
82  struct BlockData {
83    BitVector Consumes;
84    BitVector Kills;
85    bool Suspend = false;
86    bool End = false;
87  };
88  SmallVector<BlockData, SmallVectorThreshold> Block;
89
90  iterator_range<succ_iterator> successors(BlockData const &BD) const {
91    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
92    return llvm::successors(BB);
93  }
94
95  BlockData &getBlockData(BasicBlock *BB) {
96    return Block[Mapping.blockToIndex(BB)];
97  }
98
99  void dump() const;
100  void dump(StringRef Label, BitVector const &BV) const;
101
102  SuspendCrossingInfo(Function &F, coro::Shape &Shape);
103
104  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
105    size_t const DefIndex = Mapping.blockToIndex(DefBB);
106    size_t const UseIndex = Mapping.blockToIndex(UseBB);
107
108    assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
109    bool const Result = Block[UseIndex].Kills[DefIndex];
110    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
111                      << " answer is " << Result << "\n");
112    return Result;
113  }
114
115  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
116    auto *I = cast<Instruction>(U);
117
118    // We rewrote PHINodes, so that only the ones with exactly one incoming
119    // value need to be analyzed.
120    if (auto *PN = dyn_cast<PHINode>(I))
121      if (PN->getNumIncomingValues() > 1)
122        return false;
123
124    BasicBlock *UseBB = I->getParent();
125
126    // As a special case, treat uses by an llvm.coro.suspend.retcon
127    // as if they were uses in the suspend's single predecessor: the
128    // uses conceptually occur before the suspend.
129    if (isa<CoroSuspendRetconInst>(I)) {
130      UseBB = UseBB->getSinglePredecessor();
131      assert(UseBB && "should have split coro.suspend into its own block");
132    }
133
134    return hasPathCrossingSuspendPoint(DefBB, UseBB);
135  }
136
137  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
138    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
139  }
140
141  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
142    auto *DefBB = I.getParent();
143
144    // As a special case, treat values produced by an llvm.coro.suspend.*
145    // as if they were defined in the single successor: the uses
146    // conceptually occur after the suspend.
147    if (isa<AnyCoroSuspendInst>(I)) {
148      DefBB = DefBB->getSingleSuccessor();
149      assert(DefBB && "should have split coro.suspend into its own block");
150    }
151
152    return isDefinitionAcrossSuspend(DefBB, U);
153  }
154};
155} // end anonymous namespace
156
157#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
158LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
159                                                BitVector const &BV) const {
160  dbgs() << Label << ":";
161  for (size_t I = 0, N = BV.size(); I < N; ++I)
162    if (BV[I])
163      dbgs() << " " << Mapping.indexToBlock(I)->getName();
164  dbgs() << "\n";
165}
166
167LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
168  for (size_t I = 0, N = Block.size(); I < N; ++I) {
169    BasicBlock *const B = Mapping.indexToBlock(I);
170    dbgs() << B->getName() << ":\n";
171    dump("   Consumes", Block[I].Consumes);
172    dump("      Kills", Block[I].Kills);
173  }
174  dbgs() << "\n";
175}
176#endif
177
178SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
179    : Mapping(F) {
180  const size_t N = Mapping.size();
181  Block.resize(N);
182
183  // Initialize every block so that it consumes itself
184  for (size_t I = 0; I < N; ++I) {
185    auto &B = Block[I];
186    B.Consumes.resize(N);
187    B.Kills.resize(N);
188    B.Consumes.set(I);
189  }
190
191  // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
192  // the code beyond coro.end is reachable during initial invocation of the
193  // coroutine.
194  for (auto *CE : Shape.CoroEnds)
195    getBlockData(CE->getParent()).End = true;
196
197  // Mark all suspend blocks and indicate that they kill everything they
198  // consume. Note, that crossing coro.save also requires a spill, as any code
199  // between coro.save and coro.suspend may resume the coroutine and all of the
200  // state needs to be saved by that time.
201  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
202    BasicBlock *SuspendBlock = BarrierInst->getParent();
203    auto &B = getBlockData(SuspendBlock);
204    B.Suspend = true;
205    B.Kills |= B.Consumes;
206  };
207  for (auto *CSI : Shape.CoroSuspends) {
208    markSuspendBlock(CSI);
209    if (auto *Save = CSI->getCoroSave())
210      markSuspendBlock(Save);
211  }
212
213  // Iterate propagating consumes and kills until they stop changing.
214  int Iteration = 0;
215  (void)Iteration;
216
217  bool Changed;
218  do {
219    LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
220    LLVM_DEBUG(dbgs() << "==============\n");
221
222    Changed = false;
223    for (size_t I = 0; I < N; ++I) {
224      auto &B = Block[I];
225      for (BasicBlock *SI : successors(B)) {
226
227        auto SuccNo = Mapping.blockToIndex(SI);
228
229        // Saved Consumes and Kills bitsets so that it is easy to see
230        // if anything changed after propagation.
231        auto &S = Block[SuccNo];
232        auto SavedConsumes = S.Consumes;
233        auto SavedKills = S.Kills;
234
235        // Propagate Kills and Consumes from block B into its successor S.
236        S.Consumes |= B.Consumes;
237        S.Kills |= B.Kills;
238
239        // If block B is a suspend block, it should propagate kills into the
240        // its successor for every block B consumes.
241        if (B.Suspend) {
242          S.Kills |= B.Consumes;
243        }
244        if (S.Suspend) {
245          // If block S is a suspend block, it should kill all of the blocks it
246          // consumes.
247          S.Kills |= S.Consumes;
248        } else if (S.End) {
249          // If block S is an end block, it should not propagate kills as the
250          // blocks following coro.end() are reached during initial invocation
251          // of the coroutine while all the data are still available on the
252          // stack or in the registers.
253          S.Kills.reset();
254        } else {
255          // This is reached when S block it not Suspend nor coro.end and it
256          // need to make sure that it is not in the kill set.
257          S.Kills.reset(SuccNo);
258        }
259
260        // See if anything changed.
261        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
262
263        if (S.Kills != SavedKills) {
264          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
265                            << "\n");
266          LLVM_DEBUG(dump("S.Kills", S.Kills));
267          LLVM_DEBUG(dump("SavedKills", SavedKills));
268        }
269        if (S.Consumes != SavedConsumes) {
270          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
271          LLVM_DEBUG(dump("S.Consume", S.Consumes));
272          LLVM_DEBUG(dump("SavedCons", SavedConsumes));
273        }
274      }
275    }
276  } while (Changed);
277  LLVM_DEBUG(dump());
278}
279
280#undef DEBUG_TYPE // "coro-suspend-crossing"
281#define DEBUG_TYPE "coro-frame"
282
283// We build up the list of spills for every case where a use is separated
284// from the definition by a suspend point.
285
286static const unsigned InvalidFieldIndex = ~0U;
287
288namespace {
289class Spill {
290  Value *Def = nullptr;
291  Instruction *User = nullptr;
292  unsigned FieldNo = InvalidFieldIndex;
293
294public:
295  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
296
297  Value *def() const { return Def; }
298  Instruction *user() const { return User; }
299  BasicBlock *userBlock() const { return User->getParent(); }
300
301  // Note that field index is stored in the first SpillEntry for a particular
302  // definition. Subsequent mentions of a defintion do not have fieldNo
303  // assigned. This works out fine as the users of Spills capture the info about
304  // the definition the first time they encounter it. Consider refactoring
305  // SpillInfo into two arrays to normalize the spill representation.
306  unsigned fieldIndex() const {
307    assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
308    return FieldNo;
309  }
310  void setFieldIndex(unsigned FieldNumber) {
311    assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
312    FieldNo = FieldNumber;
313  }
314};
315} // namespace
316
317// Note that there may be more than one record with the same value of Def in
318// the SpillInfo vector.
319using SpillInfo = SmallVector<Spill, 8>;
320
321#ifndef NDEBUG
322static void dump(StringRef Title, SpillInfo const &Spills) {
323  dbgs() << "------------- " << Title << "--------------\n";
324  Value *CurrentValue = nullptr;
325  for (auto const &E : Spills) {
326    if (CurrentValue != E.def()) {
327      CurrentValue = E.def();
328      CurrentValue->dump();
329    }
330    dbgs() << "   user: ";
331    E.user()->dump();
332  }
333}
334#endif
335
336namespace {
337// We cannot rely solely on natural alignment of a type when building a
338// coroutine frame and if the alignment specified on the Alloca instruction
339// differs from the natural alignment of the alloca type we will need to insert
340// padding.
341struct PaddingCalculator {
342  const DataLayout &DL;
343  LLVMContext &Context;
344  unsigned StructSize = 0;
345
346  PaddingCalculator(LLVMContext &Context, DataLayout const &DL)
347      : DL(DL), Context(Context) {}
348
349  // Replicate the logic from IR/DataLayout.cpp to match field offset
350  // computation for LLVM structs.
351  void addType(Type *Ty) {
352    unsigned TyAlign = DL.getABITypeAlignment(Ty);
353    if ((StructSize & (TyAlign - 1)) != 0)
354      StructSize = alignTo(StructSize, TyAlign);
355
356    StructSize += DL.getTypeAllocSize(Ty); // Consume space for this data item.
357  }
358
359  void addTypes(SmallVectorImpl<Type *> const &Types) {
360    for (auto *Ty : Types)
361      addType(Ty);
362  }
363
364  unsigned computePadding(Type *Ty, unsigned ForcedAlignment) {
365    unsigned TyAlign = DL.getABITypeAlignment(Ty);
366    auto Natural = alignTo(StructSize, TyAlign);
367    auto Forced = alignTo(StructSize, ForcedAlignment);
368
369    // Return how many bytes of padding we need to insert.
370    if (Natural != Forced)
371      return std::max(Natural, Forced) - StructSize;
372
373    // Rely on natural alignment.
374    return 0;
375  }
376
377  // If padding required, return the padding field type to insert.
378  ArrayType *getPaddingType(Type *Ty, unsigned ForcedAlignment) {
379    if (auto Padding = computePadding(Ty, ForcedAlignment))
380      return ArrayType::get(Type::getInt8Ty(Context), Padding);
381
382    return nullptr;
383  }
384};
385} // namespace
386
387// Build a struct that will keep state for an active coroutine.
388//   struct f.frame {
389//     ResumeFnTy ResumeFnAddr;
390//     ResumeFnTy DestroyFnAddr;
391//     int ResumeIndex;
392//     ... promise (if present) ...
393//     ... spills ...
394//   };
395static StructType *buildFrameType(Function &F, coro::Shape &Shape,
396                                  SpillInfo &Spills) {
397  LLVMContext &C = F.getContext();
398  const DataLayout &DL = F.getParent()->getDataLayout();
399  PaddingCalculator Padder(C, DL);
400  SmallString<32> Name(F.getName());
401  Name.append(".Frame");
402  StructType *FrameTy = StructType::create(C, Name);
403  SmallVector<Type *, 8> Types;
404
405  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
406
407  if (Shape.ABI == coro::ABI::Switch) {
408    auto *FramePtrTy = FrameTy->getPointerTo();
409    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
410                                   /*IsVarArg=*/false);
411    auto *FnPtrTy = FnTy->getPointerTo();
412
413    // Figure out how wide should be an integer type storing the suspend index.
414    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
415    Type *PromiseType = PromiseAlloca
416                            ? PromiseAlloca->getType()->getElementType()
417                            : Type::getInt1Ty(C);
418    Type *IndexType = Type::getIntNTy(C, IndexBits);
419    Types.push_back(FnPtrTy);
420    Types.push_back(FnPtrTy);
421    Types.push_back(PromiseType);
422    Types.push_back(IndexType);
423  } else {
424    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
425  }
426
427  Value *CurrentDef = nullptr;
428
429  Padder.addTypes(Types);
430
431  // Create an entry for every spilled value.
432  for (auto &S : Spills) {
433    if (CurrentDef == S.def())
434      continue;
435
436    CurrentDef = S.def();
437    // PromiseAlloca was already added to Types array earlier.
438    if (CurrentDef == PromiseAlloca)
439      continue;
440
441    uint64_t Count = 1;
442    Type *Ty = nullptr;
443    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
444      Ty = AI->getAllocatedType();
445      if (unsigned AllocaAlignment = AI->getAlignment()) {
446        // If alignment is specified in alloca, see if we need to insert extra
447        // padding.
448        if (auto PaddingTy = Padder.getPaddingType(Ty, AllocaAlignment)) {
449          Types.push_back(PaddingTy);
450          Padder.addType(PaddingTy);
451        }
452      }
453      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
454        Count = CI->getValue().getZExtValue();
455      else
456        report_fatal_error("Coroutines cannot handle non static allocas yet");
457    } else {
458      Ty = CurrentDef->getType();
459    }
460    S.setFieldIndex(Types.size());
461    if (Count == 1)
462      Types.push_back(Ty);
463    else
464      Types.push_back(ArrayType::get(Ty, Count));
465    Padder.addType(Ty);
466  }
467  FrameTy->setBody(Types);
468
469  switch (Shape.ABI) {
470  case coro::ABI::Switch:
471    break;
472
473  // Remember whether the frame is inline in the storage.
474  case coro::ABI::Retcon:
475  case coro::ABI::RetconOnce: {
476    auto &Layout = F.getParent()->getDataLayout();
477    auto Id = Shape.getRetconCoroId();
478    Shape.RetconLowering.IsFrameInlineInStorage
479      = (Layout.getTypeAllocSize(FrameTy) <= Id->getStorageSize() &&
480         Layout.getABITypeAlignment(FrameTy) <= Id->getStorageAlignment());
481    break;
482  }
483  }
484
485  return FrameTy;
486}
487
488// We use a pointer use visitor to discover if there are any writes into an
489// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
490// the value from the alloca into the coroutine frame spill slot corresponding
491// to that alloca.
492namespace {
493struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
494  using Base = PtrUseVisitor<AllocaUseVisitor>;
495  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
496                   const CoroBeginInst &CB)
497      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
498
499  // We are only interested in uses that dominate coro.begin.
500  void visit(Instruction &I) {
501    if (DT.dominates(&I, &CoroBegin))
502      Base::visit(I);
503  }
504  // We need to provide this overload as PtrUseVisitor uses a pointer based
505  // visiting function.
506  void visit(Instruction *I) { return visit(*I); }
507
508  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
509
510  // If the use is an operand, the pointer escaped and anything can write into
511  // that memory. If the use is the pointer, we are definitely writing into the
512  // alloca and therefore we need to copy.
513  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
514
515  // Any other instruction that is not filtered out by PtrUseVisitor, will
516  // result in the copy.
517  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
518
519private:
520  const DominatorTree &DT;
521  const CoroBeginInst &CoroBegin;
522};
523} // namespace
524static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
525                                    const CoroBeginInst &CB) {
526  const DataLayout &DL = A.getModule()->getDataLayout();
527  AllocaUseVisitor Visitor(DL, DT, CB);
528  auto PtrI = Visitor.visitPtr(A);
529  if (PtrI.isEscaped() || PtrI.isAborted()) {
530    auto *PointerEscapingInstr = PtrI.getEscapingInst()
531                                     ? PtrI.getEscapingInst()
532                                     : PtrI.getAbortingInst();
533    if (PointerEscapingInstr) {
534      LLVM_DEBUG(
535          dbgs() << "AllocaInst copy was triggered by instruction: "
536                 << *PointerEscapingInstr << "\n");
537    }
538    return true;
539  }
540  return false;
541}
542
543// We need to make room to insert a spill after initial PHIs, but before
544// catchswitch instruction. Placing it before violates the requirement that
545// catchswitch, like all other EHPads must be the first nonPHI in a block.
546//
547// Split away catchswitch into a separate block and insert in its place:
548//
549//   cleanuppad <InsertPt> cleanupret.
550//
551// cleanupret instruction will act as an insert point for the spill.
552static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
553  BasicBlock *CurrentBlock = CatchSwitch->getParent();
554  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
555  CurrentBlock->getTerminator()->eraseFromParent();
556
557  auto *CleanupPad =
558      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
559  auto *CleanupRet =
560      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
561  return CleanupRet;
562}
563
564// Replace all alloca and SSA values that are accessed across suspend points
565// with GetElementPointer from coroutine frame + loads and stores. Create an
566// AllocaSpillBB that will become the new entry block for the resume parts of
567// the coroutine:
568//
569//    %hdl = coro.begin(...)
570//    whatever
571//
572// becomes:
573//
574//    %hdl = coro.begin(...)
575//    %FramePtr = bitcast i8* hdl to %f.frame*
576//    br label %AllocaSpillBB
577//
578//  AllocaSpillBB:
579//    ; geps corresponding to allocas that were moved to coroutine frame
580//    br label PostSpill
581//
582//  PostSpill:
583//    whatever
584//
585//
586static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
587  auto *CB = Shape.CoroBegin;
588  LLVMContext &C = CB->getContext();
589  IRBuilder<> Builder(CB->getNextNode());
590  StructType *FrameTy = Shape.FrameTy;
591  PointerType *FramePtrTy = FrameTy->getPointerTo();
592  auto *FramePtr =
593      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
594  DominatorTree DT(*CB->getFunction());
595
596  Value *CurrentValue = nullptr;
597  BasicBlock *CurrentBlock = nullptr;
598  Value *CurrentReload = nullptr;
599
600  // Proper field number will be read from field definition.
601  unsigned Index = InvalidFieldIndex;
602
603  // We need to keep track of any allocas that need "spilling"
604  // since they will live in the coroutine frame now, all access to them
605  // need to be changed, not just the access across suspend points
606  // we remember allocas and their indices to be handled once we processed
607  // all the spills.
608  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
609  // Promise alloca (if present) has a fixed field number.
610  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
611    assert(Shape.ABI == coro::ABI::Switch);
612    Allocas.emplace_back(PromiseAlloca, coro::Shape::SwitchFieldIndex::Promise);
613  }
614
615  // Create a GEP with the given index into the coroutine frame for the original
616  // value Orig. Appends an extra 0 index for array-allocas, preserving the
617  // original type.
618  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
619    SmallVector<Value *, 3> Indices = {
620        ConstantInt::get(Type::getInt32Ty(C), 0),
621        ConstantInt::get(Type::getInt32Ty(C), Index),
622    };
623
624    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
625      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
626        auto Count = CI->getValue().getZExtValue();
627        if (Count > 1) {
628          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
629        }
630      } else {
631        report_fatal_error("Coroutines cannot handle non static allocas yet");
632      }
633    }
634
635    return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
636  };
637
638  // Create a load instruction to reload the spilled value from the coroutine
639  // frame.
640  auto CreateReload = [&](Instruction *InsertBefore) {
641    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
642    Builder.SetInsertPoint(InsertBefore);
643
644    auto *G = GetFramePointer(Index, CurrentValue);
645    G->setName(CurrentValue->getName() + Twine(".reload.addr"));
646
647    return isa<AllocaInst>(CurrentValue)
648               ? G
649               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
650                                    CurrentValue->getName() + Twine(".reload"));
651  };
652
653  for (auto const &E : Spills) {
654    // If we have not seen the value, generate a spill.
655    if (CurrentValue != E.def()) {
656      CurrentValue = E.def();
657      CurrentBlock = nullptr;
658      CurrentReload = nullptr;
659
660      Index = E.fieldIndex();
661
662      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
663        // Spilled AllocaInst will be replaced with GEP from the coroutine frame
664        // there is no spill required.
665        Allocas.emplace_back(AI, Index);
666        if (!AI->isStaticAlloca())
667          report_fatal_error("Coroutines cannot handle non static allocas yet");
668      } else {
669        // Otherwise, create a store instruction storing the value into the
670        // coroutine frame.
671
672        Instruction *InsertPt = nullptr;
673        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
674          // For arguments, we will place the store instruction right after
675          // the coroutine frame pointer instruction, i.e. bitcast of
676          // coro.begin from i8* to %f.frame*.
677          InsertPt = FramePtr->getNextNode();
678
679          // If we're spilling an Argument, make sure we clear 'nocapture'
680          // from the coroutine function.
681          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
682                                            Attribute::NoCapture);
683
684        } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
685          // If we are spilling the result of the invoke instruction, split the
686          // normal edge and insert the spill in the new block.
687          auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
688          InsertPt = NewBB->getTerminator();
689        } else if (isa<PHINode>(CurrentValue)) {
690          // Skip the PHINodes and EH pads instructions.
691          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
692          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
693            InsertPt = splitBeforeCatchSwitch(CSI);
694          else
695            InsertPt = &*DefBlock->getFirstInsertionPt();
696        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
697          // Don't spill immediately after a suspend; splitting assumes
698          // that the suspend will be followed by a branch.
699          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
700        } else {
701          auto *I = cast<Instruction>(E.def());
702          assert(!I->isTerminator() && "unexpected terminator");
703          // For all other values, the spill is placed immediately after
704          // the definition.
705          if (DT.dominates(CB, I)) {
706            InsertPt = I->getNextNode();
707          } else {
708            // Unless, it is not dominated by CoroBegin, then it will be
709            // inserted immediately after CoroFrame is computed.
710            InsertPt = FramePtr->getNextNode();
711          }
712        }
713
714        Builder.SetInsertPoint(InsertPt);
715        auto *G = Builder.CreateConstInBoundsGEP2_32(
716            FrameTy, FramePtr, 0, Index,
717            CurrentValue->getName() + Twine(".spill.addr"));
718        Builder.CreateStore(CurrentValue, G);
719      }
720    }
721
722    // If we have not seen the use block, generate a reload in it.
723    if (CurrentBlock != E.userBlock()) {
724      CurrentBlock = E.userBlock();
725      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
726    }
727
728    // If we have a single edge PHINode, remove it and replace it with a reload
729    // from the coroutine frame. (We already took care of multi edge PHINodes
730    // by rewriting them in the rewritePHIs function).
731    if (auto *PN = dyn_cast<PHINode>(E.user())) {
732      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
733                                                "values in the PHINode");
734      PN->replaceAllUsesWith(CurrentReload);
735      PN->eraseFromParent();
736      continue;
737    }
738
739    // Replace all uses of CurrentValue in the current instruction with reload.
740    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
741  }
742
743  BasicBlock *FramePtrBB = FramePtr->getParent();
744
745  auto SpillBlock =
746    FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
747  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
748  Shape.AllocaSpillBlock = SpillBlock;
749  // If we found any allocas, replace all of their remaining uses with Geps.
750  // Note: we cannot do it indiscriminately as some of the uses may not be
751  // dominated by CoroBegin.
752  bool MightNeedToCopy = false;
753  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
754  SmallVector<Instruction *, 4> UsersToUpdate;
755  for (auto &P : Allocas) {
756    AllocaInst *const A = P.first;
757    UsersToUpdate.clear();
758    for (User *U : A->users()) {
759      auto *I = cast<Instruction>(U);
760      if (DT.dominates(CB, I))
761        UsersToUpdate.push_back(I);
762      else
763        MightNeedToCopy = true;
764    }
765    if (!UsersToUpdate.empty()) {
766      auto *G = GetFramePointer(P.second, A);
767      G->takeName(A);
768      for (Instruction *I : UsersToUpdate)
769        I->replaceUsesOfWith(A, G);
770    }
771  }
772  // If we discovered such uses not dominated by CoroBegin, see if any of them
773  // preceed coro begin and have instructions that can modify the
774  // value of the alloca and therefore would require a copying the value into
775  // the spill slot in the coroutine frame.
776  if (MightNeedToCopy) {
777    Builder.SetInsertPoint(FramePtr->getNextNode());
778
779    for (auto &P : Allocas) {
780      AllocaInst *const A = P.first;
781      if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
782        if (A->isArrayAllocation())
783          report_fatal_error(
784              "Coroutines cannot handle copying of array allocas yet");
785
786        auto *G = GetFramePointer(P.second, A);
787        auto *Value = Builder.CreateLoad(A);
788        Builder.CreateStore(Value, G);
789      }
790    }
791  }
792  return FramePtr;
793}
794
795// Sets the unwind edge of an instruction to a particular successor.
796static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
797  if (auto *II = dyn_cast<InvokeInst>(TI))
798    II->setUnwindDest(Succ);
799  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
800    CS->setUnwindDest(Succ);
801  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
802    CR->setUnwindDest(Succ);
803  else
804    llvm_unreachable("unexpected terminator instruction");
805}
806
807// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
808// block.
809static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
810                           BasicBlock *NewPred,
811                           PHINode *LandingPadReplacement) {
812  unsigned BBIdx = 0;
813  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
814    PHINode *PN = cast<PHINode>(I);
815
816    // We manually update the LandingPadReplacement PHINode and it is the last
817    // PHI Node. So, if we find it, we are done.
818    if (LandingPadReplacement == PN)
819      break;
820
821    // Reuse the previous value of BBIdx if it lines up.  In cases where we
822    // have multiple phi nodes with *lots* of predecessors, this is a speed
823    // win because we don't have to scan the PHI looking for TIBB.  This
824    // happens because the BB list of PHI nodes are usually in the same
825    // order.
826    if (PN->getIncomingBlock(BBIdx) != OldPred)
827      BBIdx = PN->getBasicBlockIndex(OldPred);
828
829    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
830    PN->setIncomingBlock(BBIdx, NewPred);
831  }
832}
833
834// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
835// specific handling.
836static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
837                                    LandingPadInst *OriginalPad,
838                                    PHINode *LandingPadReplacement) {
839  auto *PadInst = Succ->getFirstNonPHI();
840  if (!LandingPadReplacement && !PadInst->isEHPad())
841    return SplitEdge(BB, Succ);
842
843  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
844  setUnwindEdgeTo(BB->getTerminator(), NewBB);
845  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
846
847  if (LandingPadReplacement) {
848    auto *NewLP = OriginalPad->clone();
849    auto *Terminator = BranchInst::Create(Succ, NewBB);
850    NewLP->insertBefore(Terminator);
851    LandingPadReplacement->addIncoming(NewLP, NewBB);
852    return NewBB;
853  }
854  Value *ParentPad = nullptr;
855  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
856    ParentPad = FuncletPad->getParentPad();
857  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
858    ParentPad = CatchSwitch->getParentPad();
859  else
860    llvm_unreachable("handling for other EHPads not implemented yet");
861
862  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
863  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
864  return NewBB;
865}
866
867static void rewritePHIs(BasicBlock &BB) {
868  // For every incoming edge we will create a block holding all
869  // incoming values in a single PHI nodes.
870  //
871  // loop:
872  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
873  //
874  // It will create:
875  //
876  // loop.from.entry:
877  //    %n.loop.pre = phi i32 [%n, %entry]
878  //    br %label loop
879  // loop.from.loop:
880  //    %inc.loop.pre = phi i32 [%inc, %loop]
881  //    br %label loop
882  //
883  // After this rewrite, further analysis will ignore any phi nodes with more
884  // than one incoming edge.
885
886  // TODO: Simplify PHINodes in the basic block to remove duplicate
887  // predecessors.
888
889  LandingPadInst *LandingPad = nullptr;
890  PHINode *ReplPHI = nullptr;
891  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
892    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
893    // We replace the original landing pad with a PHINode that will collect the
894    // results from all of them.
895    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
896    ReplPHI->takeName(LandingPad);
897    LandingPad->replaceAllUsesWith(ReplPHI);
898    // We will erase the original landing pad at the end of this function after
899    // ehAwareSplitEdge cloned it in the transition blocks.
900  }
901
902  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
903  for (BasicBlock *Pred : Preds) {
904    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
905    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
906    auto *PN = cast<PHINode>(&BB.front());
907    do {
908      int Index = PN->getBasicBlockIndex(IncomingBB);
909      Value *V = PN->getIncomingValue(Index);
910      PHINode *InputV = PHINode::Create(
911          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
912          &IncomingBB->front());
913      InputV->addIncoming(V, Pred);
914      PN->setIncomingValue(Index, InputV);
915      PN = dyn_cast<PHINode>(PN->getNextNode());
916    } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
917                             // the landing pad.
918  }
919
920  if (LandingPad) {
921    // Calls to ehAwareSplitEdge function cloned the original lading pad.
922    // No longer need it.
923    LandingPad->eraseFromParent();
924  }
925}
926
927static void rewritePHIs(Function &F) {
928  SmallVector<BasicBlock *, 8> WorkList;
929
930  for (BasicBlock &BB : F)
931    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
932      if (PN->getNumIncomingValues() > 1)
933        WorkList.push_back(&BB);
934
935  for (BasicBlock *BB : WorkList)
936    rewritePHIs(*BB);
937}
938
939// Check for instructions that we can recreate on resume as opposed to spill
940// the result into a coroutine frame.
941static bool materializable(Instruction &V) {
942  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
943         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
944}
945
946// Check for structural coroutine intrinsics that should not be spilled into
947// the coroutine frame.
948static bool isCoroutineStructureIntrinsic(Instruction &I) {
949  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
950         isa<CoroSuspendInst>(&I);
951}
952
953// For every use of the value that is across suspend point, recreate that value
954// after a suspend point.
955static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
956                                              SpillInfo const &Spills) {
957  BasicBlock *CurrentBlock = nullptr;
958  Instruction *CurrentMaterialization = nullptr;
959  Instruction *CurrentDef = nullptr;
960
961  for (auto const &E : Spills) {
962    // If it is a new definition, update CurrentXXX variables.
963    if (CurrentDef != E.def()) {
964      CurrentDef = cast<Instruction>(E.def());
965      CurrentBlock = nullptr;
966      CurrentMaterialization = nullptr;
967    }
968
969    // If we have not seen this block, materialize the value.
970    if (CurrentBlock != E.userBlock()) {
971      CurrentBlock = E.userBlock();
972      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
973      CurrentMaterialization->setName(CurrentDef->getName());
974      CurrentMaterialization->insertBefore(
975          &*CurrentBlock->getFirstInsertionPt());
976    }
977
978    if (auto *PN = dyn_cast<PHINode>(E.user())) {
979      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
980                                                "values in the PHINode");
981      PN->replaceAllUsesWith(CurrentMaterialization);
982      PN->eraseFromParent();
983      continue;
984    }
985
986    // Replace all uses of CurrentDef in the current instruction with the
987    // CurrentMaterialization for the block.
988    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
989  }
990}
991
992// Splits the block at a particular instruction unless it is the first
993// instruction in the block with a single predecessor.
994static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
995  auto *BB = I->getParent();
996  if (&BB->front() == I) {
997    if (BB->getSinglePredecessor()) {
998      BB->setName(Name);
999      return BB;
1000    }
1001  }
1002  return BB->splitBasicBlock(I, Name);
1003}
1004
1005// Split above and below a particular instruction so that it
1006// will be all alone by itself in a block.
1007static void splitAround(Instruction *I, const Twine &Name) {
1008  splitBlockIfNotFirst(I, Name);
1009  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1010}
1011
1012static bool isSuspendBlock(BasicBlock *BB) {
1013  return isa<AnyCoroSuspendInst>(BB->front());
1014}
1015
1016typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1017
1018/// Does control flow starting at the given block ever reach a suspend
1019/// instruction before reaching a block in VisitedOrFreeBBs?
1020static bool isSuspendReachableFrom(BasicBlock *From,
1021                                   VisitedBlocksSet &VisitedOrFreeBBs) {
1022  // Eagerly try to add this block to the visited set.  If it's already
1023  // there, stop recursing; this path doesn't reach a suspend before
1024  // either looping or reaching a freeing block.
1025  if (!VisitedOrFreeBBs.insert(From).second)
1026    return false;
1027
1028  // We assume that we'll already have split suspends into their own blocks.
1029  if (isSuspendBlock(From))
1030    return true;
1031
1032  // Recurse on the successors.
1033  for (auto Succ : successors(From)) {
1034    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1035      return true;
1036  }
1037
1038  return false;
1039}
1040
1041/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1042/// suspend point?
1043static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
1044  // Seed the visited set with all the basic blocks containing a free
1045  // so that we won't pass them up.
1046  VisitedBlocksSet VisitedOrFreeBBs;
1047  for (auto User : AI->users()) {
1048    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1049      VisitedOrFreeBBs.insert(FI->getParent());
1050  }
1051
1052  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1053}
1054
1055/// After we split the coroutine, will the given basic block be along
1056/// an obvious exit path for the resumption function?
1057static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1058                                              unsigned depth = 3) {
1059  // If we've bottomed out our depth count, stop searching and assume
1060  // that the path might loop back.
1061  if (depth == 0) return false;
1062
1063  // If this is a suspend block, we're about to exit the resumption function.
1064  if (isSuspendBlock(BB)) return true;
1065
1066  // Recurse into the successors.
1067  for (auto Succ : successors(BB)) {
1068    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1069      return false;
1070  }
1071
1072  // If none of the successors leads back in a loop, we're on an exit/abort.
1073  return true;
1074}
1075
1076static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1077  // Look for a free that isn't sufficiently obviously followed by
1078  // either a suspend or a termination, i.e. something that will leave
1079  // the coro resumption frame.
1080  for (auto U : AI->users()) {
1081    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1082    if (!FI) continue;
1083
1084    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1085      return true;
1086  }
1087
1088  // If we never found one, we don't need a stack save.
1089  return false;
1090}
1091
1092/// Turn each of the given local allocas into a normal (dynamic) alloca
1093/// instruction.
1094static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1095                              SmallVectorImpl<Instruction*> &DeadInsts) {
1096  for (auto AI : LocalAllocas) {
1097    auto M = AI->getModule();
1098    IRBuilder<> Builder(AI);
1099
1100    // Save the stack depth.  Try to avoid doing this if the stackrestore
1101    // is going to immediately precede a return or something.
1102    Value *StackSave = nullptr;
1103    if (localAllocaNeedsStackSave(AI))
1104      StackSave = Builder.CreateCall(
1105                            Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1106
1107    // Allocate memory.
1108    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1109    Alloca->setAlignment(MaybeAlign(AI->getAlignment()));
1110
1111    for (auto U : AI->users()) {
1112      // Replace gets with the allocation.
1113      if (isa<CoroAllocaGetInst>(U)) {
1114        U->replaceAllUsesWith(Alloca);
1115
1116      // Replace frees with stackrestores.  This is safe because
1117      // alloca.alloc is required to obey a stack discipline, although we
1118      // don't enforce that structurally.
1119      } else {
1120        auto FI = cast<CoroAllocaFreeInst>(U);
1121        if (StackSave) {
1122          Builder.SetInsertPoint(FI);
1123          Builder.CreateCall(
1124                    Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1125                             StackSave);
1126        }
1127      }
1128      DeadInsts.push_back(cast<Instruction>(U));
1129    }
1130
1131    DeadInsts.push_back(AI);
1132  }
1133}
1134
1135/// Turn the given coro.alloca.alloc call into a dynamic allocation.
1136/// This happens during the all-instructions iteration, so it must not
1137/// delete the call.
1138static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
1139                                        coro::Shape &Shape,
1140                                   SmallVectorImpl<Instruction*> &DeadInsts) {
1141  IRBuilder<> Builder(AI);
1142  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
1143
1144  for (User *U : AI->users()) {
1145    if (isa<CoroAllocaGetInst>(U)) {
1146      U->replaceAllUsesWith(Alloc);
1147    } else {
1148      auto FI = cast<CoroAllocaFreeInst>(U);
1149      Builder.SetInsertPoint(FI);
1150      Shape.emitDealloc(Builder, Alloc, nullptr);
1151    }
1152    DeadInsts.push_back(cast<Instruction>(U));
1153  }
1154
1155  // Push this on last so that it gets deleted after all the others.
1156  DeadInsts.push_back(AI);
1157
1158  // Return the new allocation value so that we can check for needed spills.
1159  return cast<Instruction>(Alloc);
1160}
1161
1162/// Get the current swifterror value.
1163static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1164                                     coro::Shape &Shape) {
1165  // Make a fake function pointer as a sort of intrinsic.
1166  auto FnTy = FunctionType::get(ValueTy, {}, false);
1167  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1168
1169  auto Call = Builder.CreateCall(Fn, {});
1170  Shape.SwiftErrorOps.push_back(Call);
1171
1172  return Call;
1173}
1174
1175/// Set the given value as the current swifterror value.
1176///
1177/// Returns a slot that can be used as a swifterror slot.
1178static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1179                                     coro::Shape &Shape) {
1180  // Make a fake function pointer as a sort of intrinsic.
1181  auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1182                                {V->getType()}, false);
1183  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1184
1185  auto Call = Builder.CreateCall(Fn, { V });
1186  Shape.SwiftErrorOps.push_back(Call);
1187
1188  return Call;
1189}
1190
1191/// Set the swifterror value from the given alloca before a call,
1192/// then put in back in the alloca afterwards.
1193///
1194/// Returns an address that will stand in for the swifterror slot
1195/// until splitting.
1196static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1197                                                 AllocaInst *Alloca,
1198                                                 coro::Shape &Shape) {
1199  auto ValueTy = Alloca->getAllocatedType();
1200  IRBuilder<> Builder(Call);
1201
1202  // Load the current value from the alloca and set it as the
1203  // swifterror value.
1204  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1205  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1206
1207  // Move to after the call.  Since swifterror only has a guaranteed
1208  // value on normal exits, we can ignore implicit and explicit unwind
1209  // edges.
1210  if (isa<CallInst>(Call)) {
1211    Builder.SetInsertPoint(Call->getNextNode());
1212  } else {
1213    auto Invoke = cast<InvokeInst>(Call);
1214    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1215  }
1216
1217  // Get the current swifterror value and store it to the alloca.
1218  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1219  Builder.CreateStore(ValueAfterCall, Alloca);
1220
1221  return Addr;
1222}
1223
1224/// Eliminate a formerly-swifterror alloca by inserting the get/set
1225/// intrinsics and attempting to MemToReg the alloca away.
1226static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1227                                      coro::Shape &Shape) {
1228  for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1229    // We're likely changing the use list, so use a mutation-safe
1230    // iteration pattern.
1231    auto &Use = *UI;
1232    ++UI;
1233
1234    // swifterror values can only be used in very specific ways.
1235    // We take advantage of that here.
1236    auto User = Use.getUser();
1237    if (isa<LoadInst>(User) || isa<StoreInst>(User))
1238      continue;
1239
1240    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1241    auto Call = cast<Instruction>(User);
1242
1243    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1244
1245    // Use the returned slot address as the call argument.
1246    Use.set(Addr);
1247  }
1248
1249  // All the uses should be loads and stores now.
1250  assert(isAllocaPromotable(Alloca));
1251}
1252
1253/// "Eliminate" a swifterror argument by reducing it to the alloca case
1254/// and then loading and storing in the prologue and epilog.
1255///
1256/// The argument keeps the swifterror flag.
1257static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
1258                                        coro::Shape &Shape,
1259                             SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
1260  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
1261
1262  auto ArgTy = cast<PointerType>(Arg.getType());
1263  auto ValueTy = ArgTy->getElementType();
1264
1265  // Reduce to the alloca case:
1266
1267  // Create an alloca and replace all uses of the arg with it.
1268  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
1269  Arg.replaceAllUsesWith(Alloca);
1270
1271  // Set an initial value in the alloca.  swifterror is always null on entry.
1272  auto InitialValue = Constant::getNullValue(ValueTy);
1273  Builder.CreateStore(InitialValue, Alloca);
1274
1275  // Find all the suspends in the function and save and restore around them.
1276  for (auto Suspend : Shape.CoroSuspends) {
1277    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
1278  }
1279
1280  // Find all the coro.ends in the function and restore the error value.
1281  for (auto End : Shape.CoroEnds) {
1282    Builder.SetInsertPoint(End);
1283    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
1284    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
1285  }
1286
1287  // Now we can use the alloca logic.
1288  AllocasToPromote.push_back(Alloca);
1289  eliminateSwiftErrorAlloca(F, Alloca, Shape);
1290}
1291
1292/// Eliminate all problematic uses of swifterror arguments and allocas
1293/// from the function.  We'll fix them up later when splitting the function.
1294static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
1295  SmallVector<AllocaInst*, 4> AllocasToPromote;
1296
1297  // Look for a swifterror argument.
1298  for (auto &Arg : F.args()) {
1299    if (!Arg.hasSwiftErrorAttr()) continue;
1300
1301    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
1302    break;
1303  }
1304
1305  // Look for swifterror allocas.
1306  for (auto &Inst : F.getEntryBlock()) {
1307    auto Alloca = dyn_cast<AllocaInst>(&Inst);
1308    if (!Alloca || !Alloca->isSwiftError()) continue;
1309
1310    // Clear the swifterror flag.
1311    Alloca->setSwiftError(false);
1312
1313    AllocasToPromote.push_back(Alloca);
1314    eliminateSwiftErrorAlloca(F, Alloca, Shape);
1315  }
1316
1317  // If we have any allocas to promote, compute a dominator tree and
1318  // promote them en masse.
1319  if (!AllocasToPromote.empty()) {
1320    DominatorTree DT(F);
1321    PromoteMemToReg(AllocasToPromote, DT);
1322  }
1323}
1324
1325void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
1326  // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
1327  // access to local variables.
1328  LowerDbgDeclare(F);
1329
1330  eliminateSwiftError(F, Shape);
1331
1332  if (Shape.ABI == coro::ABI::Switch &&
1333      Shape.SwitchLowering.PromiseAlloca) {
1334    Shape.getSwitchCoroId()->clearPromise();
1335  }
1336
1337  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
1338  // intrinsics are in their own blocks to simplify the logic of building up
1339  // SuspendCrossing data.
1340  for (auto *CSI : Shape.CoroSuspends) {
1341    if (auto *Save = CSI->getCoroSave())
1342      splitAround(Save, "CoroSave");
1343    splitAround(CSI, "CoroSuspend");
1344  }
1345
1346  // Put CoroEnds into their own blocks.
1347  for (CoroEndInst *CE : Shape.CoroEnds)
1348    splitAround(CE, "CoroEnd");
1349
1350  // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will
1351  // never has its definition separated from the PHI by the suspend point.
1352  rewritePHIs(F);
1353
1354  // Build suspend crossing info.
1355  SuspendCrossingInfo Checker(F, Shape);
1356
1357  IRBuilder<> Builder(F.getContext());
1358  SpillInfo Spills;
1359  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
1360  SmallVector<Instruction*, 4> DeadInstructions;
1361
1362  for (int Repeat = 0; Repeat < 4; ++Repeat) {
1363    // See if there are materializable instructions across suspend points.
1364    for (Instruction &I : instructions(F))
1365      if (materializable(I))
1366        for (User *U : I.users())
1367          if (Checker.isDefinitionAcrossSuspend(I, U))
1368            Spills.emplace_back(&I, U);
1369
1370    if (Spills.empty())
1371      break;
1372
1373    // Rewrite materializable instructions to be materialized at the use point.
1374    LLVM_DEBUG(dump("Materializations", Spills));
1375    rewriteMaterializableInstructions(Builder, Spills);
1376    Spills.clear();
1377  }
1378
1379  // Collect the spills for arguments and other not-materializable values.
1380  for (Argument &A : F.args())
1381    for (User *U : A.users())
1382      if (Checker.isDefinitionAcrossSuspend(A, U))
1383        Spills.emplace_back(&A, U);
1384
1385  for (Instruction &I : instructions(F)) {
1386    // Values returned from coroutine structure intrinsics should not be part
1387    // of the Coroutine Frame.
1388    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
1389      continue;
1390
1391    // The Coroutine Promise always included into coroutine frame, no need to
1392    // check for suspend crossing.
1393    if (Shape.ABI == coro::ABI::Switch &&
1394        Shape.SwitchLowering.PromiseAlloca == &I)
1395      continue;
1396
1397    // Handle alloca.alloc specially here.
1398    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
1399      // Check whether the alloca's lifetime is bounded by suspend points.
1400      if (isLocalAlloca(AI)) {
1401        LocalAllocas.push_back(AI);
1402        continue;
1403      }
1404
1405      // If not, do a quick rewrite of the alloca and then add spills of
1406      // the rewritten value.  The rewrite doesn't invalidate anything in
1407      // Spills because the other alloca intrinsics have no other operands
1408      // besides AI, and it doesn't invalidate the iteration because we delay
1409      // erasing AI.
1410      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
1411
1412      for (User *U : Alloc->users()) {
1413        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
1414          Spills.emplace_back(Alloc, U);
1415      }
1416      continue;
1417    }
1418
1419    // Ignore alloca.get; we process this as part of coro.alloca.alloc.
1420    if (isa<CoroAllocaGetInst>(I)) {
1421      continue;
1422    }
1423
1424    for (User *U : I.users())
1425      if (Checker.isDefinitionAcrossSuspend(I, U)) {
1426        // We cannot spill a token.
1427        if (I.getType()->isTokenTy())
1428          report_fatal_error(
1429              "token definition is separated from the use by a suspend point");
1430        Spills.emplace_back(&I, U);
1431      }
1432  }
1433  LLVM_DEBUG(dump("Spills", Spills));
1434  Shape.FrameTy = buildFrameType(F, Shape, Spills);
1435  Shape.FramePtr = insertSpills(Spills, Shape);
1436  lowerLocalAllocas(LocalAllocas, DeadInstructions);
1437
1438  for (auto I : DeadInstructions)
1439    I->eraseFromParent();
1440}
1441