1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file contains classes used to discover if for a particular value
9// there from sue to definition that crosses a suspend block.
10//
11// Using the information discovered we form a Coroutine Frame structure to
12// contain those values. All uses of those values are replaced with appropriate
13// GEP + load from the coroutine frame. At the point of the definition we spill
14// the value into the coroutine frame.
15//
16// TODO: pack values tightly using liveness info.
17//===----------------------------------------------------------------------===//
18
19#include "CoroInternal.h"
20#include "llvm/ADT/BitVector.h"
21#include "llvm/ADT/SmallString.h"
22#include "llvm/Analysis/PtrUseVisitor.h"
23#include "llvm/Config/llvm-config.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/DIBuilder.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/InstIterator.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Support/circular_raw_ostream.h"
32#include "llvm/Support/OptimizedStructLayout.h"
33#include "llvm/Transforms/Utils/BasicBlockUtils.h"
34#include "llvm/Transforms/Utils/Local.h"
35#include "llvm/Transforms/Utils/PromoteMemToReg.h"
36#include <algorithm>
37
38using namespace llvm;
39
40// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
41// "coro-frame", which results in leaner debug spew.
42#define DEBUG_TYPE "coro-suspend-crossing"
43
44enum { SmallVectorThreshold = 32 };
45
46// Provides two way mapping between the blocks and numbers.
47namespace {
48class BlockToIndexMapping {
49  SmallVector<BasicBlock *, SmallVectorThreshold> V;
50
51public:
52  size_t size() const { return V.size(); }
53
54  BlockToIndexMapping(Function &F) {
55    for (BasicBlock &BB : F)
56      V.push_back(&BB);
57    llvm::sort(V);
58  }
59
60  size_t blockToIndex(BasicBlock *BB) const {
61    auto *I = llvm::lower_bound(V, BB);
62    assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
63    return I - V.begin();
64  }
65
66  BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
67};
68} // end anonymous namespace
69
70// The SuspendCrossingInfo maintains data that allows to answer a question
71// whether given two BasicBlocks A and B there is a path from A to B that
72// passes through a suspend point.
73//
74// For every basic block 'i' it maintains a BlockData that consists of:
75//   Consumes:  a bit vector which contains a set of indices of blocks that can
76//              reach block 'i'
77//   Kills: a bit vector which contains a set of indices of blocks that can
78//          reach block 'i', but one of the path will cross a suspend point
79//   Suspend: a boolean indicating whether block 'i' contains a suspend point.
80//   End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
81//
82namespace {
83struct SuspendCrossingInfo {
84  BlockToIndexMapping Mapping;
85
86  struct BlockData {
87    BitVector Consumes;
88    BitVector Kills;
89    bool Suspend = false;
90    bool End = false;
91  };
92  SmallVector<BlockData, SmallVectorThreshold> Block;
93
94  iterator_range<succ_iterator> successors(BlockData const &BD) const {
95    BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
96    return llvm::successors(BB);
97  }
98
99  BlockData &getBlockData(BasicBlock *BB) {
100    return Block[Mapping.blockToIndex(BB)];
101  }
102
103  void dump() const;
104  void dump(StringRef Label, BitVector const &BV) const;
105
106  SuspendCrossingInfo(Function &F, coro::Shape &Shape);
107
108  bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
109    size_t const DefIndex = Mapping.blockToIndex(DefBB);
110    size_t const UseIndex = Mapping.blockToIndex(UseBB);
111
112    bool const Result = Block[UseIndex].Kills[DefIndex];
113    LLVM_DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
114                      << " answer is " << Result << "\n");
115    return Result;
116  }
117
118  bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
119    auto *I = cast<Instruction>(U);
120
121    // We rewrote PHINodes, so that only the ones with exactly one incoming
122    // value need to be analyzed.
123    if (auto *PN = dyn_cast<PHINode>(I))
124      if (PN->getNumIncomingValues() > 1)
125        return false;
126
127    BasicBlock *UseBB = I->getParent();
128
129    // As a special case, treat uses by an llvm.coro.suspend.retcon
130    // as if they were uses in the suspend's single predecessor: the
131    // uses conceptually occur before the suspend.
132    if (isa<CoroSuspendRetconInst>(I)) {
133      UseBB = UseBB->getSinglePredecessor();
134      assert(UseBB && "should have split coro.suspend into its own block");
135    }
136
137    return hasPathCrossingSuspendPoint(DefBB, UseBB);
138  }
139
140  bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
141    return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
142  }
143
144  bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
145    auto *DefBB = I.getParent();
146
147    // As a special case, treat values produced by an llvm.coro.suspend.*
148    // as if they were defined in the single successor: the uses
149    // conceptually occur after the suspend.
150    if (isa<AnyCoroSuspendInst>(I)) {
151      DefBB = DefBB->getSingleSuccessor();
152      assert(DefBB && "should have split coro.suspend into its own block");
153    }
154
155    return isDefinitionAcrossSuspend(DefBB, U);
156  }
157};
158} // end anonymous namespace
159
160#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
161LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
162                                                BitVector const &BV) const {
163  dbgs() << Label << ":";
164  for (size_t I = 0, N = BV.size(); I < N; ++I)
165    if (BV[I])
166      dbgs() << " " << Mapping.indexToBlock(I)->getName();
167  dbgs() << "\n";
168}
169
170LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
171  for (size_t I = 0, N = Block.size(); I < N; ++I) {
172    BasicBlock *const B = Mapping.indexToBlock(I);
173    dbgs() << B->getName() << ":\n";
174    dump("   Consumes", Block[I].Consumes);
175    dump("      Kills", Block[I].Kills);
176  }
177  dbgs() << "\n";
178}
179#endif
180
181SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
182    : Mapping(F) {
183  const size_t N = Mapping.size();
184  Block.resize(N);
185
186  // Initialize every block so that it consumes itself
187  for (size_t I = 0; I < N; ++I) {
188    auto &B = Block[I];
189    B.Consumes.resize(N);
190    B.Kills.resize(N);
191    B.Consumes.set(I);
192  }
193
194  // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
195  // the code beyond coro.end is reachable during initial invocation of the
196  // coroutine.
197  for (auto *CE : Shape.CoroEnds)
198    getBlockData(CE->getParent()).End = true;
199
200  // Mark all suspend blocks and indicate that they kill everything they
201  // consume. Note, that crossing coro.save also requires a spill, as any code
202  // between coro.save and coro.suspend may resume the coroutine and all of the
203  // state needs to be saved by that time.
204  auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
205    BasicBlock *SuspendBlock = BarrierInst->getParent();
206    auto &B = getBlockData(SuspendBlock);
207    B.Suspend = true;
208    B.Kills |= B.Consumes;
209  };
210  for (auto *CSI : Shape.CoroSuspends) {
211    markSuspendBlock(CSI);
212    if (auto *Save = CSI->getCoroSave())
213      markSuspendBlock(Save);
214  }
215
216  // Iterate propagating consumes and kills until they stop changing.
217  int Iteration = 0;
218  (void)Iteration;
219
220  bool Changed;
221  do {
222    LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
223    LLVM_DEBUG(dbgs() << "==============\n");
224
225    Changed = false;
226    for (size_t I = 0; I < N; ++I) {
227      auto &B = Block[I];
228      for (BasicBlock *SI : successors(B)) {
229
230        auto SuccNo = Mapping.blockToIndex(SI);
231
232        // Saved Consumes and Kills bitsets so that it is easy to see
233        // if anything changed after propagation.
234        auto &S = Block[SuccNo];
235        auto SavedConsumes = S.Consumes;
236        auto SavedKills = S.Kills;
237
238        // Propagate Kills and Consumes from block B into its successor S.
239        S.Consumes |= B.Consumes;
240        S.Kills |= B.Kills;
241
242        // If block B is a suspend block, it should propagate kills into the
243        // its successor for every block B consumes.
244        if (B.Suspend) {
245          S.Kills |= B.Consumes;
246        }
247        if (S.Suspend) {
248          // If block S is a suspend block, it should kill all of the blocks it
249          // consumes.
250          S.Kills |= S.Consumes;
251        } else if (S.End) {
252          // If block S is an end block, it should not propagate kills as the
253          // blocks following coro.end() are reached during initial invocation
254          // of the coroutine while all the data are still available on the
255          // stack or in the registers.
256          S.Kills.reset();
257        } else {
258          // This is reached when S block it not Suspend nor coro.end and it
259          // need to make sure that it is not in the kill set.
260          S.Kills.reset(SuccNo);
261        }
262
263        // See if anything changed.
264        Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
265
266        if (S.Kills != SavedKills) {
267          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
268                            << "\n");
269          LLVM_DEBUG(dump("S.Kills", S.Kills));
270          LLVM_DEBUG(dump("SavedKills", SavedKills));
271        }
272        if (S.Consumes != SavedConsumes) {
273          LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
274          LLVM_DEBUG(dump("S.Consume", S.Consumes));
275          LLVM_DEBUG(dump("SavedCons", SavedConsumes));
276        }
277      }
278    }
279  } while (Changed);
280  LLVM_DEBUG(dump());
281}
282
283#undef DEBUG_TYPE // "coro-suspend-crossing"
284#define DEBUG_TYPE "coro-frame"
285
286// We build up the list of spills for every case where a use is separated
287// from the definition by a suspend point.
288
289static const unsigned InvalidFieldIndex = ~0U;
290
291namespace {
292class Spill {
293  Value *Def = nullptr;
294  Instruction *User = nullptr;
295  unsigned FieldNo = InvalidFieldIndex;
296
297public:
298  Spill(Value *Def, llvm::User *U) : Def(Def), User(cast<Instruction>(U)) {}
299
300  Value *def() const { return Def; }
301  Instruction *user() const { return User; }
302  BasicBlock *userBlock() const { return User->getParent(); }
303
304  // Note that field index is stored in the first SpillEntry for a particular
305  // definition. Subsequent mentions of a defintion do not have fieldNo
306  // assigned. This works out fine as the users of Spills capture the info about
307  // the definition the first time they encounter it. Consider refactoring
308  // SpillInfo into two arrays to normalize the spill representation.
309  unsigned fieldIndex() const {
310    assert(FieldNo != InvalidFieldIndex && "Accessing unassigned field");
311    return FieldNo;
312  }
313  void setFieldIndex(unsigned FieldNumber) {
314    assert(FieldNo == InvalidFieldIndex && "Reassigning field number");
315    FieldNo = FieldNumber;
316  }
317};
318} // namespace
319
320// Note that there may be more than one record with the same value of Def in
321// the SpillInfo vector.
322using SpillInfo = SmallVector<Spill, 8>;
323
324#ifndef NDEBUG
325static void dump(StringRef Title, SpillInfo const &Spills) {
326  dbgs() << "------------- " << Title << "--------------\n";
327  Value *CurrentValue = nullptr;
328  for (auto const &E : Spills) {
329    if (CurrentValue != E.def()) {
330      CurrentValue = E.def();
331      CurrentValue->dump();
332    }
333    dbgs() << "   user: ";
334    E.user()->dump();
335  }
336}
337#endif
338
339namespace {
340// We cannot rely solely on natural alignment of a type when building a
341// coroutine frame and if the alignment specified on the Alloca instruction
342// differs from the natural alignment of the alloca type we will need to insert
343// padding.
344class FrameTypeBuilder {
345  struct Field {
346    uint64_t Size;
347    uint64_t Offset;
348    Spill *ForSpill;
349    Type *Ty;
350    unsigned FieldIndex;
351    Align Alignment;
352    Align TyAlignment;
353  };
354
355  const DataLayout &DL;
356  LLVMContext &Context;
357  uint64_t StructSize = 0;
358  Align StructAlign;
359  bool IsFinished = false;
360
361  SmallVector<Field, 8> Fields;
362  DenseMap<Value*, unsigned> FieldIndexByKey;
363
364public:
365  FrameTypeBuilder(LLVMContext &Context, DataLayout const &DL)
366    : DL(DL), Context(Context) {}
367
368  class FieldId {
369    size_t Value;
370    explicit FieldId(size_t Value) : Value(Value) {}
371
372    friend class FrameTypeBuilder;
373  };
374
375  /// Add a field to this structure for the storage of an `alloca`
376  /// instruction.
377  FieldId addFieldForAlloca(AllocaInst *AI, Spill *ForSpill = nullptr,
378                            bool IsHeader = false) {
379    Type *Ty = AI->getAllocatedType();
380
381    // Make an array type if this is a static array allocation.
382    if (AI->isArrayAllocation()) {
383      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
384        Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
385      else
386        report_fatal_error("Coroutines cannot handle non static allocas yet");
387    }
388
389    return addField(Ty, AI->getAlign(), ForSpill, IsHeader);
390  }
391
392  /// Add a field to this structure.
393  FieldId addField(Type *Ty, MaybeAlign FieldAlignment,
394                   Spill *ForSpill = nullptr,
395                   bool IsHeader = false) {
396    assert(!IsFinished && "adding fields to a finished builder");
397    assert(Ty && "must provide a type for a field");
398
399    // The field size is always the alloc size of the type.
400    uint64_t FieldSize = DL.getTypeAllocSize(Ty);
401
402    // The field alignment might not be the type alignment, but we need
403    // to remember the type alignment anyway to build the type.
404    Align TyAlignment = DL.getABITypeAlign(Ty);
405    if (!FieldAlignment) FieldAlignment = TyAlignment;
406
407    // Lay out header fields immediately.
408    uint64_t Offset;
409    if (IsHeader) {
410      Offset = alignTo(StructSize, FieldAlignment);
411      StructSize = Offset + FieldSize;
412
413    // Everything else has a flexible offset.
414    } else {
415      Offset = OptimizedStructLayoutField::FlexibleOffset;
416    }
417
418    Fields.push_back({FieldSize, Offset, ForSpill, Ty, 0,
419                      *FieldAlignment, TyAlignment});
420    return FieldId(Fields.size() - 1);
421  }
422
423  /// Finish the layout and set the body on the given type.
424  void finish(StructType *Ty);
425
426  uint64_t getStructSize() const {
427    assert(IsFinished && "not yet finished!");
428    return StructSize;
429  }
430
431  Align getStructAlign() const {
432    assert(IsFinished && "not yet finished!");
433    return StructAlign;
434  }
435
436  unsigned getFieldIndex(FieldId Id) const {
437    assert(IsFinished && "not yet finished!");
438    return Fields[Id.Value].FieldIndex;
439  }
440};
441} // namespace
442
443void FrameTypeBuilder::finish(StructType *Ty) {
444  assert(!IsFinished && "already finished!");
445
446  // Prepare the optimal-layout field array.
447  // The Id in the layout field is a pointer to our Field for it.
448  SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
449  LayoutFields.reserve(Fields.size());
450  for (auto &Field : Fields) {
451    LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
452                              Field.Offset);
453  }
454
455  // Perform layout.
456  auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
457  StructSize = SizeAndAlign.first;
458  StructAlign = SizeAndAlign.second;
459
460  auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
461    return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
462  };
463
464  // We need to produce a packed struct type if there's a field whose
465  // assigned offset isn't a multiple of its natural type alignment.
466  bool Packed = [&] {
467    for (auto &LayoutField : LayoutFields) {
468      auto &F = getField(LayoutField);
469      if (!isAligned(F.TyAlignment, LayoutField.Offset))
470        return true;
471    }
472    return false;
473  }();
474
475  // Build the struct body.
476  SmallVector<Type*, 16> FieldTypes;
477  FieldTypes.reserve(LayoutFields.size() * 3 / 2);
478  uint64_t LastOffset = 0;
479  for (auto &LayoutField : LayoutFields) {
480    auto &F = getField(LayoutField);
481
482    auto Offset = LayoutField.Offset;
483
484    // Add a padding field if there's a padding gap and we're either
485    // building a packed struct or the padding gap is more than we'd
486    // get from aligning to the field type's natural alignment.
487    assert(Offset >= LastOffset);
488    if (Offset != LastOffset) {
489      if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
490        FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
491                                            Offset - LastOffset));
492    }
493
494    // Record the layout information into both the Field and the
495    // original Spill, if there is one.
496    F.Offset = Offset;
497    F.FieldIndex = FieldTypes.size();
498    if (F.ForSpill) {
499      F.ForSpill->setFieldIndex(F.FieldIndex);
500    }
501
502    FieldTypes.push_back(F.Ty);
503    LastOffset = Offset + F.Size;
504  }
505
506  Ty->setBody(FieldTypes, Packed);
507
508#ifndef NDEBUG
509  // Check that the IR layout matches the offsets we expect.
510  auto Layout = DL.getStructLayout(Ty);
511  for (auto &F : Fields) {
512    assert(Ty->getElementType(F.FieldIndex) == F.Ty);
513    assert(Layout->getElementOffset(F.FieldIndex) == F.Offset);
514  }
515#endif
516
517  IsFinished = true;
518}
519
520// Build a struct that will keep state for an active coroutine.
521//   struct f.frame {
522//     ResumeFnTy ResumeFnAddr;
523//     ResumeFnTy DestroyFnAddr;
524//     int ResumeIndex;
525//     ... promise (if present) ...
526//     ... spills ...
527//   };
528static StructType *buildFrameType(Function &F, coro::Shape &Shape,
529                                  SpillInfo &Spills) {
530  LLVMContext &C = F.getContext();
531  const DataLayout &DL = F.getParent()->getDataLayout();
532  StructType *FrameTy = [&] {
533    SmallString<32> Name(F.getName());
534    Name.append(".Frame");
535    return StructType::create(C, Name);
536  }();
537
538  FrameTypeBuilder B(C, DL);
539
540  AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
541  Optional<FrameTypeBuilder::FieldId> PromiseFieldId;
542  Optional<FrameTypeBuilder::FieldId> SwitchIndexFieldId;
543
544  if (Shape.ABI == coro::ABI::Switch) {
545    auto *FramePtrTy = FrameTy->getPointerTo();
546    auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
547                                   /*IsVarArg=*/false);
548    auto *FnPtrTy = FnTy->getPointerTo();
549
550    // Add header fields for the resume and destroy functions.
551    // We can rely on these being perfectly packed.
552    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
553    B.addField(FnPtrTy, None, nullptr, /*header*/ true);
554
555    // Add a header field for the promise if there is one.
556    if (PromiseAlloca) {
557      PromiseFieldId =
558        B.addFieldForAlloca(PromiseAlloca, nullptr, /*header*/ true);
559    }
560
561    // Add a field to store the suspend index.  This doesn't need to
562    // be in the header.
563    unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
564    Type *IndexType = Type::getIntNTy(C, IndexBits);
565
566    SwitchIndexFieldId = B.addField(IndexType, None);
567  } else {
568    assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
569  }
570
571  Value *CurrentDef = nullptr;
572
573  // Create an entry for every spilled value.
574  for (auto &S : Spills) {
575    // We can have multiple entries in Spills for a single value, but
576    // they should form a contiguous run.  Ignore all but the first.
577    if (CurrentDef == S.def())
578      continue;
579
580    CurrentDef = S.def();
581
582    assert(CurrentDef != PromiseAlloca &&
583           "recorded spill use of promise alloca?");
584
585    if (auto *AI = dyn_cast<AllocaInst>(CurrentDef)) {
586      B.addFieldForAlloca(AI, &S);
587    } else {
588      Type *Ty = CurrentDef->getType();
589      B.addField(Ty, None, &S);
590    }
591  }
592
593  B.finish(FrameTy);
594  Shape.FrameAlign = B.getStructAlign();
595  Shape.FrameSize = B.getStructSize();
596
597  switch (Shape.ABI) {
598  // In the switch ABI, remember the field indices for the promise and
599  // switch-index fields.
600  case coro::ABI::Switch:
601    Shape.SwitchLowering.IndexField =
602      B.getFieldIndex(*SwitchIndexFieldId);
603    Shape.SwitchLowering.PromiseField =
604      (PromiseAlloca ? B.getFieldIndex(*PromiseFieldId) : 0);
605
606    // Also round the frame size up to a multiple of its alignment, as is
607    // generally expected in C/C++.
608    Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
609    break;
610
611  // In the retcon ABI, remember whether the frame is inline in the storage.
612  case coro::ABI::Retcon:
613  case coro::ABI::RetconOnce: {
614    auto Id = Shape.getRetconCoroId();
615    Shape.RetconLowering.IsFrameInlineInStorage
616      = (B.getStructSize() <= Id->getStorageSize() &&
617         B.getStructAlign() <= Id->getStorageAlignment());
618    break;
619  }
620  }
621
622  return FrameTy;
623}
624
625// We use a pointer use visitor to discover if there are any writes into an
626// alloca that dominates CoroBegin. If that is the case, insertSpills will copy
627// the value from the alloca into the coroutine frame spill slot corresponding
628// to that alloca.
629namespace {
630struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
631  using Base = PtrUseVisitor<AllocaUseVisitor>;
632  AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
633                   const CoroBeginInst &CB)
634      : PtrUseVisitor(DL), DT(DT), CoroBegin(CB) {}
635
636  // We are only interested in uses that dominate coro.begin.
637  void visit(Instruction &I) {
638    if (DT.dominates(&I, &CoroBegin))
639      Base::visit(I);
640  }
641  // We need to provide this overload as PtrUseVisitor uses a pointer based
642  // visiting function.
643  void visit(Instruction *I) { return visit(*I); }
644
645  void visitLoadInst(LoadInst &) {} // Good. Nothing to do.
646
647  // If the use is an operand, the pointer escaped and anything can write into
648  // that memory. If the use is the pointer, we are definitely writing into the
649  // alloca and therefore we need to copy.
650  void visitStoreInst(StoreInst &SI) { PI.setAborted(&SI); }
651
652  // Any other instruction that is not filtered out by PtrUseVisitor, will
653  // result in the copy.
654  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
655
656private:
657  const DominatorTree &DT;
658  const CoroBeginInst &CoroBegin;
659};
660} // namespace
661static bool mightWriteIntoAllocaPtr(AllocaInst &A, const DominatorTree &DT,
662                                    const CoroBeginInst &CB) {
663  const DataLayout &DL = A.getModule()->getDataLayout();
664  AllocaUseVisitor Visitor(DL, DT, CB);
665  auto PtrI = Visitor.visitPtr(A);
666  if (PtrI.isEscaped() || PtrI.isAborted()) {
667    auto *PointerEscapingInstr = PtrI.getEscapingInst()
668                                     ? PtrI.getEscapingInst()
669                                     : PtrI.getAbortingInst();
670    if (PointerEscapingInstr) {
671      LLVM_DEBUG(
672          dbgs() << "AllocaInst copy was triggered by instruction: "
673                 << *PointerEscapingInstr << "\n");
674    }
675    return true;
676  }
677  return false;
678}
679
680// We need to make room to insert a spill after initial PHIs, but before
681// catchswitch instruction. Placing it before violates the requirement that
682// catchswitch, like all other EHPads must be the first nonPHI in a block.
683//
684// Split away catchswitch into a separate block and insert in its place:
685//
686//   cleanuppad <InsertPt> cleanupret.
687//
688// cleanupret instruction will act as an insert point for the spill.
689static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
690  BasicBlock *CurrentBlock = CatchSwitch->getParent();
691  BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
692  CurrentBlock->getTerminator()->eraseFromParent();
693
694  auto *CleanupPad =
695      CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
696  auto *CleanupRet =
697      CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
698  return CleanupRet;
699}
700
701// Replace all alloca and SSA values that are accessed across suspend points
702// with GetElementPointer from coroutine frame + loads and stores. Create an
703// AllocaSpillBB that will become the new entry block for the resume parts of
704// the coroutine:
705//
706//    %hdl = coro.begin(...)
707//    whatever
708//
709// becomes:
710//
711//    %hdl = coro.begin(...)
712//    %FramePtr = bitcast i8* hdl to %f.frame*
713//    br label %AllocaSpillBB
714//
715//  AllocaSpillBB:
716//    ; geps corresponding to allocas that were moved to coroutine frame
717//    br label PostSpill
718//
719//  PostSpill:
720//    whatever
721//
722//
723static Instruction *insertSpills(const SpillInfo &Spills, coro::Shape &Shape) {
724  auto *CB = Shape.CoroBegin;
725  LLVMContext &C = CB->getContext();
726  IRBuilder<> Builder(CB->getNextNode());
727  StructType *FrameTy = Shape.FrameTy;
728  PointerType *FramePtrTy = FrameTy->getPointerTo();
729  auto *FramePtr =
730      cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
731  DominatorTree DT(*CB->getFunction());
732
733  Value *CurrentValue = nullptr;
734  BasicBlock *CurrentBlock = nullptr;
735  Value *CurrentReload = nullptr;
736
737  // Proper field number will be read from field definition.
738  unsigned Index = InvalidFieldIndex;
739
740  // We need to keep track of any allocas that need "spilling"
741  // since they will live in the coroutine frame now, all access to them
742  // need to be changed, not just the access across suspend points
743  // we remember allocas and their indices to be handled once we processed
744  // all the spills.
745  SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
746
747  // Promise alloca (if present) doesn't show in the spills and has a
748  // special field number.
749  if (auto *PromiseAlloca = Shape.getPromiseAlloca()) {
750    assert(Shape.ABI == coro::ABI::Switch);
751    Allocas.emplace_back(PromiseAlloca, Shape.getPromiseField());
752  }
753
754  // Create a GEP with the given index into the coroutine frame for the original
755  // value Orig. Appends an extra 0 index for array-allocas, preserving the
756  // original type.
757  auto GetFramePointer = [&](uint32_t Index, Value *Orig) -> Value * {
758    SmallVector<Value *, 3> Indices = {
759        ConstantInt::get(Type::getInt32Ty(C), 0),
760        ConstantInt::get(Type::getInt32Ty(C), Index),
761    };
762
763    if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
764      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
765        auto Count = CI->getValue().getZExtValue();
766        if (Count > 1) {
767          Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
768        }
769      } else {
770        report_fatal_error("Coroutines cannot handle non static allocas yet");
771      }
772    }
773
774    return Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices);
775  };
776
777  // Create a load instruction to reload the spilled value from the coroutine
778  // frame. Populates the Value pointer reference provided with the frame GEP.
779  auto CreateReload = [&](Instruction *InsertBefore, Value *&G) {
780    assert(Index != InvalidFieldIndex && "accessing unassigned field number");
781    Builder.SetInsertPoint(InsertBefore);
782
783    G = GetFramePointer(Index, CurrentValue);
784    G->setName(CurrentValue->getName() + Twine(".reload.addr"));
785
786    return isa<AllocaInst>(CurrentValue)
787               ? G
788               : Builder.CreateLoad(FrameTy->getElementType(Index), G,
789                                    CurrentValue->getName() + Twine(".reload"));
790  };
791
792  Value *GEP = nullptr, *CurrentGEP = nullptr;
793  for (auto const &E : Spills) {
794    // If we have not seen the value, generate a spill.
795    if (CurrentValue != E.def()) {
796      CurrentValue = E.def();
797      CurrentBlock = nullptr;
798      CurrentReload = nullptr;
799
800      Index = E.fieldIndex();
801
802      if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
803        // Spilled AllocaInst will be replaced with GEP from the coroutine frame
804        // there is no spill required.
805        Allocas.emplace_back(AI, Index);
806        if (!AI->isStaticAlloca())
807          report_fatal_error("Coroutines cannot handle non static allocas yet");
808      } else {
809        // Otherwise, create a store instruction storing the value into the
810        // coroutine frame.
811
812        Instruction *InsertPt = nullptr;
813        if (auto Arg = dyn_cast<Argument>(CurrentValue)) {
814          // For arguments, we will place the store instruction right after
815          // the coroutine frame pointer instruction, i.e. bitcast of
816          // coro.begin from i8* to %f.frame*.
817          InsertPt = FramePtr->getNextNode();
818
819          // If we're spilling an Argument, make sure we clear 'nocapture'
820          // from the coroutine function.
821          Arg->getParent()->removeParamAttr(Arg->getArgNo(),
822                                            Attribute::NoCapture);
823
824        } else if (auto *II = dyn_cast<InvokeInst>(CurrentValue)) {
825          // If we are spilling the result of the invoke instruction, split the
826          // normal edge and insert the spill in the new block.
827          auto NewBB = SplitEdge(II->getParent(), II->getNormalDest());
828          InsertPt = NewBB->getTerminator();
829        } else if (isa<PHINode>(CurrentValue)) {
830          // Skip the PHINodes and EH pads instructions.
831          BasicBlock *DefBlock = cast<Instruction>(E.def())->getParent();
832          if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
833            InsertPt = splitBeforeCatchSwitch(CSI);
834          else
835            InsertPt = &*DefBlock->getFirstInsertionPt();
836        } else if (auto CSI = dyn_cast<AnyCoroSuspendInst>(CurrentValue)) {
837          // Don't spill immediately after a suspend; splitting assumes
838          // that the suspend will be followed by a branch.
839          InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
840        } else {
841          auto *I = cast<Instruction>(E.def());
842          assert(!I->isTerminator() && "unexpected terminator");
843          // For all other values, the spill is placed immediately after
844          // the definition.
845          if (DT.dominates(CB, I)) {
846            InsertPt = I->getNextNode();
847          } else {
848            // Unless, it is not dominated by CoroBegin, then it will be
849            // inserted immediately after CoroFrame is computed.
850            InsertPt = FramePtr->getNextNode();
851          }
852        }
853
854        Builder.SetInsertPoint(InsertPt);
855        auto *G = Builder.CreateConstInBoundsGEP2_32(
856            FrameTy, FramePtr, 0, Index,
857            CurrentValue->getName() + Twine(".spill.addr"));
858        Builder.CreateStore(CurrentValue, G);
859      }
860    }
861
862    // If we have not seen the use block, generate a reload in it.
863    if (CurrentBlock != E.userBlock()) {
864      CurrentBlock = E.userBlock();
865      CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt(), GEP);
866    }
867
868    // If we have a single edge PHINode, remove it and replace it with a reload
869    // from the coroutine frame. (We already took care of multi edge PHINodes
870    // by rewriting them in the rewritePHIs function).
871    if (auto *PN = dyn_cast<PHINode>(E.user())) {
872      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
873                                                "values in the PHINode");
874      PN->replaceAllUsesWith(CurrentReload);
875      PN->eraseFromParent();
876      continue;
877    }
878
879    // If we have not seen this GEP instruction, migrate any dbg.declare from
880    // the alloca to it.
881    if (CurrentGEP != GEP) {
882      CurrentGEP = GEP;
883      TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(CurrentValue);
884      if (!DIs.empty())
885        DIBuilder(*CurrentBlock->getParent()->getParent(),
886                  /*AllowUnresolved*/ false)
887            .insertDeclare(CurrentGEP, DIs.front()->getVariable(),
888                           DIs.front()->getExpression(),
889                           DIs.front()->getDebugLoc(), DIs.front());
890    }
891
892    // Replace all uses of CurrentValue in the current instruction with reload.
893    E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
894  }
895
896  BasicBlock *FramePtrBB = FramePtr->getParent();
897
898  auto SpillBlock =
899    FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
900  SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
901  Shape.AllocaSpillBlock = SpillBlock;
902
903  // retcon and retcon.once lowering assumes all uses have been sunk.
904  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce) {
905    // If we found any allocas, replace all of their remaining uses with Geps.
906    Builder.SetInsertPoint(&SpillBlock->front());
907    for (auto &P : Allocas) {
908      auto *G = GetFramePointer(P.second, P.first);
909
910      // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
911      // here, as we are changing location of the instruction.
912      G->takeName(P.first);
913      P.first->replaceAllUsesWith(G);
914      P.first->eraseFromParent();
915    }
916    return FramePtr;
917  }
918
919  // If we found any alloca, replace all of their remaining uses with GEP
920  // instructions. Because new dbg.declare have been created for these alloca,
921  // we also delete the original dbg.declare and replace other uses with undef.
922  // Note: We cannot replace the alloca with GEP instructions indiscriminately,
923  // as some of the uses may not be dominated by CoroBegin.
924  bool MightNeedToCopy = false;
925  Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
926  SmallVector<Instruction *, 4> UsersToUpdate;
927  for (auto &P : Allocas) {
928    AllocaInst *const A = P.first;
929
930    for (auto *DI : FindDbgDeclareUses(A))
931      DI->eraseFromParent();
932    replaceDbgUsesWithUndef(A);
933
934    UsersToUpdate.clear();
935    for (User *U : A->users()) {
936      auto *I = cast<Instruction>(U);
937      if (DT.dominates(CB, I))
938        UsersToUpdate.push_back(I);
939      else
940        MightNeedToCopy = true;
941    }
942    if (!UsersToUpdate.empty()) {
943      auto *G = GetFramePointer(P.second, A);
944      G->takeName(A);
945      for (Instruction *I : UsersToUpdate)
946        I->replaceUsesOfWith(A, G);
947    }
948  }
949  // If we discovered such uses not dominated by CoroBegin, see if any of them
950  // preceed coro begin and have instructions that can modify the
951  // value of the alloca and therefore would require a copying the value into
952  // the spill slot in the coroutine frame.
953  if (MightNeedToCopy) {
954    Builder.SetInsertPoint(FramePtr->getNextNode());
955
956    for (auto &P : Allocas) {
957      AllocaInst *const A = P.first;
958      if (mightWriteIntoAllocaPtr(*A, DT, *CB)) {
959        if (A->isArrayAllocation())
960          report_fatal_error(
961              "Coroutines cannot handle copying of array allocas yet");
962
963        auto *G = GetFramePointer(P.second, A);
964        auto *Value = Builder.CreateLoad(A->getAllocatedType(), A);
965        Builder.CreateStore(Value, G);
966      }
967    }
968  }
969  return FramePtr;
970}
971
972// Sets the unwind edge of an instruction to a particular successor.
973static void setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) {
974  if (auto *II = dyn_cast<InvokeInst>(TI))
975    II->setUnwindDest(Succ);
976  else if (auto *CS = dyn_cast<CatchSwitchInst>(TI))
977    CS->setUnwindDest(Succ);
978  else if (auto *CR = dyn_cast<CleanupReturnInst>(TI))
979    CR->setUnwindDest(Succ);
980  else
981    llvm_unreachable("unexpected terminator instruction");
982}
983
984// Replaces all uses of OldPred with the NewPred block in all PHINodes in a
985// block.
986static void updatePhiNodes(BasicBlock *DestBB, BasicBlock *OldPred,
987                           BasicBlock *NewPred,
988                           PHINode *LandingPadReplacement) {
989  unsigned BBIdx = 0;
990  for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
991    PHINode *PN = cast<PHINode>(I);
992
993    // We manually update the LandingPadReplacement PHINode and it is the last
994    // PHI Node. So, if we find it, we are done.
995    if (LandingPadReplacement == PN)
996      break;
997
998    // Reuse the previous value of BBIdx if it lines up.  In cases where we
999    // have multiple phi nodes with *lots* of predecessors, this is a speed
1000    // win because we don't have to scan the PHI looking for TIBB.  This
1001    // happens because the BB list of PHI nodes are usually in the same
1002    // order.
1003    if (PN->getIncomingBlock(BBIdx) != OldPred)
1004      BBIdx = PN->getBasicBlockIndex(OldPred);
1005
1006    assert(BBIdx != (unsigned)-1 && "Invalid PHI Index!");
1007    PN->setIncomingBlock(BBIdx, NewPred);
1008  }
1009}
1010
1011// Uses SplitEdge unless the successor block is an EHPad, in which case do EH
1012// specific handling.
1013static BasicBlock *ehAwareSplitEdge(BasicBlock *BB, BasicBlock *Succ,
1014                                    LandingPadInst *OriginalPad,
1015                                    PHINode *LandingPadReplacement) {
1016  auto *PadInst = Succ->getFirstNonPHI();
1017  if (!LandingPadReplacement && !PadInst->isEHPad())
1018    return SplitEdge(BB, Succ);
1019
1020  auto *NewBB = BasicBlock::Create(BB->getContext(), "", BB->getParent(), Succ);
1021  setUnwindEdgeTo(BB->getTerminator(), NewBB);
1022  updatePhiNodes(Succ, BB, NewBB, LandingPadReplacement);
1023
1024  if (LandingPadReplacement) {
1025    auto *NewLP = OriginalPad->clone();
1026    auto *Terminator = BranchInst::Create(Succ, NewBB);
1027    NewLP->insertBefore(Terminator);
1028    LandingPadReplacement->addIncoming(NewLP, NewBB);
1029    return NewBB;
1030  }
1031  Value *ParentPad = nullptr;
1032  if (auto *FuncletPad = dyn_cast<FuncletPadInst>(PadInst))
1033    ParentPad = FuncletPad->getParentPad();
1034  else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(PadInst))
1035    ParentPad = CatchSwitch->getParentPad();
1036  else
1037    llvm_unreachable("handling for other EHPads not implemented yet");
1038
1039  auto *NewCleanupPad = CleanupPadInst::Create(ParentPad, {}, "", NewBB);
1040  CleanupReturnInst::Create(NewCleanupPad, Succ, NewBB);
1041  return NewBB;
1042}
1043
1044static void rewritePHIs(BasicBlock &BB) {
1045  // For every incoming edge we will create a block holding all
1046  // incoming values in a single PHI nodes.
1047  //
1048  // loop:
1049  //    %n.val = phi i32[%n, %entry], [%inc, %loop]
1050  //
1051  // It will create:
1052  //
1053  // loop.from.entry:
1054  //    %n.loop.pre = phi i32 [%n, %entry]
1055  //    br %label loop
1056  // loop.from.loop:
1057  //    %inc.loop.pre = phi i32 [%inc, %loop]
1058  //    br %label loop
1059  //
1060  // After this rewrite, further analysis will ignore any phi nodes with more
1061  // than one incoming edge.
1062
1063  // TODO: Simplify PHINodes in the basic block to remove duplicate
1064  // predecessors.
1065
1066  LandingPadInst *LandingPad = nullptr;
1067  PHINode *ReplPHI = nullptr;
1068  if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
1069    // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
1070    // We replace the original landing pad with a PHINode that will collect the
1071    // results from all of them.
1072    ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
1073    ReplPHI->takeName(LandingPad);
1074    LandingPad->replaceAllUsesWith(ReplPHI);
1075    // We will erase the original landing pad at the end of this function after
1076    // ehAwareSplitEdge cloned it in the transition blocks.
1077  }
1078
1079  SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
1080  for (BasicBlock *Pred : Preds) {
1081    auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
1082    IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
1083    auto *PN = cast<PHINode>(&BB.front());
1084    do {
1085      int Index = PN->getBasicBlockIndex(IncomingBB);
1086      Value *V = PN->getIncomingValue(Index);
1087      PHINode *InputV = PHINode::Create(
1088          V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
1089          &IncomingBB->front());
1090      InputV->addIncoming(V, Pred);
1091      PN->setIncomingValue(Index, InputV);
1092      PN = dyn_cast<PHINode>(PN->getNextNode());
1093    } while (PN != ReplPHI); // ReplPHI is either null or the PHI that replaced
1094                             // the landing pad.
1095  }
1096
1097  if (LandingPad) {
1098    // Calls to ehAwareSplitEdge function cloned the original lading pad.
1099    // No longer need it.
1100    LandingPad->eraseFromParent();
1101  }
1102}
1103
1104static void rewritePHIs(Function &F) {
1105  SmallVector<BasicBlock *, 8> WorkList;
1106
1107  for (BasicBlock &BB : F)
1108    if (auto *PN = dyn_cast<PHINode>(&BB.front()))
1109      if (PN->getNumIncomingValues() > 1)
1110        WorkList.push_back(&BB);
1111
1112  for (BasicBlock *BB : WorkList)
1113    rewritePHIs(*BB);
1114}
1115
1116// Check for instructions that we can recreate on resume as opposed to spill
1117// the result into a coroutine frame.
1118static bool materializable(Instruction &V) {
1119  return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
1120         isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
1121}
1122
1123// Check for structural coroutine intrinsics that should not be spilled into
1124// the coroutine frame.
1125static bool isCoroutineStructureIntrinsic(Instruction &I) {
1126  return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
1127         isa<CoroSuspendInst>(&I);
1128}
1129
1130// For every use of the value that is across suspend point, recreate that value
1131// after a suspend point.
1132static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
1133                                              SpillInfo const &Spills) {
1134  BasicBlock *CurrentBlock = nullptr;
1135  Instruction *CurrentMaterialization = nullptr;
1136  Instruction *CurrentDef = nullptr;
1137
1138  for (auto const &E : Spills) {
1139    // If it is a new definition, update CurrentXXX variables.
1140    if (CurrentDef != E.def()) {
1141      CurrentDef = cast<Instruction>(E.def());
1142      CurrentBlock = nullptr;
1143      CurrentMaterialization = nullptr;
1144    }
1145
1146    // If we have not seen this block, materialize the value.
1147    if (CurrentBlock != E.userBlock()) {
1148      CurrentBlock = E.userBlock();
1149      CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
1150      CurrentMaterialization->setName(CurrentDef->getName());
1151      CurrentMaterialization->insertBefore(
1152          &*CurrentBlock->getFirstInsertionPt());
1153    }
1154
1155    if (auto *PN = dyn_cast<PHINode>(E.user())) {
1156      assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
1157                                                "values in the PHINode");
1158      PN->replaceAllUsesWith(CurrentMaterialization);
1159      PN->eraseFromParent();
1160      continue;
1161    }
1162
1163    // Replace all uses of CurrentDef in the current instruction with the
1164    // CurrentMaterialization for the block.
1165    E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
1166  }
1167}
1168
1169// Splits the block at a particular instruction unless it is the first
1170// instruction in the block with a single predecessor.
1171static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
1172  auto *BB = I->getParent();
1173  if (&BB->front() == I) {
1174    if (BB->getSinglePredecessor()) {
1175      BB->setName(Name);
1176      return BB;
1177    }
1178  }
1179  return BB->splitBasicBlock(I, Name);
1180}
1181
1182// Split above and below a particular instruction so that it
1183// will be all alone by itself in a block.
1184static void splitAround(Instruction *I, const Twine &Name) {
1185  splitBlockIfNotFirst(I, Name);
1186  splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
1187}
1188
1189static bool isSuspendBlock(BasicBlock *BB) {
1190  return isa<AnyCoroSuspendInst>(BB->front());
1191}
1192
1193typedef SmallPtrSet<BasicBlock*, 8> VisitedBlocksSet;
1194
1195/// Does control flow starting at the given block ever reach a suspend
1196/// instruction before reaching a block in VisitedOrFreeBBs?
1197static bool isSuspendReachableFrom(BasicBlock *From,
1198                                   VisitedBlocksSet &VisitedOrFreeBBs) {
1199  // Eagerly try to add this block to the visited set.  If it's already
1200  // there, stop recursing; this path doesn't reach a suspend before
1201  // either looping or reaching a freeing block.
1202  if (!VisitedOrFreeBBs.insert(From).second)
1203    return false;
1204
1205  // We assume that we'll already have split suspends into their own blocks.
1206  if (isSuspendBlock(From))
1207    return true;
1208
1209  // Recurse on the successors.
1210  for (auto Succ : successors(From)) {
1211    if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
1212      return true;
1213  }
1214
1215  return false;
1216}
1217
1218/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
1219/// suspend point?
1220static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
1221  // Seed the visited set with all the basic blocks containing a free
1222  // so that we won't pass them up.
1223  VisitedBlocksSet VisitedOrFreeBBs;
1224  for (auto User : AI->users()) {
1225    if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
1226      VisitedOrFreeBBs.insert(FI->getParent());
1227  }
1228
1229  return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
1230}
1231
1232/// After we split the coroutine, will the given basic block be along
1233/// an obvious exit path for the resumption function?
1234static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
1235                                              unsigned depth = 3) {
1236  // If we've bottomed out our depth count, stop searching and assume
1237  // that the path might loop back.
1238  if (depth == 0) return false;
1239
1240  // If this is a suspend block, we're about to exit the resumption function.
1241  if (isSuspendBlock(BB)) return true;
1242
1243  // Recurse into the successors.
1244  for (auto Succ : successors(BB)) {
1245    if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
1246      return false;
1247  }
1248
1249  // If none of the successors leads back in a loop, we're on an exit/abort.
1250  return true;
1251}
1252
1253static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
1254  // Look for a free that isn't sufficiently obviously followed by
1255  // either a suspend or a termination, i.e. something that will leave
1256  // the coro resumption frame.
1257  for (auto U : AI->users()) {
1258    auto FI = dyn_cast<CoroAllocaFreeInst>(U);
1259    if (!FI) continue;
1260
1261    if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
1262      return true;
1263  }
1264
1265  // If we never found one, we don't need a stack save.
1266  return false;
1267}
1268
1269/// Turn each of the given local allocas into a normal (dynamic) alloca
1270/// instruction.
1271static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
1272                              SmallVectorImpl<Instruction*> &DeadInsts) {
1273  for (auto AI : LocalAllocas) {
1274    auto M = AI->getModule();
1275    IRBuilder<> Builder(AI);
1276
1277    // Save the stack depth.  Try to avoid doing this if the stackrestore
1278    // is going to immediately precede a return or something.
1279    Value *StackSave = nullptr;
1280    if (localAllocaNeedsStackSave(AI))
1281      StackSave = Builder.CreateCall(
1282                            Intrinsic::getDeclaration(M, Intrinsic::stacksave));
1283
1284    // Allocate memory.
1285    auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
1286    Alloca->setAlignment(Align(AI->getAlignment()));
1287
1288    for (auto U : AI->users()) {
1289      // Replace gets with the allocation.
1290      if (isa<CoroAllocaGetInst>(U)) {
1291        U->replaceAllUsesWith(Alloca);
1292
1293      // Replace frees with stackrestores.  This is safe because
1294      // alloca.alloc is required to obey a stack discipline, although we
1295      // don't enforce that structurally.
1296      } else {
1297        auto FI = cast<CoroAllocaFreeInst>(U);
1298        if (StackSave) {
1299          Builder.SetInsertPoint(FI);
1300          Builder.CreateCall(
1301                    Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
1302                             StackSave);
1303        }
1304      }
1305      DeadInsts.push_back(cast<Instruction>(U));
1306    }
1307
1308    DeadInsts.push_back(AI);
1309  }
1310}
1311
1312/// Turn the given coro.alloca.alloc call into a dynamic allocation.
1313/// This happens during the all-instructions iteration, so it must not
1314/// delete the call.
1315static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
1316                                        coro::Shape &Shape,
1317                                   SmallVectorImpl<Instruction*> &DeadInsts) {
1318  IRBuilder<> Builder(AI);
1319  auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
1320
1321  for (User *U : AI->users()) {
1322    if (isa<CoroAllocaGetInst>(U)) {
1323      U->replaceAllUsesWith(Alloc);
1324    } else {
1325      auto FI = cast<CoroAllocaFreeInst>(U);
1326      Builder.SetInsertPoint(FI);
1327      Shape.emitDealloc(Builder, Alloc, nullptr);
1328    }
1329    DeadInsts.push_back(cast<Instruction>(U));
1330  }
1331
1332  // Push this on last so that it gets deleted after all the others.
1333  DeadInsts.push_back(AI);
1334
1335  // Return the new allocation value so that we can check for needed spills.
1336  return cast<Instruction>(Alloc);
1337}
1338
1339/// Get the current swifterror value.
1340static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
1341                                     coro::Shape &Shape) {
1342  // Make a fake function pointer as a sort of intrinsic.
1343  auto FnTy = FunctionType::get(ValueTy, {}, false);
1344  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1345
1346  auto Call = Builder.CreateCall(FnTy, Fn, {});
1347  Shape.SwiftErrorOps.push_back(Call);
1348
1349  return Call;
1350}
1351
1352/// Set the given value as the current swifterror value.
1353///
1354/// Returns a slot that can be used as a swifterror slot.
1355static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
1356                                     coro::Shape &Shape) {
1357  // Make a fake function pointer as a sort of intrinsic.
1358  auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
1359                                {V->getType()}, false);
1360  auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
1361
1362  auto Call = Builder.CreateCall(FnTy, Fn, { V });
1363  Shape.SwiftErrorOps.push_back(Call);
1364
1365  return Call;
1366}
1367
1368/// Set the swifterror value from the given alloca before a call,
1369/// then put in back in the alloca afterwards.
1370///
1371/// Returns an address that will stand in for the swifterror slot
1372/// until splitting.
1373static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
1374                                                 AllocaInst *Alloca,
1375                                                 coro::Shape &Shape) {
1376  auto ValueTy = Alloca->getAllocatedType();
1377  IRBuilder<> Builder(Call);
1378
1379  // Load the current value from the alloca and set it as the
1380  // swifterror value.
1381  auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
1382  auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
1383
1384  // Move to after the call.  Since swifterror only has a guaranteed
1385  // value on normal exits, we can ignore implicit and explicit unwind
1386  // edges.
1387  if (isa<CallInst>(Call)) {
1388    Builder.SetInsertPoint(Call->getNextNode());
1389  } else {
1390    auto Invoke = cast<InvokeInst>(Call);
1391    Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
1392  }
1393
1394  // Get the current swifterror value and store it to the alloca.
1395  auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
1396  Builder.CreateStore(ValueAfterCall, Alloca);
1397
1398  return Addr;
1399}
1400
1401/// Eliminate a formerly-swifterror alloca by inserting the get/set
1402/// intrinsics and attempting to MemToReg the alloca away.
1403static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
1404                                      coro::Shape &Shape) {
1405  for (auto UI = Alloca->use_begin(), UE = Alloca->use_end(); UI != UE; ) {
1406    // We're likely changing the use list, so use a mutation-safe
1407    // iteration pattern.
1408    auto &Use = *UI;
1409    ++UI;
1410
1411    // swifterror values can only be used in very specific ways.
1412    // We take advantage of that here.
1413    auto User = Use.getUser();
1414    if (isa<LoadInst>(User) || isa<StoreInst>(User))
1415      continue;
1416
1417    assert(isa<CallInst>(User) || isa<InvokeInst>(User));
1418    auto Call = cast<Instruction>(User);
1419
1420    auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
1421
1422    // Use the returned slot address as the call argument.
1423    Use.set(Addr);
1424  }
1425
1426  // All the uses should be loads and stores now.
1427  assert(isAllocaPromotable(Alloca));
1428}
1429
1430/// "Eliminate" a swifterror argument by reducing it to the alloca case
1431/// and then loading and storing in the prologue and epilog.
1432///
1433/// The argument keeps the swifterror flag.
static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
                                        coro::Shape &Shape,
                             SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());

  auto ArgTy = cast<PointerType>(Arg.getType());
  auto ValueTy = ArgTy->getElementType();

  // Reduce to the alloca case:

  // Create an alloca and replace all uses of the arg with it.
  auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
  Arg.replaceAllUsesWith(Alloca);

  // Set an initial value in the alloca.  swifterror is always null on entry.
  auto InitialValue = Constant::getNullValue(ValueTy);
  Builder.CreateStore(InitialValue, Alloca);

  // Find all the suspends in the function and save and restore around them.
  for (auto Suspend : Shape.CoroSuspends) {
    (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
  }

  // Find all the coro.ends in the function and restore the error value.
  for (auto End : Shape.CoroEnds) {
    Builder.SetInsertPoint(End);
    auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
    (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
  }

  // Now we can use the alloca logic.
  AllocasToPromote.push_back(Alloca);
  eliminateSwiftErrorAlloca(F, Alloca, Shape);
}

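// After eliminateSwiftErrorArgument runs, the body is shaped roughly like
// this (pseudo-IR; names are illustrative):
//
//   entry:     %err = alloca %ErrTy
//              store %ErrTy null, %ErrTy* %err
//   ...
//   suspend:   ; the swifterror value is saved/restored around every
//              ; coro.suspend via emitSetAndGetSwiftErrorValueAround
//   coro.end:  %final = load %ErrTy, %ErrTy* %err
//              ; %final is written back to the real swifterror slot
//
// which is exactly the alloca case handled by eliminateSwiftErrorAlloca.
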
/// Eliminate all problematic uses of swifterror arguments and allocas
/// from the function.  We'll fix them up later when splitting the function.
static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
  SmallVector<AllocaInst*, 4> AllocasToPromote;

  // Look for a swifterror argument.
  for (auto &Arg : F.args()) {
    if (!Arg.hasSwiftErrorAttr()) continue;

    eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
    break;
  }

  // Look for swifterror allocas.
  for (auto &Inst : F.getEntryBlock()) {
    auto Alloca = dyn_cast<AllocaInst>(&Inst);
    if (!Alloca || !Alloca->isSwiftError()) continue;

    // Clear the swifterror flag.
    Alloca->setSwiftError(false);

    AllocasToPromote.push_back(Alloca);
    eliminateSwiftErrorAlloca(F, Alloca, Shape);
  }

  // If we have any allocas to promote, compute a dominator tree and
  // promote them en masse.
  if (!AllocasToPromote.empty()) {
    DominatorTree DT(F);
    PromoteMemToReg(AllocasToPromote, DT);
  }
}

/// The retcon and retcon.once conventions assume that all spill uses can be
/// sunk after the coro.begin intrinsic.
static void sinkSpillUsesAfterCoroBegin(Function &F, const SpillInfo &Spills,
                                        CoroBeginInst *CoroBegin) {
  DominatorTree Dom(F);

  SmallSetVector<Instruction *, 32> ToMove;
  SmallVector<Instruction *, 32> Worklist;

  // Collect all users that precede coro.begin.
  for (auto const &Entry : Spills) {
    auto *SpillDef = Entry.def();
    for (User *U : SpillDef->users()) {
      auto Inst = cast<Instruction>(U);
      if (Inst->getParent() != CoroBegin->getParent() ||
          Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }
  // Recursively collect users before coro.begin.
  while (!Worklist.empty()) {
    auto *Def = Worklist.back();
    Worklist.pop_back();
    for (User *U : Def->users()) {
      auto Inst = cast<Instruction>(U);
      if (Dom.dominates(CoroBegin, Inst))
        continue;
      if (ToMove.insert(Inst))
        Worklist.push_back(Inst);
    }
  }

  // Sort by dominance.
  SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
  llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
    // If A dominates B, it should precede (<) B.
    return Dom.dominates(A, B);
  });

  Instruction *InsertPt = CoroBegin->getNextNode();
  for (Instruction *Inst : InsertionList)
    Inst->moveBefore(InsertPt);
}

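// For example (schematic; the retcon ABIs only materialize the frame pointer
// at coro.begin), given a spilled value with a user that textually precedes
// coro.begin in the same block:
//
//   %x   = ...
//   %y   = add i32 %x, 1                  ; user of the spilled value %x
//   %hdl = call i8* @llvm.coro.begin(...)
//
// %y, and transitively anything using %y that coro.begin does not dominate,
// is moved to just after coro.begin, so that the frame reloads inserted
// later by insertSpills have a legal insertion point.
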
/// For each local variable all of whose uses are confined to a single
/// suspended region, sink its lifetime.start markers into the block right
/// after the suspend point that starts the region. Doing so minimizes the
/// live range of each variable and hence the amount of data we end up
/// putting on the frame.
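///
/// Schematically (pseudo-IR; block and value names are illustrative):
///   entry:
///     %a = alloca i64
///     %a.i8 = bitcast i64* %a to i8*
///     call void @llvm.lifetime.start.p0i8(i64 8, i8* %a.i8)
///     ...
///   await.ready:                ; single successor of a coro.suspend block
///     ; every non-lifetime use of %a is dominated by this block and does
///     ; not cross another suspend point
/// The bitcast and lifetime.start are re-emitted at the end of await.ready
/// and the original markers are erased, so %a is no longer considered live
/// across the suspend.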
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
                                     SuspendCrossingInfo &Checker) {
  DominatorTree DT(F);

  // Collect all possible basic blocks which may dominate all uses of allocas.
  SmallPtrSet<BasicBlock *, 4> DomSet;
  DomSet.insert(&F.getEntryBlock());
  for (auto *CSI : Shape.CoroSuspends) {
    BasicBlock *SuspendBlock = CSI->getParent();
    assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
           "should have split coro.suspend into its own block");
    DomSet.insert(SuspendBlock->getSingleSuccessor());
  }

  for (Instruction &I : instructions(F)) {
    if (!isa<AllocaInst>(&I))
      continue;

    for (BasicBlock *DomBB : DomSet) {
      bool Valid = true;
      SmallVector<Instruction *, 1> BCInsts;

      auto isUsedByLifetimeStart = [&](Instruction *I) {
        if (isa<BitCastInst>(I) && I->hasOneUse())
          if (auto *IT = dyn_cast<IntrinsicInst>(I->user_back()))
            return IT->getIntrinsicID() == Intrinsic::lifetime_start;
        return false;
      };

      for (User *U : I.users()) {
        Instruction *UI = cast<Instruction>(U);
        // If every user other than the lifetime.start markers is dominated
        // by DomBB and is not separated from DomBB by a suspend point, we
        // can sink the markers into DomBB instead of spilling the alloca.
        if (!DT.dominates(DomBB, UI->getParent()) ||
            Checker.isDefinitionAcrossSuspend(DomBB, U)) {
          // Skip bitcasts that are only used by lifetime.start markers.
          if (isUsedByLifetimeStart(UI)) {
            BCInsts.push_back(UI);
            continue;
          }
          Valid = false;
          break;
        }
      }
      // Sink the lifetime.start markers into DomBB when all other uses of
      // the alloca are confined to the region DomBB dominates.
      if (Valid && !BCInsts.empty()) {
        auto *NewBitcast = BCInsts[0]->clone();
        auto *NewLifetime = cast<Instruction>(BCInsts[0]->user_back())->clone();
        NewLifetime->replaceUsesOfWith(BCInsts[0], NewBitcast);
        NewBitcast->insertBefore(DomBB->getTerminator());
        NewLifetime->insertBefore(DomBB->getTerminator());

        // The original lifetime.start markers are no longer necessary.
        for (Instruction *S : BCInsts) {
          S->user_back()->eraseFromParent();
        }
        break;
      }
    }
  }
}

void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
  eliminateSwiftError(F, Shape);

  if (Shape.ABI == coro::ABI::Switch &&
      Shape.SwitchLowering.PromiseAlloca) {
    Shape.getSwitchCoroId()->clearPromise();
  }

  // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
  // intrinsics are in their own blocks to simplify the logic of building up
  // SuspendCrossingInfo data.
  for (auto *CSI : Shape.CoroSuspends) {
    if (auto *Save = CSI->getCoroSave())
      splitAround(Save, "CoroSave");
    splitAround(CSI, "CoroSuspend");
  }

  // Put CoroEnds into their own blocks.
  for (CoroEndInst *CE : Shape.CoroEnds)
    splitAround(CE, "CoroEnd");

  // Transform multi-edge PHI nodes so that no value feeding into a PHI has
  // its definition separated from the PHI by a suspend point.
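  //
  // For example (schematic; names are illustrative), a PHI such as
  //   cleanup:
  //     %v = phi i32 [ %a, %from.a ], [ %b, %from.b ]
  // is rewritten so that each incoming value flows through a dedicated
  // single-predecessor block holding a one-entry PHI; that gives the spiller
  // a well-defined place on each edge to insert a reload from the frame if
  // the incoming value had to be spilled.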
  rewritePHIs(F);

  // Build suspend crossing info.
  SuspendCrossingInfo Checker(F, Shape);

  IRBuilder<> Builder(F.getContext());
  SpillInfo Spills;
  SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
  SmallVector<Instruction*, 4> DeadInstructions;

  for (int Repeat = 0; Repeat < 4; ++Repeat) {
    // See if there are materializable instructions across suspend points.
    for (Instruction &I : instructions(F))
      if (materializable(I))
        for (User *U : I.users())
          if (Checker.isDefinitionAcrossSuspend(I, U))
            Spills.emplace_back(&I, U);

    if (Spills.empty())
      break;

    // Rewrite materializable instructions to be materialized at the use point.
    LLVM_DEBUG(dump("Materializations", Spills));
    rewriteMaterializableInstructions(Builder, Spills);
    Spills.clear();
  }
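
  // For example (schematic), if %idx below were left alone it would be
  // spilled, occupying a frame slot even though it is trivially
  // recomputable:
  //
  //   %idx = add i32 %n, 1        ; %n already has to live in the frame
  //   ... coro.suspend ...
  //   use of %idx
  //
  // Instead, rewriteMaterializableInstructions re-emits the add next to the
  // post-suspend use, so only %n needs a slot. The loop above repeats a few
  // times because the operands of a rematerialized instruction may in turn
  // become values that cross the suspend.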

  sinkLifetimeStartMarkers(F, Shape, Checker);

  // Collect lifetime.start info for each alloca.
  using LifetimeStart = SmallPtrSet<Instruction *, 2>;
  llvm::DenseMap<Instruction *, std::unique_ptr<LifetimeStart>> LifetimeMap;
  for (Instruction &I : instructions(F)) {
    auto *II = dyn_cast<IntrinsicInst>(&I);
    if (!II || II->getIntrinsicID() != Intrinsic::lifetime_start)
      continue;

    if (auto *OpInst = dyn_cast<BitCastInst>(I.getOperand(1)))
      if (auto *AI = dyn_cast<AllocaInst>(OpInst->getOperand(0))) {

        if (LifetimeMap.find(AI) == LifetimeMap.end())
          LifetimeMap[AI] = std::make_unique<LifetimeStart>();

        LifetimeMap[AI]->insert(OpInst);
      }
  }
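
  // Why this matters (schematic): an alloca is always defined in the entry
  // block, so checking suspend crossing from the alloca itself would be
  // overly conservative:
  //
  //   entry:       %a = alloca i32
  //   ...
  //   coro.suspend ...
  //   after.susp:  call void @llvm.lifetime.start.p0i8(i64 4, i8* %a.i8)
  //                ; %a.i8 is a bitcast of %a; all real uses of %a follow
  //
  // The spill loop below uses the collected markers (the bitcasts feeding
  // lifetime.start) as the definition points, so no use of %a is considered
  // to cross the suspend and %a gets no frame slot.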

  // Collect the spills for arguments and other not-materializable values.
  for (Argument &A : F.args())
    for (User *U : A.users())
      if (Checker.isDefinitionAcrossSuspend(A, U))
        Spills.emplace_back(&A, U);

  for (Instruction &I : instructions(F)) {
    // Values returned from coroutine structure intrinsics should not be part
    // of the Coroutine Frame.
    if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
      continue;

    // The coroutine promise is always included in the coroutine frame; there
    // is no need to check for suspend crossing.
    if (Shape.ABI == coro::ABI::Switch &&
        Shape.SwitchLowering.PromiseAlloca == &I)
      continue;

    // Handle coro.alloca.alloc specially here.
    if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
      // Check whether the alloca's lifetime is bounded by suspend points.
      if (isLocalAlloca(AI)) {
        LocalAllocas.push_back(AI);
        continue;
      }

      // If not, do a quick rewrite of the alloca and then add spills of
      // the rewritten value.  The rewrite doesn't invalidate anything in
      // Spills because the other alloca intrinsics have no other operands
      // besides AI, and it doesn't invalidate the iteration because we delay
      // erasing AI.
      auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);

      for (User *U : Alloc->users()) {
        if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
          Spills.emplace_back(Alloc, U);
      }
      continue;
    }

    // Ignore coro.alloca.get; we process it as part of coro.alloca.alloc.
    if (isa<CoroAllocaGetInst>(I))
      continue;

    auto Iter = LifetimeMap.find(&I);
    for (User *U : I.users()) {
      bool NeedSpill = false;

      // If we collected lifetime.start markers for this value, check suspend
      // crossing from those markers rather than from the definition itself.
      if (Iter != LifetimeMap.end())
        for (auto *S : *Iter->second) {
          if ((NeedSpill = Checker.isDefinitionAcrossSuspend(*S, U)))
            break;
        }
      else
        NeedSpill = Checker.isDefinitionAcrossSuspend(I, U);

      if (NeedSpill) {
        // We cannot spill a token.
        if (I.getType()->isTokenTy())
          report_fatal_error(
              "token definition is separated from the use by a suspend point");
        Spills.emplace_back(&I, U);
      }
    }
  }

  LLVM_DEBUG(dump("Spills", Spills));
  if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce)
    sinkSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
  Shape.FrameTy = buildFrameType(F, Shape, Spills);
  Shape.FramePtr = insertSpills(Spills, Shape);
  lowerLocalAllocas(LocalAllocas, DeadInstructions);

  for (auto *I : DeadInstructions)
    I->eraseFromParent();
}
